Node.js fs.readdir recursive directory search
This one uses the maximum number of new, buzzwordy features available in Node 8, including Promises, util.promisify, destructuring, async/await, map+reduce and more, making your co-workers scratch their heads as they try to figure out what is going on.
Node 8+
No external dependencies.
const { promisify } = require('util');
const { resolve } = require('path');
const fs = require('fs');
const readdir = promisify(fs.readdir);
const stat = promisify(fs.stat);
async function getFiles(dir) {
  const subdirs = await readdir(dir);
  const files = await Promise.all(subdirs.map(async (subdir) => {
    const res = resolve(dir, subdir);
    return (await stat(res)).isDirectory() ? getFiles(res) : res;
  }));
  return files.reduce((a, f) => a.concat(f), []);
}
Usage
getFiles(__dirname)
  .then(files => console.log(files))
  .catch(e => console.error(e));
Node 10.10+
Updated for Node 10.10+ with even more whizbang:
const { resolve } = require('path');
const { readdir } = require('fs').promises;
async function getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  const files = await Promise.all(dirents.map((dirent) => {
    const res = resolve(dir, dirent.name);
    return dirent.isDirectory() ? getFiles(res) : res;
  }));
  return Array.prototype.concat(...files);
}
Note that starting with node 11.15.0 you can use files.flat() instead of Array.prototype.concat(...files) to flatten the files array.
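For reference, a minimal sketch of the same function with that change, reusing the same require lines as above:
const { resolve } = require('path');
const { readdir } = require('fs').promises;

async function getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  const files = await Promise.all(dirents.map((dirent) => {
    const res = resolve(dir, dirent.name);
    // directories resolve to nested arrays, files to plain strings
    return dirent.isDirectory() ? getFiles(res) : res;
  }));
  // one level of flattening is enough, since each recursive call already flattens its own result
  return files.flat();
}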
Node 11+
If you want to blow everybody's head up completely, you can use the following version based on async iterators. In addition to being really cool, it also lets consumers pull results one at a time, making it better suited for really large directories.
const { resolve } = require('path');
const { readdir } = require('fs').promises;
async function* getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  for (const dirent of dirents) {
    const res = resolve(dir, dirent.name);
    if (dirent.isDirectory()) {
      yield* getFiles(res);
    } else {
      yield res;
    }
  }
}
Usage has changed because the return type is now an async iterator instead of a promise:
;(async () => {
  for await (const f of getFiles('.')) {
    console.log(f);
  }
})()
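If you still want everything in a single flat array (for example to pass to code that expects one), you can collect the iterator's results yourself; a minimal sketch, reusing getFiles from above:
;(async () => {
  const files = [];
  // drain the async iterator into a plain array
  for await (const f of getFiles('.')) {
    files.push(f);
  }
  console.log(files);
})()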
In case somebody is interested, I've written more about async iterators here: https://qwtel.com/posts/software/async-generators-in-the-wild/
Just in case anyone finds it useful, I also put together a synchronous version.
var fs = require('fs');

var walk = function(dir) {
  var results = [];
  var list = fs.readdirSync(dir);
  list.forEach(function(file) {
    file = dir + '/' + file;
    var stat = fs.statSync(file);
    if (stat && stat.isDirectory()) {
      /* Recurse into a subdirectory */
      results = results.concat(walk(file));
    } else {
      /* Is a file */
      results.push(file);
    }
  });
  return results;
};
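Usage is simpler since the function returns the array directly; for example:
var results = walk(__dirname);
console.log(results);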
Tip: To use fewer resources when filtering, filter within this function itself. E.g. replace results.push(file); with the code below. Adjust as required:
var file_type = file.split(".").pop();
var file_name = file.split(/(\\|\/)/g).pop();
if (file_type == "json") results.push(file);
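If you want that filter to be reusable, one option is to pass the extension into the walk itself. This is only a sketch; the walkByExtension name and the ext parameter are made up here, not part of the answer above:
var fs = require('fs');

// hypothetical variant of walk() that only collects files with a given extension
var walkByExtension = function(dir, ext) {
  var results = [];
  fs.readdirSync(dir).forEach(function(file) {
    file = dir + '/' + file;
    var stat = fs.statSync(file);
    if (stat && stat.isDirectory()) {
      results = results.concat(walkByExtension(file, ext));
    } else if (file.split(".").pop() == ext) {
      results.push(file);
    }
  });
  return results;
};

console.log(walkByExtension(__dirname, "json"));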
A. Have a look at the file module. It has a function called walk:
file.walk(start, callback)
Navigates a file tree, calling callback for each directory, passing in (null, dirPath, dirs, files).
This may be for you! And yes, it is async. However, I think you would have to aggregate the full paths yourself, if you needed them.
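A rough sketch of what that aggregation might look like, based only on the callback signature described above. Whether the entries in files are relative to dirPath or already full paths is an assumption you would need to check against the module's docs, and since walk is async, results is only complete once the whole walk has finished:
var file = require('file');
var path = require('path');

var results = [];
file.walk('somepath/', function(err, dirPath, dirs, files) {
  if (err) throw err;
  // assumption: entries in `files` are names relative to dirPath;
  // if they are already full paths, push them as-is instead
  files.forEach(function(f) {
    results.push(path.join(dirPath, f));
  });
});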
B. An alternative, and even one of my favourites: use the Unix find
for that. Why do something again that has already been programmed? Maybe not exactly what you need, but still worth checking out:
var execFile = require('child_process').execFile;

execFile('find', [ 'somepath/' ], function(err, stdout, stderr) {
  var file_list = stdout.split('\n');
  /* now you've got a list with full path file names */
});
Find has a nice built-in caching mechanism that makes subsequent searches very fast, as long as only a few folders have changed.
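If you only want regular files and a clean array, a small wrapper helps. This is just a sketch (the findFiles name is made up); -type f restricts find to regular files, and the filter drops the empty string that split leaves at the end of the output:
var execFile = require('child_process').execFile;

function findFiles(dir, done) {
  execFile('find', [ dir, '-type', 'f' ], function(err, stdout, stderr) {
    if (err) return done(err);
    // split into lines and drop the trailing empty entry
    done(null, stdout.split('\n').filter(Boolean));
  });
}

findFiles('somepath/', function(err, files) {
  if (err) throw err;
  console.log(files);
});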
There are basically two ways of accomplishing this. In an async environment you'll notice that there are two kinds of loops: serial and parallel. A serial loop waits for one iteration to complete before it moves on to the next one, which guarantees that every iteration of the loop completes in order. In a parallel loop, all the iterations are started at the same time, and one may complete before another, but it is much faster than a serial loop. So in this case it's probably better to use a parallel loop, because it doesn't matter what order the walk completes in, just as long as it completes and returns the results (unless you want them in order).
A parallel loop would look like this:
var fs = require('fs');
var path = require('path');
var walk = function(dir, done) {
  var results = [];
  fs.readdir(dir, function(err, list) {
    if (err) return done(err);
    var pending = list.length;
    if (!pending) return done(null, results);
    list.forEach(function(file) {
      file = path.resolve(dir, file);
      fs.stat(file, function(err, stat) {
        if (stat && stat.isDirectory()) {
          walk(file, function(err, res) {
            results = results.concat(res);
            // `pending` counts outstanding entries; call done once the last one finishes
            if (!--pending) done(null, results);
          });
        } else {
          results.push(file);
          if (!--pending) done(null, results);
        }
      });
    });
  });
};
A serial loop would look like this:
var fs = require('fs');
var path = require('path');
var walk = function(dir, done) {
  var results = [];
  fs.readdir(dir, function(err, list) {
    if (err) return done(err);
    var i = 0;
    // process one entry at a time; next() is only called after the current entry finishes
    (function next() {
      var file = list[i++];
      if (!file) return done(null, results);
      file = path.resolve(dir, file);
      fs.stat(file, function(err, stat) {
        if (stat && stat.isDirectory()) {
          walk(file, function(err, res) {
            results = results.concat(res);
            next();
          });
        } else {
          results.push(file);
          next();
        }
      });
    })();
  });
};
And to test it out on your home directory (WARNING: the results list will be huge if you have a lot of stuff in your home directory):
walk(process.env.HOME, function(err, results) {
  if (err) throw err;
  console.log(results);
});