From 1287666a0e94baf135914aa3ffa848492a2c699a Mon Sep 17 00:00:00 2001 From: thecodrr Date: Mon, 19 Apr 2021 11:42:05 +0500 Subject: [PATCH 1/9] refactor: control all directory walking via Walker class --- src/api/async.js | 34 +++++---- src/api/fns.js | 69 +++++++------------ src/api/shared.js | 169 --------------------------------------------- src/api/sync.js | 32 ++++++--- src/api/walker.js | 171 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 241 insertions(+), 234 deletions(-) delete mode 100644 src/api/shared.js create mode 100644 src/api/walker.js diff --git a/src/api/async.js b/src/api/async.js index 3a8f793..4a56e09 100644 --- a/src/api/async.js +++ b/src/api/async.js @@ -1,27 +1,35 @@ const { readdir } = require("../compat/fs"); const Queue = require("./queue"); -const { makeWalkerFunctions, readdirOpts } = require("./shared"); +const { Walker, readdirOpts } = require("./walker"); -function promise(dir, options) { +function promise(rootDirectory, options) { return new Promise((resolve, reject) => { - callback(dir, options, (err, output) => { + callback(rootDirectory, options, (err, output) => { if (err) return reject(err); resolve(output); }); }); } -function callback(dirPath, options, callback) { - const { init, walkSingleDir } = makeWalkerFunctions(); +function callback(rootDirectory, options, callback) { + let walker = new Walker(options, callback); + walker.registerWalker(walkDirectory); + walker.state.queue = new Queue(walker.callbackInvoker); - const { state, callbackInvoker, dir } = init(dirPath, options, callback); - state.queue = new Queue(callbackInvoker); - - // perf: we pass everything in arguments to avoid creating a closure - walk(state, dir, options.maxDepth, walkSingleDir); + const root = walker.normalizePath(rootDirectory); + walker.walk(walker, root, options.maxDepth); } -function walk(state, dir, currentDepth, walkSingleDir) { +/** + * + * @param {Walker} walker + * @param {string} directoryPath + * @param {number} 
currentDepth + * @returns + */ +function walkDirectory(walker, directoryPath, currentDepth) { + const { state } = walker; + state.queue.queue(); if (currentDepth < 0) { @@ -29,13 +37,13 @@ function walk(state, dir, currentDepth, walkSingleDir) { return; } - readdir(dir, readdirOpts, function (error, dirents) { + readdir(directoryPath, readdirOpts, function(error, dirents) { if (error) { state.queue.dequeue(error, state); return; } - walkSingleDir(walk, state, dir, dirents, currentDepth); + walker.processDirents(directoryPath, dirents, currentDepth); state.queue.dequeue(null, state); }); } diff --git a/src/api/fns.js b/src/api/fns.js index 19a7f1c..1ab1a84 100644 --- a/src/api/fns.js +++ b/src/api/fns.js @@ -10,38 +10,32 @@ module.exports.getArrayGroup = function() { }; /** PUSH FILE */ -module.exports.pushFileFilterAndCount = function(filters) { - return function(filename, _files, _dir, state) { - if (filters.every((filter) => filter(filename, false))) - state.counts.files++; - }; +module.exports.pushFileFilterAndCount = function(walker, filename) { + if (walker.options.filters.every((filter) => filter(filename, false))) + module.exports.pushFileCount(walker); }; -module.exports.pushFileFilter = function(filters) { - return function(filename, files) { - if (filters.every((filter) => filter(filename, false))) - files.push(filename); - }; +module.exports.pushFileFilter = function(walker, filename, files) { + if (walker.options.filters.every((filter) => filter(filename, false))) + files.push(filename); }; -module.exports.pushFileCount = function(_filename, _files, _dir, state) { - state.counts.files++; +module.exports.pushFileCount = function(walker) { + walker.state.counts.files++; }; -module.exports.pushFile = function(filename, files) { +module.exports.pushFile = function(_walker, filename, files) { files.push(filename); }; /** PUSH DIR */ -module.exports.pushDir = function(dirPath, paths) { +module.exports.pushDir = function(_walker, dirPath, paths) { 
paths.push(dirPath); }; -module.exports.pushDirFilter = function(filters) { - return function(dirPath, paths) { - if (filters.every((filter) => filter(dirPath, true))) { - paths.push(dirPath); - } - }; +module.exports.pushDirFilter = function(walker, dirPath, paths) { + if (walker.options.filters.every((filter) => filter(dirPath, true))) { + paths.push(dirPath); + } }; /** JOIN PATH */ @@ -53,31 +47,20 @@ module.exports.joinPath = function(filename) { }; /** WALK DIR */ -module.exports.walkDirExclude = function(exclude) { - return function(walk, state, path, dir, currentDepth, walkSingleDir) { - if (!exclude(dir, path)) { - module.exports.walkDir( - walk, - state, - path, - dir, - currentDepth, - walkSingleDir - ); - } - }; -}; - -module.exports.walkDir = function( - walk, - state, +module.exports.walkDirExclude = function( + walker, path, - _dir, - currentDepth, - walkSingleDir + directoryName, + currentDepth ) { - state.counts.dirs++; - walk(state, path, currentDepth, walkSingleDir); + if (!walker.options.excludeFn(directoryName, path)) { + module.exports.walkDir(walker, path, directoryName, currentDepth); + } +}; + +module.exports.walkDir = function(walker, path, _directoryName, currentDepth) { + walker.state.counts.dirs++; + walker.walk(walker, path, currentDepth); }; /** GROUP FILES */ diff --git a/src/api/shared.js b/src/api/shared.js deleted file mode 100644 index 80ea432..0000000 --- a/src/api/shared.js +++ /dev/null @@ -1,169 +0,0 @@ -const { sep, resolve: pathResolve } = require("path"); -const { cleanPath } = require("../utils"); -const fns = require("./fns"); -const readdirOpts = { withFileTypes: true }; - -module.exports = { makeWalkerFunctions, readdirOpts }; - -// We cannot simply export `init` and `walkSingleDir` directly. We need to rebuild them on every call. -// Otherwise, the functions setup by `buildFunctions` can be overwritten if a new concurrent async -// call is performed while walking is still happening from another call. 
-function makeWalkerFunctions() { - function init(dir, options, callback, isSync) { - if (options.resolvePaths) dir = pathResolve(dir); - if (options.normalizePath) dir = cleanPath(dir); - - /* We use a local state object instead of direct global variables so that each function - * execution is independent of each other. - */ - const state = { - // Perf: we explicitly tell the compiler to optimize for String arrays - paths: [""].slice(0, 0), - counts: { files: 0, dirs: 0 }, - options, - callback, - }; - - /* - * Perf: We conditionally change functions according to options. This gives a slight - * performance boost. Since these functions are so small, they are automatically inlined - * by the engine so there's no function call overhead (in most cases). - */ - buildFunctions(options, isSync); - - return { state, callbackInvoker, dir }; - } - - function walkSingleDir(walk, state, dir, dirents, currentDepth) { - pushDir(dir, state.paths); - // in cases where we have / as path - if (dir === sep) dir = ""; - - const files = getArray(state); - - for (var i = 0; i < dirents.length; ++i) { - const dirent = dirents[i]; - - if (dirent.isFile()) { - const filename = joinPath(dirent.name, dir); - pushFile(filename, files, dir, state); - } else if (dirent.isDirectory()) { - let dirPath = `${dir}${sep}${dirent.name}`; - walkDir( - walk, - state, - dirPath, - dirent.name, - currentDepth - 1, - walkSingleDir - ); - } - // perf: we can avoid entering the condition block if .withSymlinks is not set - // by using symlinkResolver !== fns.empty; this helps us avoid wasted allocations - // which are probably very minor - else if (dirent.isSymbolicLink() && symlinkResolver !== fns.empty) { - let path = `${dir}${sep}${dirent.name}`; - symlinkResolver(path, state, (stat, resolvedPath) => { - if (stat.isFile()) { - pushFile(resolvedPath, files, dir, state); - } else if (stat.isDirectory()) { - walkDir( - walk, - state, - resolvedPath, - dirent.name, - currentDepth - 1, - walkSingleDir - ); - 
} - }); - } - } - - groupFiles(dir, files, state); - } - - function buildFunctions(options, isSync) { - const { - filters, - onlyCountsVar, - includeBasePath, - includeDirs, - groupVar, - excludeFn, - excludeFiles, - resolveSymlinks, - } = options; - - buildPushFile(filters, onlyCountsVar, excludeFiles); - - pushDir = includeDirs - ? filters.length - ? fns.pushDirFilter(filters) - : fns.pushDir - : fns.empty; - - // build function for joining paths - joinPath = includeBasePath ? fns.joinPathWithBasePath : fns.joinPath; - - // build recursive walk directory function - walkDir = excludeFn ? fns.walkDirExclude(excludeFn) : fns.walkDir; - - // build groupFiles function for grouping files - groupFiles = groupVar ? fns.groupFiles : fns.empty; - getArray = groupVar ? fns.getArrayGroup : fns.getArray; - - buildSymlinkResolver(resolveSymlinks, isSync); - - buildCallbackInvoker(onlyCountsVar, isSync); - } - - function buildPushFile(filters, onlyCountsVar, excludeFiles) { - if (excludeFiles) { - pushFile = fns.empty; - return; - } - - if (filters.length && onlyCountsVar) { - pushFile = fns.pushFileFilterAndCount(filters); - } else if (filters.length) { - pushFile = fns.pushFileFilter(filters); - } else if (onlyCountsVar) { - pushFile = fns.pushFileCount; - } else { - pushFile = fns.pushFile; - } - } - - function buildCallbackInvoker(onlyCountsVar, isSync) { - if (onlyCountsVar) { - callbackInvoker = isSync - ? fns.callbackInvokerOnlyCountsSync - : fns.callbackInvokerOnlyCountsAsync; - } else { - callbackInvoker = isSync - ? fns.callbackInvokerDefaultSync - : fns.callbackInvokerDefaultAsync; - } - } - - function buildSymlinkResolver(resolveSymlinks, isSync) { - if (!resolveSymlinks) return; - - symlinkResolver = isSync - ? 
fns.resolveSymlinksSync - : fns.resolveSymlinksAsync; - } - - /* Dummies that will be filled later conditionally based on options */ - var pushFile = fns.empty; - var pushDir = fns.empty; - var walkDir = fns.empty; - var joinPath = fns.empty; - var groupFiles = fns.empty; - var callbackInvoker = fns.empty; - var getArray = fns.empty; - var symlinkResolver = fns.empty; - - return { init, walkSingleDir }; -} diff --git a/src/api/sync.js b/src/api/sync.js index d18cd63..25411e8 100644 --- a/src/api/sync.js +++ b/src/api/sync.js @@ -1,24 +1,38 @@ const { readdirSync } = require("../compat/fs"); -const { makeWalkerFunctions, readdirOpts } = require("./shared"); +const { Walker, readdirOpts } = require("./shared"); // For sync usage, we can reuse the same walker functions, because // there will not be concurrent calls overwriting the 'built functions' // in the middle of everything. -const { init, walkSingleDir } = makeWalkerFunctions(); +// const { init, walkSingleDir } = makeWalkerFunctions(); -function sync(dirPath, options) { - const { state, callbackInvoker, dir } = init(dirPath, options, null, true); - walk(state, dir, options.maxDepth); - return callbackInvoker(state); +function sync(rootDirectory, options) { + options.isSync = true; + + let walker = new Walker(options); + walker.registerWalker(walkDirectory); + + const root = walker.normalizePath(rootDirectory); + walker.walk(walker, root, options.maxDepth); + + return walker.callbackInvoker(walker.state); } -function walk(state, dir, currentDepth) { +/** + * + * @param {Walker} walker + * @param {string} directoryPath + * @param {number} currentDepth + * @returns + */ +function walkDirectory(walker, directoryPath, currentDepth) { if (currentDepth < 0) { return; } + const { state } = walker; try { - const dirents = readdirSync(dir, readdirOpts); - walkSingleDir(walk, state, dir, dirents, currentDepth); + const dirents = readdirSync(directoryPath, readdirOpts); + walker.processDirents(directoryPath, dirents, 
currentDepth); } catch (e) { if (!state.options.suppressErrors) throw e; } diff --git a/src/api/walker.js b/src/api/walker.js new file mode 100644 index 0000000..549e6c4 --- /dev/null +++ b/src/api/walker.js @@ -0,0 +1,171 @@ +const { sep, resolve: pathResolve } = require("path"); +const { cleanPath } = require("../utils"); +const fns = require("./fns"); +const readdirOpts = { withFileTypes: true }; + +function Walker(options, callback) { + /* Dummy functions that will be filled later conditionally based on options */ + this.pushFile = fns.empty; + this.pushDir = fns.empty; + this.walkDir = fns.empty; + this.joinPath = fns.empty; + this.groupFiles = fns.empty; + this.callbackInvoker = fns.empty; + this.getArray = fns.empty; + this.symlinkResolver = fns.empty; + + this.options = options; + + /* Perf: We use a local state per walker instead of direct global variables + * so that each walker execution is independent. + */ + this.state = { + // Perf: we explicitly tell the compiler to optimize for String arrays + paths: [""].slice(0, 0), + counts: { files: 0, dirs: 0 }, + options, + callback, + }; + + /* + * Perf: We conditionally change functions according to options. This gives a slight + * performance boost. Since these functions are so small, they are automatically inlined + * by the javascript engine so there's no function call overhead (in most cases). 
+ */ + this.buildFunctions(); +} + +Walker.prototype.normalizePath = function normalizePath(path) { + if (this.options.resolvePaths) path = pathResolve(path); + if (this.options.normalizePath) path = cleanPath(path); + return path; +}; + +Walker.prototype.registerWalker = function registerWalker(walkerFunction) { + this.walk = walkerFunction; +}; + +Walker.prototype.processDirents = function processDirents( + directoryPath, + dirents, + currentDepth +) { + this.pushDir(this, directoryPath, this.state.paths); + + // in cases where we have / as path + if (directoryPath === sep) directoryPath = ""; + + const files = this.getArray(this.state); + + for (var i = 0; i < dirents.length; ++i) { + const dirent = dirents[i]; + + if (dirent.isFile()) { + const filename = this.joinPath(dirent.name, directoryPath); + this.pushFile(this, filename, files); + } else if (dirent.isDirectory()) { + let path = `${directoryPath}${sep}${dirent.name}`; + this.walkDir(this, path, dirent.name, currentDepth - 1); + } + // perf: we can avoid entering the condition block if .withSymlinks is not set + // by using symlinkResolver !== fns.empty; this helps us avoid wasted allocations + // which are probably very minor + else if (dirent.isSymbolicLink() && this.symlinkResolver !== fns.empty) { + let path = `${directoryPath}${sep}${dirent.name}`; + this.symlinkResolver(path, this.state, (stat, resolvedPath) => { + if (stat.isFile()) { + this.pushFile(this, resolvedPath, files); + } else if (stat.isDirectory()) { + this.walkDir(this, resolvedPath, dirent.name, currentDepth - 1); + } + }); + } + } + + this.groupFiles(directoryPath, files, this.state); +}; + +Walker.prototype.buildFunctions = function buildFunctions() { + const { + filters, + onlyCountsVar, + includeBasePath, + includeDirs, + groupVar, + excludeFn, + excludeFiles, + resolveSymlinks, + isSync, + } = this.options; + + this.buildPushFile(filters, onlyCountsVar, excludeFiles); + + this.pushDir = includeDirs + ? filters.length + ? 
fns.pushDirFilter + : fns.pushDir + : fns.empty; + + // build function for joining paths + this.joinPath = includeBasePath ? fns.joinPathWithBasePath : fns.joinPath; + + // build recursive walk directory function + this.walkDir = excludeFn ? fns.walkDirExclude : fns.walkDir; + + // build groupFiles function for grouping files + this.groupFiles = groupVar ? fns.groupFiles : fns.empty; + this.getArray = groupVar ? fns.getArrayGroup : fns.getArray; + + this.buildSymlinkResolver(resolveSymlinks, isSync); + + this.buildCallbackInvoker(onlyCountsVar, isSync); +}; + +Walker.prototype.buildPushFile = function buildPushFile( + filters, + onlyCountsVar, + excludeFiles +) { + if (excludeFiles) { + this.pushFile = fns.empty; + return; + } + + if (filters.length && onlyCountsVar) { + this.pushFile = fns.pushFileFilterAndCount; + } else if (filters.length) { + this.pushFile = fns.pushFileFilter; + } else if (onlyCountsVar) { + this.pushFile = fns.pushFileCount; + } else { + this.pushFile = fns.pushFile; + } +}; + +Walker.prototype.buildCallbackInvoker = function buildCallbackInvoker( + onlyCountsVar, + isSync +) { + if (onlyCountsVar) { + this.callbackInvoker = isSync + ? fns.callbackInvokerOnlyCountsSync + : fns.callbackInvokerOnlyCountsAsync; + } else { + this.callbackInvoker = isSync + ? fns.callbackInvokerDefaultSync + : fns.callbackInvokerDefaultAsync; + } +}; + +Walker.prototype.buildSymlinkResolver = function buildSymlinkResolver( + resolveSymlinks, + isSync +) { + if (!resolveSymlinks) return; + + this.symlinkResolver = isSync + ? 
fns.resolveSymlinksSync + : fns.resolveSymlinksAsync; +}; + +module.exports = { Walker, readdirOpts }; From 5487172db3d4543196f2f9fba2d99e584dc5bd39 Mon Sep 17 00:00:00 2001 From: thecodrr Date: Mon, 19 Apr 2021 11:47:00 +0500 Subject: [PATCH 2/9] fix: Cannot find module './shared' --- src/api/sync.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/sync.js b/src/api/sync.js index 25411e8..8c2bd0b 100644 --- a/src/api/sync.js +++ b/src/api/sync.js @@ -1,5 +1,5 @@ const { readdirSync } = require("../compat/fs"); -const { Walker, readdirOpts } = require("./shared"); +const { Walker, readdirOpts } = require("./walker"); // For sync usage, we can reuse the same walker functions, because // there will not be concurrent calls overwriting the 'built functions' From c28680c95488214cf4b99c81a673e84da6d31a22 Mon Sep 17 00:00:00 2001 From: thecodrr Date: Mon, 19 Apr 2021 11:56:11 +0500 Subject: [PATCH 3/9] fix: properly handle leading path seperator --- __tests__/fdir.test.js | 14 ++++++++------ src/api/fns.js | 2 +- src/api/walker.js | 5 +---- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/__tests__/fdir.test.js b/__tests__/fdir.test.js index 1f4dae0..47fbbb8 100644 --- a/__tests__/fdir.test.js +++ b/__tests__/fdir.test.js @@ -137,15 +137,17 @@ describe.each(["withPromise", "sync"])("fdir %s", (type) => { test("recurse root (files should not contain multiple /)", async () => { mock({ - "/": { - etc: { - hosts: "dooone", - }, + "/etc": { + hosts: "dooone", }, }); - const api = new fdir().normalize().crawl("/"); + const api = new fdir() + .withBasePath() + .normalize() + .crawl("/"); const files = await api[type](); - expect(files.every((file) => !file.includes("/"))).toBe(true); + console.log(files); + expect(files.every((file) => !file.includes("//"))).toBe(true); mock.restore(); }); diff --git a/src/api/fns.js b/src/api/fns.js index 1ab1a84..1893517 100644 --- a/src/api/fns.js +++ b/src/api/fns.js @@ -40,7 +40,7 @@ 
module.exports.pushDirFilter = function(walker, dirPath, paths) { /** JOIN PATH */ module.exports.joinPathWithBasePath = function(filename, dir) { - return `${dir}${sep}${filename}`; + return `${dir}${dir.endsWith(sep) ? "" : sep}${filename}`; }; module.exports.joinPath = function(filename) { return filename; diff --git a/src/api/walker.js b/src/api/walker.js index 549e6c4..fe6b8a2 100644 --- a/src/api/walker.js +++ b/src/api/walker.js @@ -52,9 +52,6 @@ Walker.prototype.processDirents = function processDirents( ) { this.pushDir(this, directoryPath, this.state.paths); - // in cases where we have / as path - if (directoryPath === sep) directoryPath = ""; - const files = this.getArray(this.state); for (var i = 0; i < dirents.length; ++i) { @@ -64,7 +61,7 @@ Walker.prototype.processDirents = function processDirents( const filename = this.joinPath(dirent.name, directoryPath); this.pushFile(this, filename, files); } else if (dirent.isDirectory()) { - let path = `${directoryPath}${sep}${dirent.name}`; + let path = fns.joinPathWithBasePath(dirent.name, directoryPath); //`${directoryPath}${sep}${dirent.name}`; this.walkDir(this, path, dirent.name, currentDepth - 1); } // perf: we can avoid entering the condition block if .withSymlinks is not set From 686b81f8618cacc87c27f28113d24077135ef608 Mon Sep 17 00:00:00 2001 From: thecodrr Date: Mon, 19 Apr 2021 11:57:56 +0500 Subject: [PATCH 4/9] fix: handle leading path sep for symlinks --- src/api/walker.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/api/walker.js b/src/api/walker.js index fe6b8a2..46be9e0 100644 --- a/src/api/walker.js +++ b/src/api/walker.js @@ -61,14 +61,14 @@ Walker.prototype.processDirents = function processDirents( const filename = this.joinPath(dirent.name, directoryPath); this.pushFile(this, filename, files); } else if (dirent.isDirectory()) { - let path = fns.joinPathWithBasePath(dirent.name, directoryPath); //`${directoryPath}${sep}${dirent.name}`; + let path = 
fns.joinPathWithBasePath(dirent.name, directoryPath); this.walkDir(this, path, dirent.name, currentDepth - 1); } // perf: we can avoid entering the condition block if .withSymlinks is not set // by using symlinkResolver !== fns.empty; this helps us avoid wasted allocations // which are probably very minor else if (dirent.isSymbolicLink() && this.symlinkResolver !== fns.empty) { - let path = `${directoryPath}${sep}${dirent.name}`; + let path = fns.joinPathWithBasePath(dirent.name, directoryPath); this.symlinkResolver(path, this.state, (stat, resolvedPath) => { if (stat.isFile()) { this.pushFile(this, resolvedPath, files); From 42fa0964dddee58437d7cb4f684b7b73165692ee Mon Sep 17 00:00:00 2001 From: thecodrr Date: Mon, 19 Apr 2021 12:07:14 +0500 Subject: [PATCH 5/9] fix: remove unneeded console.log --- __tests__/fdir.test.js | 1 - 1 file changed, 1 deletion(-) diff --git a/__tests__/fdir.test.js b/__tests__/fdir.test.js index 47fbbb8..7bd1a5f 100644 --- a/__tests__/fdir.test.js +++ b/__tests__/fdir.test.js @@ -146,7 +146,6 @@ describe.each(["withPromise", "sync"])("fdir %s", (type) => { .normalize() .crawl("/"); const files = await api[type](); - console.log(files); expect(files.every((file) => !file.includes("//"))).toBe(true); mock.restore(); }); From f20b27f357a343ecb3da6ae0024a974523d2d873 Mon Sep 17 00:00:00 2001 From: thecodrr Date: Mon, 19 Apr 2021 12:16:59 +0500 Subject: [PATCH 6/9] refactor: build pushDir in its own function --- src/api/walker.js | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/api/walker.js b/src/api/walker.js index 46be9e0..fcdf08c 100644 --- a/src/api/walker.js +++ b/src/api/walker.js @@ -95,14 +95,6 @@ Walker.prototype.buildFunctions = function buildFunctions() { isSync, } = this.options; - this.buildPushFile(filters, onlyCountsVar, excludeFiles); - - this.pushDir = includeDirs - ? filters.length - ? 
fns.pushDirFilter - : fns.pushDir - : fns.empty; - // build function for joining paths this.joinPath = includeBasePath ? fns.joinPathWithBasePath : fns.joinPath; @@ -113,25 +105,33 @@ Walker.prototype.buildFunctions = function buildFunctions() { this.groupFiles = groupVar ? fns.groupFiles : fns.empty; this.getArray = groupVar ? fns.getArrayGroup : fns.getArray; + this.buildPushFile(filters, onlyCountsVar, excludeFiles); + + this.buildPushDir(includeDirs, filters); + this.buildSymlinkResolver(resolveSymlinks, isSync); this.buildCallbackInvoker(onlyCountsVar, isSync); }; +Walker.prototype.buildPushDir = function buildPushDir(includeDirs, filters) { + if (!includeDirs) return; + + if (filters.length) this.pushDir = fns.pushDirFilter; + else this.pushDir = fns.pushDir; +}; + Walker.prototype.buildPushFile = function buildPushFile( filters, onlyCountsVar, excludeFiles ) { - if (excludeFiles) { - this.pushFile = fns.empty; - return; - } + if (excludeFiles) return; - if (filters.length && onlyCountsVar) { - this.pushFile = fns.pushFileFilterAndCount; - } else if (filters.length) { - this.pushFile = fns.pushFileFilter; + if (filters.length) { + this.pushFile = onlyCountsVar + ? fns.pushFileFilterAndCount + : fns.pushFileFilter; } else if (onlyCountsVar) { this.pushFile = fns.pushFileCount; } else { From 739d7ac83640516d3b8d3aea7fb3816b57d65a03 Mon Sep 17 00:00:00 2001 From: thecodrr Date: Mon, 19 Apr 2021 13:16:43 +0500 Subject: [PATCH 7/9] docs: add ARCHITECTURE.md --- ARCHITECTURE.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 ARCHITECTURE.md diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..8fa4a51 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,41 @@ +# Architecture + +This document describes the high-level architecture of `fdir`. If you want to familiarize yourself with the code base, you are in the right place! 
+ +--- + +On the highest level, `fdir` is a library that accepts a path to a directory as input and outputs all the file paths in that directory recursively. + +More specifically, input data consists of a path to a directory (`rootDirectory`) and different flags and filters to control the walking process. To increase performance, `fdir` builds internal functions conditionally based on the passed flags. Since these "conditional" functions are tiny, they are inlined by the JavaScript engine reducing branching & allocations. + +## Entry Points + +`index.js` exports the main `fdir` class and it is the main entry point. However, there is nothing of importance in this file aside from the export. + +`src/builder/index.js` contains the main API of `fdir` exposed via a `Builder` class. This is where all the flags & filters are built and passed (as an `options` Object) onto the core of `fdir`. + +## Code Map + +This section talks briefly about all the directories and what each file in each directory does. + +### `src/api` + +This is the core of `fdir`. + +**`walker.js`:** This contains the `Walker` class which is responsible for controlling and maintaining the state of the directory walker. It builds the conditional functions, processes the `Dirents` and delegates the actual filesystem directory reading to sync/async APIs. + +**`async.js`** This contains the asynchronous (`fs.readdir`) logic. This is the starting point of the async crawling process. + +**`queue.js`** This contains a tiny `Queue` class to make sure `fdir` doesn't early exit during walking. It increments a counter for each "walk" queued and decrements it when it finishes. Once the counter hits 0, it calls the callback which returns the output to the user. + +**`sync.js`** This contains the synchronous (`fs.readdirSync`) logic. This is the starting point of the sync crawling process. + +**`fns.js`** This contains the implementations of all the conditional functions. 
+ +### `src/builder` + +This is what gets exposed to the developer and contains 2 builders that aid in building an `options` object to control various aspects of the walker. + +### `src/compat` + +Since `fdir` supports Node <= 10.0, this directory contains the compatibility code to bridge the newer (v10.0) filesystem API with the older (v8.0) filesystem API. From d1b5bbaa79fa99a874b6cdda165de7e45ed7a9f4 Mon Sep 17 00:00:00 2001 From: thecodrr Date: Fri, 21 May 2021 17:49:12 +0500 Subject: [PATCH 8/9] refactor: add extensive commenting --- src/api/async.js | 37 +++++++++++++++++++++++++++++-------- src/api/fns.js | 10 +--------- src/api/queue.js | 12 +++++++++--- src/api/sync.js | 21 ++++++++++++--------- src/api/walker.js | 31 +++++++++++++++++++++++++++---- 5 files changed, 78 insertions(+), 33 deletions(-) diff --git a/src/api/async.js b/src/api/async.js index 4a56e09..d550483 100644 --- a/src/api/async.js +++ b/src/api/async.js @@ -2,29 +2,46 @@ const { readdir } = require("../compat/fs"); const Queue = require("./queue"); const { Walker, readdirOpts } = require("./walker"); -function promise(rootDirectory, options) { +/** + * This is basically a `promisify` around the callback function. + * @param {string} directoryPath Directory path to start walking from + * @param {Object} options The options to configure the Walker + * @returns {Promise} Promise that resolves to Output + */ +function promise(directoryPath, options) { return new Promise((resolve, reject) => { - callback(rootDirectory, options, (err, output) => { + callback(directoryPath, options, (err, output) => { if (err) return reject(err); resolve(output); }); }); } -function callback(rootDirectory, options, callback) { +/** + * Register a Walker and start walking asynchronously until we reach + * the end (or maxDepth); then call the callback function and exit. 
+ * @param {string} directoryPath Directory path to start walking from + * @param {Object} options The options to configure the Walker + * @param {(error: Object, output: Object) => void} callback + */ +function callback(directoryPath, options, callback) { let walker = new Walker(options, callback); walker.registerWalker(walkDirectory); walker.state.queue = new Queue(walker.callbackInvoker); - const root = walker.normalizePath(rootDirectory); + const root = walker.normalizePath(directoryPath); walker.walk(walker, root, options.maxDepth); } /** + * Walk a directory asynchronously. This function is called internally + * by the Walker whenever it encounters a sub directory. * - * @param {Walker} walker - * @param {string} directoryPath - * @param {number} currentDepth + * Since this is async, we use a custom queue system to track all concurrent + * fs.readdir calls. Once the queue counter hits 0, we call the callback and exit. + * @param {Walker} walker The core Walker that controls the whole walking process (we don't use `this` to keep things explicit) + * @param {string} directoryPath Path to the directory + * @param {number} currentDepth The depth walker is at currently (value starts from options.maxDepth and decreases every time a sub directory is encountered) * @returns */ function walkDirectory(walker, directoryPath, currentDepth) { @@ -37,13 +54,17 @@ function walkDirectory(walker, directoryPath, currentDepth) { return; } + // Perf: Node >= 10 introduced withFileTypes that helps us + // skip an extra fs.stat call. + // However, since this API is not available in Node < 10, I had to create + // a compatibility layer to support both variants. 
readdir(directoryPath, readdirOpts, function(error, dirents) { if (error) { state.queue.dequeue(error, state); return; } - walker.processDirents(directoryPath, dirents, currentDepth); + walker.processDirents(dirents, directoryPath, currentDepth); state.queue.dequeue(null, state); }); } diff --git a/src/api/fns.js b/src/api/fns.js index 1893517..367c984 100644 --- a/src/api/fns.js +++ b/src/api/fns.js @@ -1,15 +1,14 @@ const { sep } = require("path"); const fs = require("fs"); -/* GET ARRAY */ module.exports.getArray = function(state) { return state.paths; }; + module.exports.getArrayGroup = function() { return [""].slice(0, 0); }; -/** PUSH FILE */ module.exports.pushFileFilterAndCount = function(walker, filename) { if (walker.options.filters.every((filter) => filter(filename, false))) module.exports.pushFileCount(walker); @@ -27,7 +26,6 @@ module.exports.pushFile = function(_walker, filename, files) { files.push(filename); }; -/** PUSH DIR */ module.exports.pushDir = function(_walker, dirPath, paths) { paths.push(dirPath); }; @@ -38,7 +36,6 @@ module.exports.pushDirFilter = function(walker, dirPath, paths) { } }; -/** JOIN PATH */ module.exports.joinPathWithBasePath = function(filename, dir) { return `${dir}${dir.endsWith(sep) ? 
"" : sep}${filename}`; }; @@ -46,7 +43,6 @@ module.exports.joinPath = function(filename) { return filename; }; -/** WALK DIR */ module.exports.walkDirExclude = function( walker, path, @@ -63,14 +59,12 @@ module.exports.walkDir = function(walker, path, _directoryName, currentDepth) { walker.walk(walker, path, currentDepth); }; -/** GROUP FILES */ module.exports.groupFiles = function(dir, files, state) { state.counts.files += files.length; state.paths.push({ dir, files }); }; module.exports.empty = function() {}; -/** CALLBACK INVOKER */ module.exports.callbackInvokerOnlyCountsSync = function(state) { return state.counts; }; @@ -94,8 +88,6 @@ function callbackInvokerBuilder(output) { }; } -/** SYMLINK RESOLVER */ - module.exports.resolveSymlinksAsync = function(path, state, callback) { state.queue.queue(); diff --git a/src/api/queue.js b/src/api/queue.js index 43e9269..dbd99c3 100644 --- a/src/api/queue.js +++ b/src/api/queue.js @@ -1,14 +1,20 @@ +/** + * This is a custom stateless queue to track concurrent async fs calls. + * It increments a counter whenever a call is queued and decrements it + * as soon as it completes. When the counter hits 0, it calls onQueueEmpty. 
+ * @param {(error: any, output: any) => void} onQueueEmpty the callback to call when queue is empty + */ function Queue(onQueueEmpty) { this.onQueueEmpty = onQueueEmpty; this.queuedCount = 0; } -Queue.prototype.queue = function () { +Queue.prototype.queue = function() { this.queuedCount++; }; -Queue.prototype.dequeue = function (...args) { - if (--this.queuedCount === 0) this.onQueueEmpty(...args); +Queue.prototype.dequeue = function(error, output) { + if (--this.queuedCount === 0 || error) this.onQueueEmpty(error, output); }; module.exports = Queue; diff --git a/src/api/sync.js b/src/api/sync.js index 8c2bd0b..2c73504 100644 --- a/src/api/sync.js +++ b/src/api/sync.js @@ -1,25 +1,28 @@ const { readdirSync } = require("../compat/fs"); const { Walker, readdirOpts } = require("./walker"); -// For sync usage, we can reuse the same walker functions, because -// there will not be concurrent calls overwriting the 'built functions' -// in the middle of everything. -// const { init, walkSingleDir } = makeWalkerFunctions(); - -function sync(rootDirectory, options) { +/** + * Register a Walker and start walking synchronously and return the result + * when we reach the end (or maxDepth). + * @param {string} directoryPath Directory path to start walking from + * @param {Object} options The options to configure the Walker + * @returns {*} The value returned by the Walker's callback invoker + */ +function sync(directoryPath, options) { options.isSync = true; let walker = new Walker(options); walker.registerWalker(walkDirectory); - const root = walker.normalizePath(rootDirectory); + const root = walker.normalizePath(directoryPath); walker.walk(walker, root, options.maxDepth); return walker.callbackInvoker(walker.state); } /** - * + * Walk a directory synchronously. This function is called internally + * by the Walker whenever it encounters a sub directory. 
 * @param {Walker} walker * @param {string} directoryPath * @param {number} currentDepth @@ -32,7 +35,7 @@ function walkDirectory(walker, directoryPath, currentDepth) { const { state } = walker; try { const dirents = readdirSync(directoryPath, readdirOpts); - walker.processDirents(directoryPath, dirents, currentDepth); + walker.processDirents(dirents, directoryPath, currentDepth); } catch (e) { if (!state.options.suppressErrors) throw e; } diff --git a/src/api/walker.js b/src/api/walker.js index fcdf08c..af73582 100644 --- a/src/api/walker.js +++ b/src/api/walker.js @@ -1,3 +1,4 @@ +const { Dirent } = require("fs"); const { sep, resolve: pathResolve } = require("path"); const { cleanPath } = require("../utils"); const fns = require("./fns"); @@ -16,7 +17,7 @@ function Walker(options, callback) { this.options = options; - /* Perf: We use a local state per walker instead of direct global variables + /* We use a local state per walker instead of direct global variables * so that each walker execution is independent. */ this.state = { @@ -35,19 +36,35 @@ function Walker(options, callback) { this.buildFunctions(); } +/** + * Process the given path using `path.resolve` & `cleanPath`, as enabled by the `resolvePaths` / `normalizePath` options + * @param {string} path Path to normalize + * @returns {string} Normalized path + */ Walker.prototype.normalizePath = function normalizePath(path) { if (this.options.resolvePaths) path = pathResolve(path); if (this.options.normalizePath) path = cleanPath(path); return path; }; +/** + * Register the core directory walker function. + * This is used by the sync/async walkers depending on usage. 
+ * @param {(walker: Walker, directoryPath: string, currentDepth: number) => void} walkerFunction + */ Walker.prototype.registerWalker = function registerWalker(walkerFunction) { this.walk = walkerFunction; }; +/** + * Process dirents recursively (and also resolve symlinks if needed) + * @param {Dirent[]} dirents + * @param {string} directoryPath + * @param {number} currentDepth + */ Walker.prototype.processDirents = function processDirents( - directoryPath, dirents, + directoryPath, currentDepth ) { this.pushDir(this, directoryPath, this.state.paths); @@ -65,8 +82,8 @@ Walker.prototype.processDirents = function processDirents( this.walkDir(this, path, dirent.name, currentDepth - 1); } // perf: we can avoid entering the condition block if .withSymlinks is not set - // by using symlinkResolver !== fns.empty; this helps us avoid wasted allocations - // which are probably very minor + // by using symlinkResolver !== fns.empty; this helps us avoid wasted allocations - + // which are probably very minor but still worth avoiding. 
else if (dirent.isSymbolicLink() && this.symlinkResolver !== fns.empty) { let path = fns.joinPathWithBasePath(dirent.name, directoryPath); this.symlinkResolver(path, this.state, (stat, resolvedPath) => { @@ -82,6 +99,9 @@ Walker.prototype.processDirents = function processDirents( this.groupFiles(directoryPath, files, this.state); }; +/** + * Build all the different walker functions based on options + */ Walker.prototype.buildFunctions = function buildFunctions() { const { filters, @@ -154,6 +174,9 @@ Walker.prototype.buildCallbackInvoker = function buildCallbackInvoker( } }; +/** + * Build the symlink resolver based on the `resolveSymlinks` and `isSync` arguments + */ Walker.prototype.buildSymlinkResolver = function buildSymlinkResolver( resolveSymlinks, isSync From 69a18bb5ae71ed96482fb7097d688883f2ff6612 Mon Sep 17 00:00:00 2001 From: thecodrr Date: Fri, 21 May 2021 17:51:09 +0500 Subject: [PATCH 9/9] chore: add basic contributing.md --- CONTRIBUTING.md | 92 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..a1defcb --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,92 @@ +# Contributing + +When contributing to this repository, please first discuss the change you wish to make via issue, +email, or any other method with the owners of this repository before making a change. + +Please note that we have a code of conduct; please follow it in all your interactions with the project. + +## Pull Request Process + +1. Ensure any install or build dependencies are removed before the end of the layer when doing a + build. +2. Update the README.md with details of changes to the interface; this includes new environment + variables, exposed ports, useful file locations and container parameters. +3. Increase the version numbers in any example files and the README.md to the new version that this + Pull Request would represent. The versioning scheme we use is [SemVer](http://semver.org/). +4. 
You may merge the Pull Request in once you have the sign-off of two other developers, or if you + do not have permission to do that, you may request the second reviewer to merge it for you. + +## Code of Conduct + +### Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of experience, +nationality, personal appearance, race, religion, or sexual identity and +orientation. + +### Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +- The use of sexualized language or imagery and unwelcome sexual attention or + advances +- Trolling, insulting/derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or electronic + address, without explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +### Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. 
+ +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +### Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +### Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at [INSERT EMAIL ADDRESS]. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +### Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at [http://contributor-covenant.org/version/1/4][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/