节点fs.readdir在文件太多的文件夹中冻结 [英] Node fs.readdir freezing in folders with too many files

查看:76
本文介绍了节点fs.readdir在文件太多的文件夹中冻结的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

在Node.js中,我必须读取文件夹中的文件,并且对于每个文件都获得文件处理程序信息,这是我最简单的使用fs.readdir的实现:

In Node.js I have to read files in a folder and for each file get file handler info, this is my simplest implementation using fs.readdir:

/**
 * List files in the media folder and return their metadata.
 * @param {Object} params overrides for the defaults { limit, desc }
 * @returns {Promise<Array>} items sorted by modification time
 */
FileServer.prototype.listLocal = function (params) {
    var self = this;
    // Defaults; any key present in `params` overrides these.
    var options = {
        limit: 100,
        desc: 1
    };
    for (var attrname in params) { options[attrname] = params[attrname]; }

    // media path is the media folder
    var mediaDir = path.join(self._options.mediaDir, path.sep);
    return new Promise((resolve, reject) => {
        fs.readdir(mediaDir, (error, results) => {
            if (error) {
                self.logger.error("FileServer.list error:%s", error);
                return reject(error);
            }
            // NOTE: the limit is applied BEFORE the extension filter (as in
            // the original), so fewer than `limit` .mp3 items may be returned.
            results = results.slice(0, options.limit);
            results = results.filter(item => item.indexOf('.mp3') > -1);
            // Map each file name to its metadata record.
            results = results.map(file => {
                var filePath = path.join(self._options.mediaDir, path.sep, file);
                // BUGFIX: use statSync(path) instead of openSync + fstatSync +
                // closeSync. The old sequence leaked the fd whenever fstatSync
                // threw (e.g. the file vanished between readdir and here), and
                // it costs two extra syscalls per file.
                var fstat = fs.statSync(filePath);
                return {
                    name: file,
                    path: filePath,
                    // file size in bytes
                    size: fstat.size,
                    sizehr: self.formatSizeUnits(fstat.size),
                    // "Birth Time": set once when the file is created
                    birthtime: fstat.birthtime,
                    // "Modified Time": when file data was last modified
                    mtime: fstat.mtime,
                    // "Access Time": when file data was last accessed
                    atime: fstat.atime,
                    timestamp: new Date(fstat.mtime).getTime(),
                    media_id: path.basename(filePath, '.mp3')
                };
            });
            if (options.desc) { // sort by most recent
                results.sort(function (a, b) {
                    return b.timestamp - a.timestamp;
                });
            } else { // sort by older
                results.sort(function (a, b) {
                    return a.timestamp - b.timestamp;
                });
            }
            return resolve(results);
        })
    });
}

这样,对于每个文件,我都会得到一组项目

so that for each file I get an array of items

{
  "name": "sample121.mp3",
  "path": "/data/sample121.mp3",
  "size": 5751405,
  "sizehr": "5.4850 MB",
  "birthtime": "2018-10-08T15:26:08.397Z",
  "mtime": "2018-10-08T15:26:11.650Z",
  "atime": "2018-10-10T09:01:48.534Z",
  "timestamp": 1539012371650,
  "media_id": "sample121"
}

也就是说,问题在于:众所周知,当要列出的文件夹包含大量文件(比如几万到几十万甚至更多)时,node.js 的 fs.readdir 可能会冻结 Node 的 I/O 循环。这是一个已知问题——更多信息请参见此处。也有计划以某种方式改进 fs.readdir,例如支持流式传输——相关讨论参见此处。

That said, the problem is it's known that node.js fs.readdir may freeze the Node I/O Loop when the folder to list has a large number of files, let's say from ten thousands to hundred thousands and more. This is a known issue - see here for more info. There are also plans to improve fs.readdir in some way, like streaming - see here about this.

与此同时,我正在搜索此补丁,因为我的文件夹很大. 由于问题是事件循环被冻结,因此有人提出了使用process.nextTick的解决方案,该解决方案我已在此处集成

In the meanwhile I'm searching for something like a patch for this, because my folders are pretty large. Since the problem is that the Event Loop gets frozen, someone proposed a solution using process.nextTick, which I have assembled here

/**
 * List files in the media folder, spreading the per-file stat work across
 * event-loop ticks with process.nextTick so very large folders do not
 * freeze the Node I/O loop.
 * @param {Object} params overrides for the defaults { limit, desc }
 * @returns {Promise<Array>} items sorted by modification time
 */
FileServer.prototype.listLocalNextTick = function (params) {
    var self = this;
    var options = {
        limit: 100,
        desc: 1
    };
    // override defaults
    for (var attrname in params) { options[attrname] = params[attrname]; }

    // media path is the media folder
    var mediaDir = path.join(self._options.mediaDir, path.sep);
    return new Promise((resolve, reject) => {
        // Processes one array element per tick. BUGFIX vs the original:
        // the mapped value is stored back into the array (assigning to the
        // callback's `file` parameter never mutated it), and a completion
        // callback reports when every element has been handled (or an error).
        var AsyncArrayProcessor = function (inArray, inEntryProcessingFunction, onDone) {
            var elemNum = 0;
            var arrLen = inArray.length;
            var ArrayIterator = function () {
                try {
                    inArray[elemNum] = inEntryProcessingFunction(inArray[elemNum]);
                } catch (err) {
                    // A thrown stat error inside a tick would otherwise be
                    // an uncaught exception; surface it through the promise.
                    return onDone(err);
                }
                elemNum++;
                if (elemNum < arrLen) process.nextTick(ArrayIterator);
                else onDone(null);
            };
            if (elemNum < arrLen) process.nextTick(ArrayIterator);
            else onDone(null); // empty input: complete immediately
        };
        fs.readdir(mediaDir, function (error, results) {
            if (error) {
                self.logger.error("FileServer.list error:%s", error);
                return reject(error);
            }
            // cut to max files
            results = results.slice(0, options.limit);
            // filter default ext
            results = results.filter(item => {
                return (item.indexOf('.mp3') > -1);
            });
            // Builds the metadata item for one directory entry; this runs
            // inside a single tick, so keep it as cheap as possible.
            var ProcessDirectoryEntry = function (file) {
                var filePath = path.join(self._options.mediaDir, path.sep, file);
                // statSync avoids the fd leak of openSync+fstatSync+closeSync
                var fstat = fs.statSync(filePath);
                return {
                    name: file,
                    path: filePath,
                    size: fstat.size, // bytes
                    sizehr: self.formatSizeUnits(fstat.size),
                    birthtime: fstat.birthtime, // time of file creation
                    mtime: fstat.mtime,         // last data modification
                    atime: fstat.atime,         // last access
                    timestamp: new Date(fstat.mtime).getTime(),
                    media_id: path.basename(filePath, '.mp3')
                };
            };//ProcessDirectoryEntry
            // BUGFIX: sort and resolve only AFTER all scheduled ticks have
            // run; the original sorted/resolved immediately, before any item
            // had been built, so callers got raw filenames back.
            AsyncArrayProcessor(results, ProcessDirectoryEntry, function (err) {
                if (err) {
                    self.logger.error("FileServer.list error:%s", err);
                    return reject(err);
                }
                if (options.desc) { // sort by most recent
                    results.sort(function (a, b) {
                        return b.timestamp - a.timestamp;
                    });
                } else { // sort by older
                    results.sort(function (a, b) {
                        return a.timestamp - b.timestamp;
                    });
                }
                return resolve(results);
            });
        });
    });
}//listLocalNextTick

这似乎避免了最初的问题,但我无法再像以前那样把文件列表映射为带文件信息的条目:在文件列表上运行AsyncArrayProcessor(即对每个文件条目运行ProcessDirectoryEntry)时,process.nextTick的异步特性导致我拿不到像之前listLocal函数中那样被修改过的results数组——在那个函数里我只是对results数组做了一次迭代的array.map。如何修补listLocalNextTick,使其行为与listLocal相同,同时保留process.nextTick的方式?

This seems to avoid the original issue, but I cannot anymore map the files lists to the items with file handler I did before, because when running the AsyncArrayProcessor on the files list, thus the ProcessDirectoryEntry on each file entry the async nature of process.nextTick causes that I cannot get back the results array modified as in the previous listLocal function where I just did an iterative array.map of the results array. How to patch the listLocalNextTick to behave like the listLocal but keeping process.nextTick approach?

[UPDATE]

根据提出的解决方案,这是迄今为止的最佳实现:

According to the proposed solution, this is the best implementation so far:

       /**
         * Scan files in directory
         * @param {String} needle 
         * @param {object} options 
         * @returns {nodeStream}
         */
        scanDirStream : function(needle,params) {
            // Defaults map directly to `find` CLI flags: -type f, -name '*'.
            var options = {
                type: 'f',
                name: '*'
            };
            // Caller-supplied params override the defaults key by key.
            for (var attrname in params) { options[attrname] = params[attrname]; }
            return new Promise((resolve, reject) => {
                // Build find's argv: the search root first, then one
                // "-key value" pair for every non-empty option.
                var opt=[needle];
                for (var k in options) {
                    var v = options[k];
                    if (!Util.empty(v)) {
                        opt.push('-' + k);
                        opt.push(v);
                    }
                };
                var data='';
                var listing = spawn('find',opt)
                // Accumulate stdout chunks; chunk boundaries are arbitrary,
                // so splitting into lines is deferred until 'close'.
                listing.stdout.on('data', _data => {
                    var buff=Buffer.from(_data, 'utf-8').toString();
                    if(buff!='') data+=buff;
                })
                // NOTE(review): any stderr output rejects the whole promise,
                // but find also emits non-fatal warnings (e.g. "Permission
                // denied" on a subdirectory) to stderr -- confirm intended.
                listing.stderr.on('data', error => {
                    return reject(Buffer.from(error, 'utf-8').toString());
                });
                listing.on('close', (code) => {
                    // NOTE(review): find's trailing newline leaves an empty
                    // string as the last element of `res`; exit `code` is
                    // ignored here.
                    var res = data.split('\n');
                    return resolve(res);
                });
            });

用法示例:

// Example: list all .mp3 files under mediaRoot via the find wrapper.
scanDirStream(mediaRoot, {
        name: '*.mp3'
    })
    .then(results => {
        // BUGFIX: "%d" with an array printed "files:NaN"; log the count.
        console.info("files:%d", results.length);
    })
    .catch(error => {
        console.error("error %s", error);
    });

最终可以对其进行修改,以在在目录中获取新文件时在发出的每个stdout.on事件中添加一个滴答回调.

This can eventually be modified to add a tick callback at every stdout.on event emitted when getting a new file in the directory listing.

推荐答案

我已经为find创建了一个包装器,但是您可以以相同的方式使用dir或ls.

I have Created a wrapper around find for it but you could use dir or ls in the same way.

const { spawn } = require('child_process');

/**
 * findNodeStream
 * Spawns the system `find` command rooted at `dir`. Extra command-line
 * arguments may be supplied either as a single string or as an array;
 * empty/falsy slots are dropped, so the second argument is optional.
 * @param {String} dir
 * @param {String|String[]} [options] additional `find` arguments
 * @returns {nodeStream}
 */
const findNodeStream = (dir, options) => {
  const argv = [dir, options].flat().filter((arg) => arg);
  return spawn('find', argv);
};

/**
 * Usage Example:
  let listing = findNodeStream('dir',[options])
  listing.stdout.on('data', d=>console.log(d.toString()))
  listing.stderr.on('data', d=>console.log(d.toString()))
  listing.on('close', (code) => {
    console.log(`child process exited with code ${code}`);
  });
*/

这允许您流式传输分块目录,而不是像fs.readdir那样整体传输.

this allows you to stream a directory chunked and not in a whole as fs.readdir does.

NodeJS > 12.11.1 将提供异步 readdir 支持,已在提交 cbd8d71(https://github.com/nodejs/node/commit/cbd8d715b2286e5726e6988921f5c870cbf74127)中落地,即 fs.promises.opendir(),它返回一个 fs.Dir 对象,该对象暴露一个异步迭代器。🎉

NodeJS > 12.11.1 will have async readdir support Landed in cbd8d71 ( https://github.com/nodejs/node/commit/cbd8d715b2286e5726e6988921f5c870cbf74127 ) as fs{Promises}.opendir(), which returns an fs.Dir, which exposes an async iterator. tada

https://nodejs.org/api/fs.html#fs_fspromises_opendir_path_options

const fs = require('fs');

// Print every entry name in the directory at `path`.
// Uses the explicit fs.Dir.read() API: each call yields one dirent,
// and null signals exhaustion; the handle is closed in all cases.
async function print(path) {
  const dir = await fs.promises.opendir(path);
  try {
    let entry;
    while ((entry = await dir.read()) !== null) {
      console.log(entry.name);
    }
  } finally {
    await dir.close();
  }
}
print('./').catch(console.error);

这篇关于节点fs.readdir在文件太多的文件夹中冻结的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆