节点fs.readdir在文件太多的文件夹中冻结 [英] Node fs.readdir freezing in folders with too many files
问题描述
在Node.js中,我必须读取文件夹中的文件,并且对于每个文件都获得文件处理程序信息,这是我最简单的使用fs.readdir
的实现:
In Node.js I have to read files in a folder and for each file get file handler info, this is my simplest implementation using fs.readdir
:
/**
 * List media files (*.mp3) in the configured media folder.
 *
 * @param {object} params - overrides for the defaults:
 *                          limit {number} max entries returned (default 100),
 *                          desc  {number} 1 = newest first, 0 = oldest first.
 * @returns {Promise<Array<object>>} resolves with one metadata item per file:
 *          { name, path, size, sizehr, birthtime, mtime, atime, timestamp, media_id }
 *          Rejects with the fs.readdir error when the folder cannot be read.
 */
FileServer.prototype.listLocal = function (params) {
  var self = this;
  // defaults, overridable through params
  var options = {
    limit: 100,
    desc: 1
  };
  // override defaults
  for (var attrname in params) { options[attrname] = params[attrname]; }
  // media path is the media folder
  var mediaDir = path.join(self._options.mediaDir, path.sep);
  return new Promise((resolve, reject) => {
    fs.readdir(mediaDir, (error, results) => {
      if (error) {
        self.logger.error("FileServer.list error:%s", error);
        return reject(error);
      }
      // FIX: filter by extension BEFORE applying the limit — the original
      // sliced first, so it could return fewer than `limit` mp3 files even
      // when more were available in the folder.
      results = results
        .filter(item => item.indexOf('.mp3') > -1)
        .slice(0, options.limit);
      // map each file name to its metadata item
      results = results.map(file => {
        var filePath = path.join(mediaDir, file);
        // FIX: statSync replaces the openSync/fstatSync/closeSync triple;
        // the old sequence leaked the file descriptor if fstatSync threw.
        var fstat = fs.statSync(filePath);
        var item = {
          name: file,
          path: filePath,
          // file size in bytes, plus a human-readable rendering
          size: fstat.size,
          sizehr: self.formatSizeUnits(fstat.size),
          // "Birth Time": set once when the file is created
          birthtime: fstat.birthtime,
          // "Modified Time": when file data was last modified
          mtime: fstat.mtime,
          // "Access Time": when file data was last accessed
          atime: fstat.atime
        };
        item.timestamp = new Date(item.mtime).getTime();
        item.media_id = path.basename(filePath, '.mp3');
        return item;
      });
      // sort by modification time, newest or oldest first
      if (options.desc) { // sort by most recent
        results.sort(function (a, b) {
          return b.timestamp - a.timestamp;
        });
      } else { // sort by older
        results.sort(function (a, b) {
          return a.timestamp - b.timestamp;
        });
      }
      return resolve(results);
    });
  });
}
这样,对于每个文件,我都会得到一组项目
so that for each file I get an array of items
{
"name": "sample121.mp3",
"path": "/data/sample121.mp3",
"size": 5751405,
"sizehr": "5.4850 MB",
"birthtime": "2018-10-08T15:26:08.397Z",
"mtime": "2018-10-08T15:26:11.650Z",
"atime": "2018-10-10T09:01:48.534Z",
"timestamp": 1539012371650,
"media_id": "sample121"
}
也就是说,问题在于,当要列出的文件夹包含大量文件时,node.js fs.readdir
可能会冻结Node I/O循环,比方说成千上万到数十万甚至更多.
这是一个已知问题-有关更多信息,请参见此处.
还计划以某种方式改进fs.readdir
,例如流式传输-在此处关于这个.
That said, the problem is it's known that node.js fs.readdir
may freeze Node I/O Loop when the folder to list has a large number of files, let's say from ten thousands to hundred thousands and more.
This is a known issue - see here for more info.
There are also plans to improve fs.readdir
in some way, like streaming - see here about this.
与此同时,我正在搜索此补丁,因为我的文件夹很大.
由于问题是事件循环被冻结,因此有人提出了使用process.nextTick
的解决方案,该解决方案我已在此处集成
In the meanwhile I'm searching for something like a patch for this, because my folders are pretty large.
Since the problem is the Event Loop get frozen, someone proposed a solution using process.nextTick
, that I have assembled here
/**
 * Like listLocal, but processes one directory entry per event-loop tick
 * (process.nextTick) so a very large folder does not freeze the I/O loop.
 *
 * @param {object} params - overrides for the defaults:
 *                          limit {number} max entries returned (default 100),
 *                          desc  {number} 1 = newest first, 0 = oldest first.
 * @returns {Promise<Array<object>>} resolves with the same metadata items
 *          as listLocal, only after every entry has been processed.
 */
FileServer.prototype.listLocalNextTick = function (params) {
  var self = this;
  var options = {
    limit: 100,
    desc: 1
  };
  // override defaults
  for (var attrname in params) { options[attrname] = params[attrname]; }
  // media path is the media folder
  var mediaDir = path.join(self._options.mediaDir, path.sep);
  return new Promise((resolve, reject) => {
    // Processes one entry per tick.
    // FIX: stores each mapped value back into the array and signals
    // completion through inDoneCallback — the original assigned to the
    // callback parameter (`file = item`), which never touched the array,
    // and the caller resolved before a single tick had run.
    var AsyncArrayProcessor = function (inArray, inEntryProcessingFunction, inDoneCallback) {
      var elemNum = 0;
      var arrLen = inArray.length;
      var ArrayIterator = function () {
        inArray[elemNum] = inEntryProcessingFunction(inArray[elemNum]);
        elemNum++;
        if (elemNum < arrLen) process.nextTick(ArrayIterator);
        else inDoneCallback();
      };
      if (elemNum < arrLen) process.nextTick(ArrayIterator);
      else inDoneCallback(); // empty array: finish immediately
    };
    fs.readdir(mediaDir, function (error, results) {
      if (error) {
        self.logger.error("FileServer.list error:%s", error);
        return reject(error);
      }
      // cut to max files
      results = results.slice(0, options.limit);
      // filter default ext
      results = results.filter(item => {
        return (item.indexOf('.mp3') > -1);
      });
      // Build the metadata item for a single directory entry and RETURN it
      // so AsyncArrayProcessor can write it back into `results`.
      var ProcessDirectoryEntry = function (file) {
        var filePath = path.join(mediaDir, file);
        var item = {
          name: file,
          path: filePath
        };
        // FIX: statSync instead of openSync/fstatSync/closeSync — the old
        // sequence leaked the file descriptor if fstatSync threw.
        var fstat = fs.statSync(filePath);
        // file size in bytes
        item.size = fstat.size;
        item.sizehr = self.formatSizeUnits(fstat.size);
        // "Birth Time" Time of file creation. Set once when the file is created.
        item.birthtime = fstat.birthtime;
        // "Modified Time" Time when file data last modified.
        item.mtime = fstat.mtime;
        // "Access Time" Time when file data last accessed.
        item.atime = fstat.atime;
        item.timestamp = new Date(item.mtime).getTime();
        item.media_id = path.basename(filePath, '.mp3');
        return item;
      };//ProcessDirectoryEntry
      // FIX: sort and resolve only once every entry has been processed;
      // the original sorted and resolved synchronously, before the first
      // process.nextTick iteration could run.
      AsyncArrayProcessor(results, ProcessDirectoryEntry, function () {
        if (options.desc) { // sort by most recent
          results.sort(function (a, b) {
            return b.timestamp - a.timestamp;
          });
        } else { // sort by older
          results.sort(function (a, b) {
            return a.timestamp - b.timestamp;
          });
        }
        return resolve(results);
      });
    });
  });
}//listLocalNextTick
这似乎避免了最初的问题,但是我再也无法通过以前使用文件处理程序将文件列表映射到项目,因为在文件列表上运行AsyncArrayProcessor
时,因此在每个文件条目上运行ProcessDirectoryEntry
process.nextTick
的异步特性导致我无法找回先前在listLocal
函数中修改过的results
数组,在该函数中,我只是对results
数组进行了迭代array.map
.
如何修补listLocalNextTick
以使其表现与listLocal
相似但保持process.nextTick
方法?
This seems to avoid the original issue, but I cannot anymore map the files lists to the items with file handler I did before, because when running the AsyncArrayProcessor
on the files list, thus the ProcessDirectoryEntry
on each file entry the async nature of process.nextTick
causes that I cannot get back the results
array modified as in the previous listLocal
function where I just did an iterative array.map
of the results
array.
How to patch the listLocalNextTick
to behave like the listLocal
but keeping process.nextTick
approach?
[UPDATE]
根据提出的解决方案,这是迄今为止的最佳实现:
According to the proposed solution, this is the best implementation so far:
/**
* Scan files in directory
* @param {String} needle
* @param {object} options
* @returns {nodeStream}
*/
scanDirStream : function(needle,params) {
var options = {
type: 'f',
name: '*'
};
for (var attrname in params) { options[attrname] = params[attrname]; }
return new Promise((resolve, reject) => {
var opt=[needle];
for (var k in options) {
var v = options[k];
if (!Util.empty(v)) {
opt.push('-' + k);
opt.push(v);
}
};
var data='';
var listing = spawn('find',opt)
listing.stdout.on('data', _data => {
var buff=Buffer.from(_data, 'utf-8').toString();
if(buff!='') data+=buff;
})
listing.stderr.on('data', error => {
return reject(Buffer.from(error, 'utf-8').toString());
});
listing.on('close', (code) => {
var res = data.split('\n');
return resolve(res);
});
});
用法示例:
// Usage example: list all mp3 files below the media root.
scanDirStream(mediaRoot, {
  name: '*.mp3'
})
  .then(results => {
    // FIX: %d expects a number — passing the array itself printed NaN
    console.info("files:%d", results.length);
  })
  .catch(error => {
    console.error("error %s", error);
  });
最终可以对其进行修改,以在在目录中获取新文件时在发出的每个stdout.on
事件中添加一个滴答回调.
This can eventually be modified to add a tick callback at every stdout.on
event emitted when a new file arrives in the directory listing.
推荐答案
我已经为find创建了一个包装器,但是您可以以相同的方式使用dir或ls.
I have Created a wrapper around find for it but you could use dir or ls in the same way.
const { spawn } = require('child_process');
/**
* findNodeStream
* @param {String} dir
* @returns {nodeStream}
*/
const findNodeStream = (dir,options) => spawn('find',[dir,options].flat().filter(x=>x));
/**
* Usage Example:
let listing = findNodeStream('dir',[options])
listing.stdout.on('data', d=>console.log(d.toString()))
listing.stderr.on('data', d=>console.log(d.toString()))
listing.on('close', (code) => {
console.log(`child process exited with code ${code}`);
});
*/
这允许您流式传输分块目录,而不是像fs.readdir那样整体传输.
this allows you to stream a directory chunked and not in a whole as fs.readdir does.
NodeJS > 12.11.1 将支持异步 readdir——已在 cbd8d71 ( https://github.com/nodejs/node/commit/cbd8d715b2286e5726e6988921f5c870cbf74127 ) 落地,即 fs{Promises}.opendir(),它返回一个 fs.Dir,后者暴露一个异步迭代器。
NodeJS > 12.11.1 will have async readdir support Landed in cbd8d71 ( https://github.com/nodejs/node/commit/cbd8d715b2286e5726e6988921f5c870cbf74127 ) as fs{Promises}.opendir(), which returns an fs.Dir, which exposes an async iterator. tada
https://nodejs.org/api/fs.html#fs_fspromises_opendir_path_options
const fs = require('fs');

/**
 * Print every entry name in a directory, streaming entries one at a time.
 * fs.promises.opendir resolves to an fs.Dir, which is an async iterator —
 * entries arrive incrementally instead of as one huge array.
 */
async function print(path) {
  const dir = await fs.promises.opendir(path);
  for await (const entry of dir) {
    console.log(entry.name);
  }
}

print('./').catch(console.error);
这篇关于节点fs.readdir在文件太多的文件夹中冻结的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!