在 NodeJS 中读取文件的第 N 行
声明:本页面是StackOverFlow热门问题的中英对照翻译,遵循CC BY-SA 4.0协议,如果您需要使用它,必须同样遵循CC BY-SA许可,注明原文地址和作者信息,同时你必须将它归于原作者(不是我):StackOverFlow
原文地址: http://stackoverflow.com/questions/6394951/
Warning: this content is provided under the CC BY-SA 4.0 license. You are free to use/share it, but you must attribute it to the original authors (not me):
StackOverFlow
Read Nth line of file in NodeJS
提问by Glenjamin
I'm attempting to extract a single line of a file, given that I know the pathname and the line number, ideally I'd like to do this without reading any more of the file than is necessary.
鉴于我知道路径名和行号,我正在尝试提取文件中的某一行;理想情况下,我希望在读取的文件内容不超过必要范围的情况下完成此操作。
For the purpose I'm using here, it doesn't matter if this is async or sync.
出于我在这里使用的目的,这是异步还是同步都没有关系。
My current (bad) implementation looks like this:
我当前的(坏的)实现是这样的:
/**
 * Reads `filename` synchronously and passes one of its lines to `callback`.
 *
 * @param {string} filename - Path of the file to read (decoded as UTF-8).
 * @param {number|string} line_no - 1-based line number; parsed as a
 *     base-10 integer.
 * @param {function} callback - Invoked synchronously as
 *     `callback(null, line)`, where `line` has surrounding whitespace
 *     (including a trailing "\r") trimmed.
 * @throws {Error} 'File end reached without finding line' when the file has
 *     no such line; errors from `fs.readFileSync` propagate unchanged.
 */
function get_line(filename, line_no, callback) {
    // Explicit radix so strings such as "0x10" are not read as hexadecimal.
    var index = parseInt(line_no, 10) - 1;
    var lines = fs.readFileSync(filename, 'utf8').split("\n");
    // Direct index lookup instead of scanning the array with for...in.
    // NaN fails both comparisons, so unparsable input reaches the throw.
    if (index >= 0 && index < lines.length) {
        callback(null, lines[index].trim());
        return;
    }
    throw new Error('File end reached without finding line');
}
I tried to do something with a createReadStream, but the data events never seemed to fire. Can anyone provide a direct solution to this problem, or point me towards some NodeJS filesystem interaction documentation that is a little more example driven than the standard library API docs?
我试图用 createReadStream 做一些事情,但数据事件似乎从未触发过。任何人都可以为这个问题提供一个直接的解决方案,或者向我指出一些 NodeJS 文件系统交互文档,它比标准库 API 文档更具示例性?
采纳答案by FGRibreau
With readable stream
带有可读流
var fs = require('fs');
/**
 * Streams `filename` and hands one of its lines to `callback`, stopping the
 * read as soon as that line is known to be complete.
 *
 * @param {string} filename - Path of the file to read (decoded as UTF-8).
 * @param {number|string} line_no - Zero-based index of the wanted line
 *     (0 is the first line); coerced with unary `+`.
 * @param {function} callback - Called at most once:
 *     `callback(null, line)` on success,
 *     `callback(err, null)` with the stream's error object on failure, or
 *     `callback('File end reached without finding line', null)` when the
 *     file has no such line.
 */
function get_line(filename, line_no, callback) {
    var index = +line_no;
    // Only a non-negative integer can address a line. Anything else
    // (NaN, -1, 1.5) is reported as "not found" once the file ends.
    var isValidIndex = index >= 0 && index % 1 === 0;
    var finished = false;
    var fileData = '';
    var stream = fs.createReadStream(filename, {
        encoding: 'utf-8',
        // Chunk size; replaces the `bufferSize` option this snippet used before.
        highWaterMark: 64 * 1024
    });

    // Guarantees the caller hears back only once, whichever event wins.
    function finish(err, line) {
        if (finished) {
            return;
        }
        finished = true;
        callback(err, line);
    }

    stream.on('data', function (data) {
        fileData += data;
        var lines = fileData.split("\n");
        // The last element may be a line cut off by the chunk boundary, so a
        // line counts as found only when a newline has been seen after it.
        if (isValidIndex && lines.length - 1 > index) {
            stream.destroy();
            finish(null, lines[index]);
        }
    });
    stream.on('error', function (err) {
        finish(err, null);
    });
    stream.on('end', function () {
        // At end of file the final element is complete, so it can be returned.
        var lines = fileData.split("\n");
        if (isValidIndex && index < lines.length) {
            finish(null, lines[index]);
            return;
        }
        finish('File end reached without finding line', null);
    });
}
// Example usage: request line 1 of ./file.txt and print it.
get_line('./file.txt', 1, function (err, line) {
    // Report a failure instead of printing "The line: null".
    if (err) {
        console.error(err);
        return;
    }
    console.log('The line: ' + line);
});
Direct solution:
直接解决:
You should index into the array directly instead of looping over it.
您应该直接通过索引访问数组,而不是使用循环。
var fs = require('fs');
/**
 * Reads `filename` synchronously and passes one of its lines to `callback`.
 *
 * @param {string} filename - Path of the file to read (decoded as UTF-8).
 * @param {number|string} line_no - Zero-based index of the wanted line
 *     (0 is the first line); coerced with unary `+`.
 * @param {function} callback - Invoked synchronously as
 *     `callback(null, line)`.
 * @throws {Error} 'File end reached without finding line' when `line_no`
 *     does not address a line of the file; errors from `fs.readFileSync`
 *     propagate unchanged.
 */
function get_line(filename, line_no, callback) {
    var index = +line_no;
    var lines = fs.readFileSync(filename, 'utf8').split("\n");
    // Valid indexes are the integers 0 .. lines.length - 1. An index equal
    // to lines.length is already past the end, so it must be rejected too.
    var isFound = index >= 0 && index % 1 === 0 && index < lines.length;
    if (!isFound) {
        throw new Error('File end reached without finding line');
    }
    callback(null, lines[index]);
}
// Example usage: request line 9 of ./file.txt and print what the callback receives.
get_line(
    './file.txt',
    9,
    function (error, text) {
        console.log('The line: ' + text);
    }
);
for (var l in lines) isn't the most efficient way for looping over an array, you should do this instead:
for (var l in lines) 不是循环数组的最有效方法,您应该这样做:
// Index-based loop: the array length is read once into iMax before iterating.
for (var i = 0, iMax = lines.length; i < iMax; i += 1) {
    /* lines[i] */
}
The asynchronous way:
异步方式:
var fs = require('fs');
/**
 * Reads `filename` asynchronously and passes one of its lines to `callback`.
 *
 * @param {string} filename - Path of the file to read (decoded as UTF-8).
 * @param {number|string} line_no - Zero-based index of the wanted line
 *     (0 is the first line); coerced with unary `+`.
 * @param {function} callback - Called once:
 *     `callback(null, line)` on success,
 *     `callback(err, null)` with the `fs.readFile` error on failure, or
 *     `callback('File end reached without finding line', null)` when
 *     `line_no` does not address a line of the file.
 */
function get_line(filename, line_no, callback) {
    fs.readFile(filename, function (err, data) {
        // Throwing here could not be caught by the caller, so read errors
        // are handed to the callback instead.
        if (err) {
            return callback(err, null);
        }
        // Data is a buffer that we need to convert to a string
        // Improvement: loop over the buffer and stop when the line is reached
        var lines = data.toString('utf-8').split("\n");
        var index = +line_no;
        // Valid indexes are the integers 0 .. lines.length - 1.
        var isFound = index >= 0 && index % 1 === 0 && index < lines.length;
        if (!isFound) {
            return callback('File end reached without finding line', null);
        }
        callback(null, lines[index]);
    });
}
// Example usage: request line 9 of ./file.txt and print it.
get_line('./file.txt', 9, function (err, line) {
    // Report a failure instead of printing "The line: null".
    if (err) {
        console.error(err);
        return;
    }
    console.log('The line: ' + line);
});
![]()
![]()
回答by Gabriel Llamas
without reading any more of the file than is necessary
无需阅读任何超出必要的文件内容
EDIT: the module is unmaintained, I recommend to use other modules to read line by line, for example, using transform streams: http://strongloop.com/strongblog/practical-examples-of-the-new-node-js-streams-api/
编辑:该模块已不再维护,我建议使用其他模块逐行读取,例如使用转换流:http://strongloop.com/strongblog/practical-examples-of-the-new-node-js-streams-api/
With a BufferedReader:
// Prints the n-th line reported by a BufferedReader's "line" event.
// NOTE(review): BufferedReader is not defined in this snippet; it presumably
// comes from the third-party module the answer calls unmaintained. Confirm
// how it is imported.
// n counts down once per "line" event; the line that brings it to 0 is kept.
var n = 10;
// Holds the wanted line; stays null if fewer than n lines are reported.
var l = null;
//Internally it uses a buffer, default 16KB, but you can reduce it to, for example, 4KB doing:
//new BufferedReader ("file", { encoding: "utf8", bufferSize: 4*1024 })
new BufferedReader ("file", { encoding: "utf8" })
.on ("error", function (error){
// Log whatever error the reader reports.
console.log (error);
})
.on ("line", function (line){
// --n reaches 0 on the n-th "line" event, making !--n true exactly then.
if (!--n){
l = line;
//With interrupt you can stop the reading
// `this` is presumably the reader instance, which is why a plain function
// (not an arrow function) is used here. TODO confirm against the module.
this.interrupt ();
}
})
.on ("end", function (){
// Prints the stored line, or null when no n-th line was stored.
// NOTE(review): assumes "end" still fires after interrupt(). Verify.
console.log (l);
})
.read ();
回答by Off
You can greatly improve the performance of FGRibreau's answer by discarding previously read data from the "fileData" variable.
您可以通过删除“fileData”变量中的先前数据来大大提高 FGRibreau 答案的性能。
/**
 * Streams `file` and hands one of its lines to `cb`, keeping only the
 * unfinished last line in memory between chunks.
 *
 * Named `get_line` (after the function it improves) because an anonymous
 * `function (...) {}` in statement position is a syntax error.
 *
 * @param {string} file - Path of the file to read (decoded as UTF-8).
 * @param {number|string} line_no - Zero-based index of the wanted line
 *     (0 is the first line); coerced with unary `+`.
 * @param {function} cb - Called at most once:
 *     `cb(null, line)` on success,
 *     `cb(err, null)` with the stream's error object on failure, or
 *     `cb('File end reached without finding line', null)` when the file has
 *     no such line.
 */
function get_line(file, line_no, cb) {
    var index = +line_no;
    // Only a non-negative integer can address a line. Anything else
    // (NaN, -1, 1.5) is reported as "not found" once the file ends.
    var isValidIndex = index >= 0 && index % 1 === 0;
    var finished = false;
    // Number of complete lines already dropped from fileData.
    var linesSkipped = 0;
    var fileData = '';
    var stream = fs.createReadStream(file, {
        encoding: 'utf-8',
        // Chunk size; replaces the `bufferSize` option this snippet used before.
        highWaterMark: 64 * 1024
    });

    // Guarantees the caller hears back only once, whichever event wins.
    function finish(err, line) {
        if (finished) {
            return;
        }
        finished = true;
        cb(err, line);
    }

    stream.on('data', function (data) {
        fileData += data;
        var lines = fileData.split('\n');
        // The last element may be a line cut off by the chunk boundary, so
        // only the elements before it count as complete lines.
        var completeCount = lines.length - 1;
        if (isValidIndex && index < linesSkipped + completeCount) {
            stream.destroy();
            finish(null, lines[index - linesSkipped]);
        }
        // Add this else condition to remove all unnecessary data from the variable
        else {
            // Drop the complete lines but keep the unfinished one, so a line
            // that spans two chunks is not truncated.
            linesSkipped += completeCount;
            fileData = lines[completeCount];
        }
    });
    stream.on('error', function (err) {
        finish(err, null);
    });
    stream.on('end', function () {
        // At end of file the leftover text is the final line.
        if (isValidIndex && index === linesSkipped) {
            finish(null, fileData);
            return;
        }
        finish('File end reached without finding line', null);
    });
}
Using a 70000-line file, I got these results when displaying line 50000:
使用一个 70000 行的文件,在显示第 50000 行时,我得到了以下结果:
real 0m3.504s
真正的 0m3.504s
user 0m0.000s
用户 0m0.000s
sys 0m0.015s
系统 0m0.015s
For the same example with the else I got the following:
对于与 else 相同的示例,我得到以下信息:
real 0m0.540s
真实 0m0.540s
user 0m0.015s
用户 0m0.015s
sys 0m0.031s
系统 0m0.031s
This also implies a much lower memory consumption.
这也意味着内存消耗要低得多。

