JavaScript heap out of memory: increase node memory or optimize my code?


Problem description

I have a JavaScript heap out of memory error in my Node.js application. I'm trying to insert 408,000 records into MongoDB in one call. I have two loops: the first goes from 1 to 24 and the second (inside the first) from 1 to 17,000. The data comes from a NetCDF file: I parse the data from this file, build the model object, and insert the records into MongoDB.

I've seen some posts on StackOverflow about this problem, and I saw that I can increase the node memory with --max_old_space_size. But I don't know if that's the right approach. Maybe you have some suggestions to optimize my code?
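For reference, a minimal example of how that flag is passed when starting a script; the value is the old-space heap limit in megabytes, and app.js is just a placeholder for your entry file:

node --max_old_space_size=4096 app.js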

Here is my loop:

for (var time_pos = 0; time_pos < 24; time_pos++) {

    // This array contains 17 000 data
    var dataSliced = file.root.variables['pm10_conc'].readSlice(
        time_pos, time_size,
        level_pos, level_size,
        lat_from, lat_size,
        lng_from, lng_size
    );

    // Loop : 0 to 17 000
    for (var i = 0; i < dataSliced.length; i++) {
        var pollution = new Pollution();

        pollution.latitude   = current_lat;
        pollution.longitude  = current_lng;
        pollution.country    = country_name;
        pollution.model      = model_name;
        pollution.data_type  = type_name;
        pollution.level      = 0;
        pollution.datetime   = date;
        pollution.pollutants.pm10.description = description;
        pollution.pollutants.pm10.units = units;
        pollution.pollutants.pm10.concentration = dataSliced[i];

        pollution.save(function(err){
            if (err) throw err;
            console.log("Data saved");
        });
    }
}

Here is my error:

<--- Last few GCs --->

   56782 ms: Mark-sweep 1366.6 (1436.9) -> 1366.6 (1436.9) MB, 1943.5 / 0.0 ms [allocation failure] [GC in old space requested].
   58617 ms: Mark-sweep 1366.6 (1436.9) -> 1366.6 (1436.9) MB, 1834.9 / 0.0 ms [allocation failure] [GC in old space requested].
   60731 ms: Mark-sweep 1366.6 (1436.9) -> 1368.6 (1417.9) MB, 2114.3 / 0.0 ms [last resort gc].
   62707 ms: Mark-sweep 1368.6 (1417.9) -> 1370.7 (1417.9) MB, 1975.8 / 0.0 ms [last resort gc].


<--- JS stacktrace --->

==== JS stack trace =========================================

Security context: 0x3a7c3fbcfb51 <JS Object>
    1: fnWrapper [/var/www/html/Project/node_modules/hooks-fixed/hooks.js:185] [pc=0x6ccee7825d4] (this=0x3a7c3fbe6119 <JS Global Object>)
    2: fn [/var/www/html/Project/node_modules/mongoose/lib/schema.js:~250] [pc=0x6ccee7d8ffe] (this=0xd29dd7fea11 <a model with map 0x994a88e5849>,next=0x1cbe49858589 <JS Function fnWrapper (SharedFunctionInfo 0x3d8ecc066811)>,done=0x1cbe498586...

FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - JavaScript heap out of memory
 1: node::Abort() [node]
 2: 0x1098b2c [node]
 3: v8::Utils::ReportApiFailure(char const*, char const*) [node]
 4: v8::internal::V8::FatalProcessOutOfMemory(char const*, bool) [node]
 5: v8::internal::Factory::NewTransitionArray(int) [node]
 6: v8::internal::TransitionArray::Insert(v8::internal::Handle<v8::internal::Map>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::Map>, v8::internal::SimpleTransitionFlag) [node]
 7: v8::internal::Map::CopyReplaceDescriptors(v8::internal::Handle<v8::internal::Map>, v8::internal::Handle<v8::internal::DescriptorArray>, v8::internal::Handle<v8::internal::LayoutDescriptor>, v8::internal::TransitionFlag, v8::internal::MaybeHandle<v8::internal::Name>, char const*, v8::internal::SimpleTransitionFlag) [node]
 8: v8::internal::Map::CopyAddDescriptor(v8::internal::Handle<v8::internal::Map>, v8::internal::Descriptor*, v8::internal::TransitionFlag) [node]
 9: v8::internal::Map::CopyWithField(v8::internal::Handle<v8::internal::Map>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::FieldType>, v8::internal::PropertyAttributes, v8::internal::Representation, v8::internal::TransitionFlag) [node]
10: v8::internal::Map::TransitionToDataProperty(v8::internal::Handle<v8::internal::Map>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::Object>, v8::internal::PropertyAttributes, v8::internal::Object::StoreFromKeyed) [node]
11: v8::internal::LookupIterator::PrepareTransitionToDataProperty(v8::internal::Handle<v8::internal::JSObject>, v8::internal::Handle<v8::internal::Object>, v8::internal::PropertyAttributes, v8::internal::Object::StoreFromKeyed) [node]
12: v8::internal::StoreIC::LookupForWrite(v8::internal::LookupIterator*, v8::internal::Handle<v8::internal::Object>, v8::internal::Object::StoreFromKeyed) [node]
13: v8::internal::StoreIC::UpdateCaches(v8::internal::LookupIterator*, v8::internal::Handle<v8::internal::Object>, v8::internal::Object::StoreFromKeyed) [node]
14: v8::internal::StoreIC::Store(v8::internal::Handle<v8::internal::Object>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::Object>, v8::internal::Object::StoreFromKeyed) [node]
15: v8::internal::Runtime_StoreIC_Miss(int, v8::internal::Object**, v8::internal::Isolate*) [node]
16: 0x6ccee4092a7
Aborted
[nodemon] app crashed - waiting for file changes before starting...

Do you know if there is a way to optimize my code, or is increasing the node memory the best way?

I have a working solution. I tried to use mongoose insertMany() but I got the fatal allocation error again.

Then I removed the new Pollution and pushed my data into an array. After that I used collection.insert with async.each like this:

var pollution = [];   

for (var time_pos = 0; time_pos < 24; time_pos++) {

    // This array contains 17 000 data
    var dataSliced = file.root.variables['pm10_conc'].readSlice(
        time_pos, time_size,
        level_pos, level_size,
        lat_from, lat_size,
        lng_from, lng_size
    );

    async.each(dataSliced, function (item, next){

        pollution.push({
            'longitude' : current_lng,
            'latitude'  : current_lat,
            'country'   : country_name,
            'model'     : model_name,
            'data_type' : type_name,
            'level'     : 0,
            'datetime'  : date,
            'pollution' : {
                'pm10': {
                    'description': description,
                    'units': units,
                    'concentration': item
                }
            }
        });

        next();
    });
}

Pollution.collection.insert(pollution, function(err, docs){
    if (err) throw err;

    console.log("Data saved");
});

If you have a better solution, you can post your answer.

Recommended answer

I hope this helps you and others... :-)

I have been doing quite intense research on the best way to import data into MongoDB. I have used Mongoimport as well as Mongoose with the insertMany method (using the native MongoDB driver). I have read that it is best to keep batch sizes at about 100 for the best performance. Here is my solution using insertMany; using Mongoimport is quite trivial (just one line of code), so I don't think it is necessary to post it here.
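For completeness, a Mongoimport invocation typically looks something like the line below. This is only an illustrative sketch, not the author's exact command: the database, collection, and file names are assumptions, and it assumes a comma-separated file with a header line.

mongoimport --db autoMDM --collection parts --type csv --headerline --file parts.csv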

In my example, 602,198 records were first parsed into an array of objects and then imported into MongoDB successfully.

It takes some memory to import the parsed objects into MongoDB, so it is normally necessary to use the command below to allow node to use more memory (you can read more here).

node --max_old_space_size=8000 partImport.js

To increase efficiency, I split the array of objects into batches and use Promise.all, which resolves once all of the promises in the iterable argument have resolved.

If you have larger files and still run out of memory even after increasing the memory allowance for node, you can split the files. Remove the headers first and add them in the CSV parser options instead (see the headers array in partImport.js below).

To split the files:

$ split -l numberoflines filename
ex. split -l 1000000 term2.csv

Let's say term2.csv has 5,000,001 lines and no headers. From the example above you will get 6 files: 5 files with one million lines each and one file with one line.

Have a look at how I solved it in the function bulkImportToMongo in the mongodb.js file.

Console

➜  database git:(master) ✗ node --max_old_space_size=8000  partImport.js
Connected to db!
Time to parse file: : 5209.325ms
Disconnected from db!
Time to import parsed objects to db: : 153606.545ms
➜  database git:(master) ✗

parseCSV.js

const csv = require("fast-csv");

// Parse a CSV file into an array of records and resolve with that array.
function promiseCSV(filePath, options) {
  return new Promise((resolve, reject) => {
    console.time("Time to parse file");
    var records = [];
    csv
      .fromPath(filePath, options)
      .on("data", record => {
        records.push(record);
      })
      .on("error", error => reject(error))
      .on("end", () => {
        console.timeEnd("Time to parse file");
        resolve(records);
      });
  });
}

module.exports = promiseCSV;

mongodb.js

const mongoose = require("mongoose");
mongoose.Promise = global.Promise;

function connectToMongo(databaseName) {
  mongoose.connect(`mongodb://localhost:27017/${databaseName}`, {
    keepAlive: true,
    reconnectTries: Number.MAX_VALUE,
    useMongoClient: true
  });
  console.log("Connected to db!");
}

function disconnectFromMongo() {
  mongoose.disconnect();
  console.log("Disconnected from db!");
}

// Split the parsed array into batches of 100 and insert each batch with
// insertMany. Promise.all resolves once every batch insert has completed.
function bulkImportToMongo(arrayToImport, mongooseModel) {
  const Model = require(`../../../models/${mongooseModel}`);
  const batchSize = 100;
  let batchCount = Math.ceil(arrayToImport.length / batchSize);
  let ops = [];
  let counter = 0;
  for (let i = 0; i < batchCount; i++) {
    let batch = arrayToImport.slice(counter, counter + batchSize);
    counter += batchSize;
    ops.push(Model.insertMany(batch));
  }
  return Promise.all(ops);
}

module.exports.bulkImportToMongo = bulkImportToMongo;
module.exports.connectToMongo = connectToMongo;
module.exports.disconnectFromMongo = disconnectFromMongo;

partImport.js

const path = require("path");
const parseCSV = require("./helpers/parseCSV");
const {
  connectToMongo,
  disconnectFromMongo,
  bulkImportToMongo
} = require("./helpers/mongodb");

const filePath = path.join(__dirname, "../../data/parts.csv");
const options = {
  delimiter: ";",
  noheader: true,
  headers: [
    "facility",
    "partNumber",
    "partName",
    "partDescription",
    "netWeight",
    "customsTariff"
  ]
};

connectToMongo("autoMDM");
parseCSV(filePath, options)
  .then(records => {
    console.time("Time to import parsed objects to db");
    return bulkImportToMongo(records, "parts.js");
  })
  /*   .then(result =>
    console.log("Total batches inserted: ", result, result.length)
  ) */
  .then(() => {
    disconnectFromMongo();
    console.timeEnd("Time to import parsed objects to db");
  })
  .catch(error => console.log(error));
