使用Node.js同步下载N个远程文件 [英] Downloading N number of remote files using Node.js synchronously

查看:543
本文介绍了使用Node.js的同步远程文件下载N多的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在使用Node.js开发一个简单的应用程序,在给定一个有效的URL时,它需要完成以下工作:


  1. 获取远程页面的HTML,并将其保存到本地。

  2. 爬取该HTML(使用cheerio),并记录其中引用的所有JS和CSS文件。

  3. 为每个JS/CSS文件发起HTTP请求,并按文件名将其保存到服务器上。

  4. 将HTML、CSS和JS文件打包成ZIP,并把生成的文件以流的方式发送给浏览器。

我已经完成了第1、2步,以及第3步的前半部分,但在下载的同步性上遇到了问题。我的代码运行得太快,为CSS和JS文件生成了文件名,却没有写入任何内容。我猜这是因为我的代码不是同步执行的。问题在于,我事先并不知道会有多少个文件,而在生成ZIP文件之前,所有文件都必须已经下载完成。

下面是我的应用程序目前的流程。我省略了辅助方法,因为它们不影响同步性。各位能否就我该怎么做给些建议?

  // Fetch the page at `fullurl`, write the output of rewritePaths() as
  // index.html into a fresh directory together with the page's JS/CSS files,
  // then zip that directory.
  // NOTE(review): this is the flow the question is about - the downloads
  // started with request(...).pipe(...) are asynchronous and nothing waits for
  // them before writeZip() is called.
  http.get(fullurl, function(res) {
      // NOTE(review): this handler runs once per 'data' event, so each chunk is
      // parsed on its own - presumably the page is expected to arrive as a
      // single chunk; confirm.
      res.on('data', function (chunk) {
          var $source = $(''+chunk),
              js = getJS($source, domain),
              css = getCSS($source, domain),
              uniqueName = pw(),
              dir = [baseDir,'jsd-', uniqueName, '/'].join(''),
              jsdir = dir + 'js/',
              cssdir = dir + 'css/',
              html = rewritePaths($source);

          // create tmp directory
          fs.mkdirSync(dir);

          console.log('creating index.html');

          // save index file
          fs.writeFileSync(dir + 'index.html', html);

          // create js directory
          fs.mkdirSync(jsdir);

          // Save JS files (each pipe only starts a download; the loop does not
          // wait for it to complete)
          js.forEach(function(jsfile){
              // last path segment of the URL is used as the local file name
              var filename = jsfile.split('/').reverse()[0];
              request(jsfile).pipe(fs.createWriteStream(jsdir + filename));
              console.log('creating ' + filename);
          });

          // create css directory
          fs.mkdirSync(cssdir);

          // Save CSS files (same fire-and-forget pattern as the JS files)
          css.forEach(function(cssfile){
              var filename = cssfile.split('/').reverse()[0];
              request(cssfile).pipe(fs.createWriteStream(cssdir + filename));
              console.log('creating ' + filename);
          });

          // write zip file to /tmp
          // (called right after the loops, with no wait on the streams above)
          writeZip(dir,uniqueName);

          // https://npmjs.org/package/node-zip
          // http://stuk.github.com/jszip/

      });
  }).on('error', function(e) {
      console.log("Got error: " + e.message);
  });


解决方案

您通过request模块下载文件的方式是异步的:

 request(cssfile).pipe(fs.createWriteStream(cssdir + filename));

不要那样下载,而是需要创建一个单独的函数,如下所示:

 /**
  * Download one remote file to disk and report completion through a callback.
  *
  * The write stream is only created after the server answers with HTTP 200,
  * so a failed download does not leave an empty file behind.
  *
  * @param {string} localFile - Path of the file to write.
  * @param {string} remotePath - URL of the file to fetch.
  * @param {function(?Error, ?string)} callback - Called exactly once: with
  *     (null, localFile) after the file has been written and closed, or with
  *     (error, null) when the request fails, the status is not 200, or the
  *     file cannot be written.
  */
 function download (localFile, remotePath, callback) {
     var finished = false;

     // Several events can signal the end of the transfer; report only the first.
     function done (err) {
         if (finished) { return; }
         finished = true;
         if (err) {
             callback(err, null);
         } else {
             callback(null, localFile);
         }
     }

     var out = request({ uri: remotePath });

     // A failed request emits 'error' instead of 'response'; without this
     // handler the callback would never be invoked.
     out.on('error', done);

     out.on('response', function (resp) {
         if (resp.statusCode !== 200) {
             // Stop the transfer; the body of an error page is not wanted.
             out.abort();
             done(new Error("No file found at given url."));
             return;
         }

         var localStream = fs.createWriteStream(localFile);
         localStream.on('error', done);
         // 'close' fires once the file has been written and closed.
         localStream.on('close', function () { done(); });
         out.pipe(localStream);
     });
 }

您还需要使用caolan的async模块(https://github.com/caolan/async),以便在所有下载完成之后再继续:

  // Save the JS files, then the CSS files, and write the zip only after every
  // download has finished.

  // Download every URL in `files` into `targetDir` (file name = last path
  // segment of the URL) and call `done` once all of them have completed.
  function saveAll (files, targetDir, done) {
      async.forEach(files, function (file, cb) {
          var filename = file.split('/').reverse()[0];
          download(targetDir + filename, file, function (err) {
              if (err) {
                  // Report the failure but keep going, so one missing asset
                  // does not prevent the remaining files from being saved.
                  console.log("Got error: " + err.message);
              } else {
                  console.log('creating ' + filename);
              }
              cb();
          });
      }, done);
  }

  saveAll(js, jsdir, function () {
      // create css directory
      fs.mkdirSync(cssdir);

      // CSS files go through download() as well; piping them without waiting
      // would let writeZip() run before their content is on disk.
      saveAll(css, cssdir, function () {
          // write zip file to /tmp
          writeZip(dir, uniqueName);
      });
  });

I'm working on a simple app using Node.js which needs to do the following when given a valid URL

  1. Retrieve the HTML of the remote page, save it locally.
  2. Spider the HTML (using cheerio) and record all JS and CSS file references.
  3. Make HTTP request for each JS/CSS file and save it to the server by file name.
  4. Zip up the html, css, and js files and stream the resulting file to the browser.

I've got 1 and 2 working, and the first half of #3 but I'm running into issues with the synchronous nature of the downloads. My code is running too fast and generating file names for the CSS and JS files, but none of the content. I'm guessing this is because my code isn't synchronous. The problem is that I don't know in advance how many files there might be and all of them have to be there before the ZIP file can be generated.

Here's the flow of my app as it currently exists. I've left out the helper methods as they don't affect synchronicity. Can any of you provide input as to what I should do?

// Fetch the page at `fullurl`, write the output of rewritePaths() as
// index.html into a fresh directory together with the page's JS/CSS files,
// then zip that directory.
// NOTE(review): this is the flow the question is about - the downloads started
// with request(...).pipe(...) are asynchronous and nothing waits for them
// before writeZip() is called.
http.get(fullurl, function(res) {
    // NOTE(review): this handler runs once per 'data' event, so each chunk is
    // parsed on its own - presumably the page is expected to arrive as a
    // single chunk; confirm.
    res.on('data', function (chunk) {
        var $source = $(''+chunk),
            js = getJS($source, domain),
            css = getCSS($source, domain),
            uniqueName = pw(),
            dir = [baseDir,'jsd-', uniqueName, '/'].join(''),
            jsdir = dir + 'js/',
            cssdir = dir + 'css/',
            html = rewritePaths($source);

        // create tmp directory
        fs.mkdirSync(dir);

        console.log('creating index.html');

        // save index file
        fs.writeFileSync(dir + 'index.html', html);

        // create js directory
        fs.mkdirSync(jsdir);

        // Save JS files (each pipe only starts a download; the loop does not
        // wait for it to complete)
        js.forEach(function(jsfile){
            // last path segment of the URL is used as the local file name
            var filename = jsfile.split('/').reverse()[0];
            request(jsfile).pipe(fs.createWriteStream(jsdir + filename));
            console.log('creating ' + filename);
        });

        // create css directory
        fs.mkdirSync(cssdir);

        // Save CSS files (same fire-and-forget pattern as the JS files)
        css.forEach(function(cssfile){
            var filename = cssfile.split('/').reverse()[0];
            request(cssfile).pipe(fs.createWriteStream(cssdir + filename));
            console.log('creating ' + filename);
        });

        // write zip file to /tmp
        // (called right after the loops, with no wait on the streams above)
        writeZip(dir,uniqueName);

        // https://npmjs.org/package/node-zip
        // http://stuk.github.com/jszip/

    });
}).on('error', function(e) {
    console.log("Got error: " + e.message);
});

解决方案

The way you are downloading files through the request module is asynchronous:

request(cssfile).pipe(fs.createWriteStream(cssdir + filename));

Instead of downloading like that, create a separate function like this:

/**
 * Download one remote file to disk and report completion through a callback.
 *
 * The write stream is only created after the server answers with HTTP 200,
 * so a failed download does not leave an empty file behind.
 *
 * @param {string} localFile - Path of the file to write.
 * @param {string} remotePath - URL of the file to fetch.
 * @param {function(?Error, ?string)} callback - Called exactly once: with
 *     (null, localFile) after the file has been written and closed, or with
 *     (error, null) when the request fails, the status is not 200, or the
 *     file cannot be written.
 */
function download (localFile, remotePath, callback) {
    var finished = false;

    // Several events can signal the end of the transfer; report only the first.
    function done (err) {
        if (finished) { return; }
        finished = true;
        if (err) {
            callback(err, null);
        } else {
            callback(null, localFile);
        }
    }

    var out = request({ uri: remotePath });

    // A failed request emits 'error' instead of 'response'; without this
    // handler the callback would never be invoked.
    out.on('error', done);

    out.on('response', function (resp) {
        if (resp.statusCode !== 200) {
            // Stop the transfer; the body of an error page is not wanted.
            out.abort();
            done(new Error("No file found at given url."));
            return;
        }

        var localStream = fs.createWriteStream(localFile);
        localStream.on('error', done);
        // 'close' fires once the file has been written and closed.
        localStream.on('close', function () { done(); });
        out.pipe(localStream);
    });
}

You also need the async module by caolan (https://github.com/caolan/async), so that the code continues only after all downloads have finished:

// Save the JS files, then the CSS files, and write the zip only after every
// download has finished.

// Download every URL in `files` into `targetDir` (file name = last path
// segment of the URL) and call `done` once all of them have completed.
function saveAll (files, targetDir, done) {
    async.forEach(files, function (file, cb) {
        var filename = file.split('/').reverse()[0];
        download(targetDir + filename, file, function (err) {
            if (err) {
                // Report the failure but keep going, so one missing asset
                // does not prevent the remaining files from being saved.
                console.log("Got error: " + err.message);
            } else {
                console.log('creating ' + filename);
            }
            cb();
        });
    }, done);
}

saveAll(js, jsdir, function () {
    // create css directory
    fs.mkdirSync(cssdir);

    // CSS files go through download() as well; piping them without waiting
    // would let writeZip() run before their content is on disk.
    saveAll(css, cssdir, function () {
        // write zip file to /tmp
        writeZip(dir, uniqueName);
    });
});

这篇关于使用Node.js的同步远程文件下载N多的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆