javascript - 关于nodejs爬虫的问题?
本文介绍了javascript - 关于nodejs爬虫的问题?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
问 题
var request = require('request');
var fs = require('fs');
var cheerio = require("cheerio");
var url = 'http://www.fssxhsd.com/category.php?id=332';
request(url,function(err,result){
if(err){
console.log(err);
}
var $ = cheerio.load(result.body);
$('dl img').each(function(index,element){
var img_src = 'www.fssxhsd.com/' + $(this).attr('src');
console.log(img_src)
//采用request模块,向服务器发起一次请求,获取图片资源
request.head(img_src,function(err,res,body){
if(err){
console.log(err);
}
});
request(img_src).pipe(fs.createWriteStream('./image/'+index));
})
})
这是我的代码,但是报错:
Error: Invalid URI "www.fssxhsd.com/images/201703/thumb_img/277292_thumb_G_1489165224300.jpg"
at Request.init (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:276:31)
at new Request (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:130:8)
at request (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\index.js:54:10)
at Function.head (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\index.js:62:12)
at Object.<anonymous> (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\reptile2.js:16:17)
at initialize.exports.each (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\cheerio\lib\api\traversing.js:300:24)
at Request._callback (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\reptile2.js:12:17)
at Request.self.callback (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:188:22)
at emitTwo (events.js:106:13)
at Request.emit (events.js:194:7)
events.js:163
throw er; // Unhandled 'error' event
^
Error: Invalid URI "www.fssxhsd.com/images/201703/thumb_img/277292_thumb_G_1489165224300.jpg"
at Request.init (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:276:31)
at new Request (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:130:8)
at request (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\index.js:54:10)
at Object.<anonymous> (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\reptile2.js:21:9)
at initialize.exports.each (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\cheerio\lib\api\traversing.js:300:24)
at Request._callback (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\reptile2.js:12:17)
at Request.self.callback (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:188:22)
at emitTwo (events.js:106:13)
at Request.emit (events.js:194:7)
at Request.<anonymous> (C:\Users\Administrator\Desktop\mystyle\forExample\reptile\node_modules\request\request.js:1171:10)
请问这是为什么啊,它这个报错说无效的url但是这个url是可以进入的啊?
谢谢!
解决方案
var img_src = 'www.fssxhsd.com/' + $(this).attr('src');
加上 http
或者https
,谢谢
这篇关于javascript - 关于nodejs爬虫的问题?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文