输出python到csv常规 [英] output python to csv regular
问题描述
hello我是新的python / scrapy世界,我需要导出我的产品列表为csv像这个例子:
我想要什么
但我得到这一个:
我得到了什么
/////
spider:
/////
import scrapy
import csv
from escrap.items import EscrapItem


class EscrapSpider(scrapy.Spider):
    """Scrape product listings from tunisianet.com.tn and yield EscrapItems."""

    name = "tunisianet"
    allowed_domains = ["tunisianet.com.tn"]
    start_urls = [
        "http://www.tunisianet.com.tn/385-logiciels-informatique-tunisie/"
    ]

    def parse(self, response):
        """Yield one item per product block on the listing page."""
        for sel in response.xpath('//*[contains(@class, "ajax_block_product")]'):
            item = EscrapItem()
            # ".//" keeps each expression relative to this product block;
            # the original absolute "//" re-matched the whole page for every
            # item, gluing all products into a single row.
            item['revendeur'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/@href').re('tunisianet'))
            item['produit'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/text()').extract())
            item['lien'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/@href').extract())
            item['description'] = '\n'.join(sel.xpath('.//*[contains(@class, "product_desc")]/a/text()').extract())
            item['prix'] = '\n'.join(sel.xpath('.//*[contains(@class, "price")]/text()').extract())
            # Yield and let scrapy's feed export write the CSV (-o out.csv).
            # The original re-opened 'out.csv' in 'w' mode for every item
            # (truncating it each time) and returned after the first product.
            yield item
/////
项目:
/////
import scrapy


class EscrapItem(scrapy.Item):
    """Container for one scraped product (the fields exported to CSV)."""

    revendeur = scrapy.Field()    # reseller token matched from the URL
    produit = scrapy.Field()      # product title
    lien = scrapy.Field()         # product URL
    description = scrapy.Field()  # short description text
    prix = scrapy.Field()         # price text; fixed: original lost the "()"
/////
管道:
/////
class EscrapPipeline(object):
    """Drop any item whose field text contains a forbidden word."""

    # Case-insensitive blacklist; matching items are discarded.
    words_to_filter = ['politics', 'religion']

    def process_item(self, item, spider):
        # NOTE(review): DropItem must be imported at module top:
        #   from scrapy.exceptions import DropItem
        # Flatten every field into one lowercase string for matching.
        # (str() replaces the Python-2-only unicode() of the original.)
        haystack = str([item['revendeur'], item['produit'], item['lien'],
                        item['description'], item['prix']]).lower()
        for word in self.words_to_filter:
            if word in haystack:
                raise DropItem("Contains forbidden word: %s" % word)
        # Bug fix: the original returned inside the loop's else branch, so
        # only the first word of the blacklist was ever checked.
        return item
/////
我的设置:
/////
# Scrapy project settings for the "escrap" bot.
BOT_NAME = 'escrap'
SPIDER_MODULES = ['escrap.spiders']
NEWSPIDER_MODULE = 'escrap.spiders'
# Fixed: the pipeline path was corrupted by translation ("Escrap管道").
ITEM_PIPELINES = {'escrap.pipelines.EscrapPipeline': 1}
# Custom CSV exporter; escrap/escrap_csv_item_exporter.py must define it.
FEED_EXPORTERS = {
    'csv': 'escrap.escrap_csv_item_exporter.EscrapCsvItemExporter',
}
# Column order for the CSV export.
FIELDS_TO_EXPORT = [
    'revendeur',
    'produit',
    'lien',
    'description',
    'prix',
]
你不需要在解析项目时自己创建csv文件,scrapy可以默认导出到csv文件。
因此将 parse
方法更改为:
def parse(self, response):
    """Yield one EscrapItem per product block on the listing page."""
    for sel in response.xpath('//*[contains(@class, "ajax_block_product")]'):
        item = EscrapItem()
        # Fixed: expressions are now relative (".//") to `sel`; with an
        # absolute "//" every item re-matched the entire page, which is why
        # all products were concatenated into a single row.
        item['revendeur'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/@href').re('tunisianet'))
        item['produit'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/text()').extract())
        item['lien'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/@href').extract())
        item['description'] = '\n'.join(sel.xpath('.//*[contains(@class, "product_desc")]/a/text()').extract())
        item['prix'] = '\n'.join(sel.xpath('.//*[contains(@class, "price")]/text()').extract())
        yield item
稍后运行 scrapy 时,可以这样调用:
scrapy crawl myspider -o output.csv
现在您已将所有项目导出到csv文件。
如果您仍想在自己的管道上控制它,请点击此处创建自己的出口商。它希望这样:
from scrapy import signals
from scrapy.exporters import CsvItemExporter


class CSVExportPipeline(object):
    """Export every item to <spidername>_products.csv via CsvItemExporter."""

    def __init__(self):
        # One open file handle per running spider.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        # Hook the pipeline into the spider lifecycle signals.
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # Binary mode ('w+b') because CsvItemExporter writes encoded bytes.
        file = open('%s_products.csv' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = CsvItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
创建自己的管道时,请务必完整阅读相关文档。
hello i'm new on python/scrapy world, i need to export my list of products to csv like this exemple: what i want but i get this one: what i got
///// spider: /////
import scrapy
import csv
from escrap.items import EscrapItem
class EscrapSpider(scrapy.Spider):
    """Scrape product listings from tunisianet.com.tn and yield EscrapItems."""

    name = "tunisianet"
    allowed_domains = ["tunisianet.com.tn"]
    start_urls = [
        "http://www.tunisianet.com.tn/385-logiciels-informatique-tunisie/"
    ]

    def parse(self, response):
        """Yield one item per product block on the listing page."""
        for sel in response.xpath('//*[contains(@class, "ajax_block_product")]'):
            item = EscrapItem()
            # ".//" keeps each expression relative to this product block;
            # the original absolute "//" re-matched the whole page for every
            # item, gluing all products into a single row.
            item['revendeur'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/@href').re('tunisianet'))
            item['produit'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/text()').extract())
            item['lien'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/@href').extract())
            item['description'] = '\n'.join(sel.xpath('.//*[contains(@class, "product_desc")]/a/text()').extract())
            item['prix'] = '\n'.join(sel.xpath('.//*[contains(@class, "price")]/text()').extract())
            # Yield and let scrapy's feed export write the CSV (-o out.csv).
            # The original re-opened 'out.csv' in 'w' mode for every item
            # (truncating it each time), wrote bytes to a text-mode file,
            # and returned after the first product.
            yield item
///// items: /////
import scrapy
class EscrapItem(scrapy.Item):
    """Container for one scraped product (the fields exported to CSV)."""

    # reseller token matched from the URL
    revendeur = scrapy.Field()
    # product title
    produit = scrapy.Field()
    # product URL
    lien = scrapy.Field()
    # short description text
    description = scrapy.Field()
    # price text
    prix = scrapy.Field()
///// pipelines: /////
class EscrapPipeline(object):
    """Drop any item whose field text contains a forbidden word."""

    # Case-insensitive blacklist; matching items are discarded.
    words_to_filter = ['politics', 'religion']

    def process_item(self, item, spider):
        # NOTE(review): DropItem must be imported at module top:
        #   from scrapy.exceptions import DropItem
        # Flatten every field into one lowercase string for matching.
        # (str() replaces the Python-2-only unicode() of the original.)
        haystack = str([item['revendeur'], item['produit'], item['lien'],
                        item['description'], item['prix']]).lower()
        for word in self.words_to_filter:
            if word in haystack:
                raise DropItem("Contains forbidden word: %s" % word)
        # Bug fix: the original returned inside the loop's else branch, so
        # only the first word of the blacklist was ever checked.
        return item
///// my setting: /////
# Scrapy project settings for the "escrap" bot.
BOT_NAME = 'escrap'
SPIDER_MODULES = ['escrap.spiders']
NEWSPIDER_MODULE = 'escrap.spiders'
# Run EscrapPipeline (lowest order value runs first).
ITEM_PIPELINES = {'escrap.pipelines.EscrapPipeline': 1}
# Custom CSV exporter; escrap/escrap_csv_item_exporter.py must define it.
FEED_EXPORTERS = {
    'csv': 'escrap.escrap_csv_item_exporter.EscrapCsvItemExporter',
}
# NOTE(review): FIELDS_TO_EXPORT is a CsvItemExporter option, not a global
# scrapy setting — confirm the custom exporter reads it from settings.
FIELDS_TO_EXPORT = [
    'revendeur',
    'produit',
    'lien',
    'description',
    'prix'
]
You don't need to create the csv file yourself when parsing items, scrapy can export by default to a csv file.
so change your parse
method to:
def parse(self, response):
    """Yield one EscrapItem per product block on the listing page."""
    for sel in response.xpath('//*[contains(@class, "ajax_block_product")]'):
        item = EscrapItem()
        # Fixed: expressions are now relative (".//") to `sel`; with an
        # absolute "//" every item re-matched the entire page, which is why
        # all products were concatenated into a single row.
        item['revendeur'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/@href').re('tunisianet'))
        item['produit'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/text()').extract())
        item['lien'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/@href').extract())
        item['description'] = '\n'.join(sel.xpath('.//*[contains(@class, "product_desc")]/a/text()').extract())
        item['prix'] = '\n'.join(sel.xpath('.//*[contains(@class, "price")]/text()').extract())
        yield item
later when calling scrapy you can call it with:
scrapy crawl myspider -o output.csv
Now you have all your items exported to a csv file.
If you still want to control it on your own pipeline, check here to create your own exporter. It would like this:
from scrapy import signals
from scrapy.exporters import CsvItemExporter
class CSVExportPipeline(object):
    """Pipeline that streams every scraped item into a per-spider CSV file."""

    def __init__(self):
        # Maps each running spider to its open output file handle.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        # Register lifecycle callbacks so files open/close with the spider.
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        # Binary mode because CsvItemExporter writes encoded bytes.
        output = open('%s_products.csv' % spider.name, 'w+b')
        self.files[spider] = output
        self.exporter = CsvItemExporter(output)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        handle = self.files.pop(spider)
        handle.close()

    def process_item(self, item, spider):
        # Write the item through the exporter and pass it along unchanged.
        self.exporter.export_item(item)
        return item
To create your own pipeline make sure to read this entirely.
这篇关于输出python到csv常规的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!