输出python到csv常规 [英] output python to csv regular

查看:160
本文介绍了输出python到csv常规的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

你好，我是 python/scrapy 世界的新手，我需要像下面这个例子一样把我的产品列表导出为 csv：
我想要什么
但我得到这一个:
我得到了什么



/////
spider:
/////

  import scrapy 
import csv
从escrap.items import EscrapItem

class EscrapSpider(scrapy.Spider):
name =tunisianet
allowed_domains = [tunisianet.com.tn]
start_urls = [
http://www.tunisianet.com.tn/385-logiciels-informatique-tunisie/
]

def parse(self,response):
for sel in response .xpath('// * [contains(@class,ajax_block_product)]'):
item = EscrapItem()
item ['revendeur'] ='\\\
'.join .xpath('// * [contains(@class,center_block)] / h2 / a / @ href')。re('tunisianet'))
item ['produit'] ='\\\
'.join(sel.xpath('// * [contains(@class,center_block)] / h2 / a / text()')extract())
item ['lien'] = \\\
'.join(sel.xpath('// * [contains(@class,center_block)] / h2 / a / @ href')。extract())
item ['description'] ='\\\
'.join(sel.xpath('// * [contains(@class,product_desc)] / a / text()')extract())
item ['prix' ] ='\\\
'.join(sel.xpath('// * [contains(@class,price)] / text()')extract())
data = [item ['
out = open('out.csv','w')',''''''
for row in data:
for row in row:
out.write(column.encode('utf-8'))
返回数据

/////
项目:
/////

  import scrapy 

class EscrapItem(scrapy.Item):
revendeur = scrapy.Field ()
produit = scrapy.Field()
lien = scrapy.Field()
description = scrapy.Field()
prix = scrapy.Field

/////
管道:
/////

  class EscrapPipeline(object):

#将所有单词放在小写
word_to_filter = ['politics','religion']
def process_item(self,item,spider):
for word in self.words_to_filter:
if word in unicode([item ['revendeur ',item ['produit'],item ['lien'],item ['description'],item ['prix']]。lower():
raise DropItem(Contains forbidden word: s%word)
else:
return item

/////
我的设置:
/////

  BOT_NAME = 'escrap'

SPIDER_MODULES = ['escrap.spiders']
NEWSPIDER_MODULE ='escrap.spiders'
ITEM_PIPELINES = {'escrap.pipelines.Escrap管道':1}
FEED_EXPORTERS = {
'csv':'escrap.escrap_csv_item_exporter.EscrapCsvItemExporter',
}
FIELDS_TO_EXPORT = [
'revendeur',
'produit',
'lien',
'description',
'prix'
]


你不需要在解析项目时自己创建csv文件,scrapy可以默认导出到csv文件。



因此将 parse 方法更改为:

  def parse(self,response):
for sel in response.xpath('// * [contains(@class,ajax_block_product)]'):
item = EscrapItem()
item ['revendeur'] ='\\\
'.join(sel.xpath('// * [contains(@class,center_block)] / h2 / a / @ href')。re('tunisianet') )
item ['produit'] ='\\\
'.join(sel.xpath('// * [contains(@class,center_block)] / h2 / a / text()')。提取())
item ['lien'] ='\\\
'.join(sel.xpath('// * [contains(@class,center_block)] / h2 / a / @ href' ).extract())
item ['description'] ='\\\
'.join(sel.xpath('// * [contains(@class,product_desc)] / a / text ').text()'''''''''。 ).extract())
yield item

稍后运行 scrapy 时，可以这样调用：

  scrapy crawl myspider -o output.csv 

现在您已将所有项目导出到csv文件。



如果您仍想在自己的管道（pipeline）中控制导出过程，请参考这里来创建自己的导出器（exporter）。它看起来像这样：

 从scrapy导入信号
从scrapy.exporters import CsvItemExporter

class CSVExportPipeline(object):

def __init __(self):
self.files = {}

@classmethod
def from_crawler(cls, crawler):
pipeline = cls()
crawler.signals.connect(pipeline.spider_opened,signals.spider_opened)
crawler.signals.connect(pipeline.spider_closed,signals.spider_closed)
return pipeline

def spider_opened(self,spider):
file = open('%s_products.csv'%spider.name,'w + b')
self .files [spider] = file
self.exporter = CsvItemExporter(file)
self.exporter.start_exporting()

def spider_closed(self,spider):
self.exporter.finish_exporting()
file = self.files.pop(spider)
file.close()

def process_item(self,item,spider):
self.exporter.export_item(item)
return item

创建自己的管道时，请务必完整阅读相关文档。

hello i'm new on python/scrapy world, i need to export my list of products to csv like this exemple: what i want but i get this one: what i got

///// spider: /////

import scrapy
import csv
from escrap.items import EscrapItem

class EscrapSpider(scrapy.Spider):
    """Scrape product listings from tunisianet.com.tn.

    Fixes over the posted version:
    - the CSV-writing code ran AFTER the loop, so only the last product was
      ever written, with no delimiters between columns;
    - the XPath expressions started with '//', which selects from the whole
      document on every iteration instead of from the current product block
      (use relative './/' inside a selector loop);
    - items are now yielded so scrapy's built-in feed exporter produces the
      CSV (run with: scrapy crawl tunisianet -o out.csv).
    """
    name = "tunisianet"
    allowed_domains = ["tunisianet.com.tn"]
    start_urls = [
        "http://www.tunisianet.com.tn/385-logiciels-informatique-tunisie/"
    ]

    def parse(self, response):
        for sel in response.xpath('//*[contains(@class, "ajax_block_product")]'):
            item = EscrapItem()
            # './/' keeps each query scoped to the current product block
            item['revendeur'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/@href').re('tunisianet'))
            item['produit'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/text()').extract())
            item['lien'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/@href').extract())
            item['description'] = '\n'.join(sel.xpath('.//*[contains(@class, "product_desc")]/a/text()').extract())
            item['prix'] = '\n'.join(sel.xpath('.//*[contains(@class, "price")]/text()').extract())
            yield item

///// items: /////

import scrapy

class EscrapItem(scrapy.Item):
    """Container for one scraped product listing (field names are French)."""
    revendeur = scrapy.Field()    # reseller/vendor marker (regex match on the link)
    produit = scrapy.Field()      # product title text
    lien = scrapy.Field()         # product page URL
    description = scrapy.Field()  # short product description
    prix = scrapy.Field()         # displayed price text

///// pipelines: /////

class EscrapPipeline(object):
    """Drop any item whose text fields contain a forbidden word.

    Fixes over the posted version: ``unicode()`` is Python-2-only (``str()``
    behaves the same way here on both 2 and 3); the haystack string is built
    once instead of on every loop iteration; and the confusing ``for/else``
    is replaced by a plain return after the loop.
    """

    # matching is done in lowercase
    words_to_filter = ['politics', 'religion']

    def process_item(self, item, spider):
        # Flatten the interesting fields into one lowercase string to search.
        haystack = str([item['revendeur'], item['produit'], item['lien'],
                        item['description'], item['prix']]).lower()
        for word in self.words_to_filter:
            if word in haystack:
                # NOTE(review): DropItem must be imported
                # (from scrapy.exceptions import DropItem) for this to work.
                raise DropItem("Contains forbidden word: %s" % word)
        return item

///// my setting: /////

BOT_NAME = 'escrap'

SPIDER_MODULES = ['escrap.spiders']
NEWSPIDER_MODULE = 'escrap.spiders'
# Run every item through EscrapPipeline (priority 1 = early).
ITEM_PIPELINES = {'escrap.pipelines.EscrapPipeline': 1}
# Override the default csv feed exporter with the project's custom one.
FEED_EXPORTERS = {
    'csv': 'escrap.escrap_csv_item_exporter.EscrapCsvItemExporter',
}
# Column order for the exported CSV file.
FIELDS_TO_EXPORT = [
    'revendeur',
    'produit',
    'lien',
    'description',
    'prix'
]

解决方案

You don't need to create the csv file yourself when parsing items, scrapy can export by default to a csv file.

so change your parse method to:

def parse(self, response):
    """Yield one EscrapItem per product block; scrapy's feed exporter
    writes the CSV (run with: scrapy crawl <name> -o output.csv).

    Fix over the posted version: the inner XPaths started with '//*', which
    selects from the WHOLE document on every iteration (each item would get
    the concatenation of all products). Relative './/' scopes each query to
    the current product selector.
    """
    for sel in response.xpath('//*[contains(@class, "ajax_block_product")]'):
        item = EscrapItem()
        item['revendeur'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/@href').re('tunisianet'))
        item['produit'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/text()').extract())
        item['lien'] = '\n'.join(sel.xpath('.//*[contains(@class, "center_block")]/h2/a/@href').extract())
        item['description'] = '\n'.join(sel.xpath('.//*[contains(@class, "product_desc")]/a/text()').extract())
        item['prix'] = '\n'.join(sel.xpath('.//*[contains(@class, "price")]/text()').extract())
        yield item

later when calling scrapy you can call it with:

scrapy crawl myspider -o output.csv

Now you have all your items exported to a csv file.

If you still want to control it on your own pipeline, check here to create your own exporter. It would like this:

from scrapy import signals
from scrapy.exporters import CsvItemExporter

class CSVExportPipeline(object):
    """Item pipeline exporting every scraped item to <spidername>_products.csv.

    Fixes over the posted version: the ``@classmethod`` block was indented
    one extra space (an IndentationError), and the local name ``file``
    shadowed the builtin.
    """

    def __init__(self):
        # One open file handle per running spider.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        # Hook spider open/close signals so the exporter's lifetime
        # matches the spider's.
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # 'w+b': CsvItemExporter writes encoded bytes, not text.
        csv_file = open('%s_products.csv' % spider.name, 'w+b')
        self.files[spider] = csv_file
        self.exporter = CsvItemExporter(csv_file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        csv_file = self.files.pop(spider)
        csv_file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

To create your own pipeline make sure to read this entirely.

这篇关于输出python到csv常规的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆