在python中读取一个多层的csv文件 [英] read a multi tier csv file in python

查看:169
本文介绍了在python中读取一个多层的csv文件的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

[L02]
g,g,g,g,g,g,g,g,g,g,w,w,w,w,g,g
g,g,g,g,g,g,g,g,g,w,w,w,w,w,g,g
g,g,g,g,g,g,g,g,w,w,w,w,w,g,g,g
g,g,g,g,g,g,g,g,w,w,w,w,g,g,g,g
g,g,g,g,g,g,g,g,g,w,w,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,w,w,w,w,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,w,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,w,w,w,g,g,g
g,g,g,g,g,g,g,g,g,w,w,w,g,g,g,g
g,g,g,g,g,g,g,g,w,w,w,w,g,g,g,g
g,g,g,g,g,g,g,w,w,w,w,g,g,g,g,g
g,g,g,g,g,g,g,w,w,w,g,g,g,g,g,g
g,g,g,g,g,g,w,w,w,w,w,g,g,g,g,g
g,g,g,g,g,g,g,w,w,w,w,g,g,g,g,g
[L01]
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d

我可以将单个块作为csv文件读取,但我不知道如何将每个块读取为一个单独的列表



我想要的输出是:每个块对应一个数组/列表,块的内容作为列表元素。有什么想法吗?

解决方案

下面是一个脚本,演示如何将问题分解为可重复使用的步骤(函数),并执行您需要的转换。

  import itertools 
导入操作符
导入re
导入csv
import pprint

class TaggedLine(str):

覆盖str以允许添加标签

def __new__ (cls,val,tag):
return str .__ new __(cls,val)
$ b def __init __(self,val,tag):
super(TaggedLine,self)。 __init __(val)
self.tag =标记

def节(流):

使用[节]标记每一行或无)

section_pattern = re.compile('\ [(。*)\'')
section =无
为流水线:
matcher = section_pattern.match(line)
如果匹配器:
section = matcher.group(1)
继续
产生TaggedLine(行,部分)

def分隔符(流):

将每个流分组到

返回itertools.groupby(sections(stream),operator.attrgetter('tag'))

def parsed_sections(stream):
用于section,lines in splitter(stream):
yield section,list(csv.reader(lines))

if __name__ =='__main__':
with open('data.csv')as stream:
为section,parsed_sections(stream)中的数据:
print'section',section
pprint.pprint(data [:2])

将文件保存为data.csv,脚本将使用此输出在您的数据上运行:

  section L02 
[['g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'w',
'w',
'w',
'w',
'g',
'g'],
['g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'g',
'w',
'w',
'w',
'w',
'w',
'g',
'g']]
section L01
[['d',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd'],
['d',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd',
'd']]


I need to read the following data out of a text file:

[L02]
g,g,g,g,g,g,g,g,g,g,w,w,w,w,g,g
g,g,g,g,g,g,g,g,g,w,w,w,w,w,g,g
g,g,g,g,g,g,g,g,w,w,w,w,w,g,g,g
g,g,g,g,g,g,g,g,w,w,w,w,g,g,g,g
g,g,g,g,g,g,g,g,g,w,w,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,w,w,w,w,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,w,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,g,w,w,g,g,g
g,g,g,g,g,g,g,g,g,g,w,w,w,g,g,g
g,g,g,g,g,g,g,g,g,w,w,w,g,g,g,g
g,g,g,g,g,g,g,g,w,w,w,w,g,g,g,g
g,g,g,g,g,g,g,w,w,w,w,g,g,g,g,g
g,g,g,g,g,g,g,w,w,w,g,g,g,g,g,g
g,g,g,g,g,g,w,w,w,w,w,g,g,g,g,g
g,g,g,g,g,g,g,w,w,w,w,g,g,g,g,g
[L01]
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d
d,d,d,d,d,d,d,d,d,d,d,d,d,d,d,d

I can read a single block as a csv file, but I don't know how to read each block as a separate list.

The output I want is to have arrays/lists for each block with the block contents as the list elements. Any ideas?

解决方案

Here's a script that demonstrates how to break down the problem into reusable steps (functions) and performs the transformation you need.

import itertools
import operator
import re
import csv
import pprint

class TaggedLine(str):
    """
    A line of text that also carries a tag.

    Instances compare, slice and iterate exactly like the ``str`` value
    they were built from; the only addition is the ``tag`` attribute.

    Parameters:
        val: the text of the line.
        tag: any value to attach to the line (stored as ``self.tag``).
    """
    def __new__(cls, val, tag):
        # str is immutable, so its value has to be fixed here in __new__;
        # the tag is not part of the string value and is attached in __init__.
        return str.__new__(cls, val)

    def __init__(self, val, tag):
        # Do not forward ``val``: str has no __init__ of its own, so this
        # call resolves to object.__init__, which raises TypeError on
        # Python 3 when given extra arguments.
        super(TaggedLine, self).__init__()
        self.tag = tag

def sections(stream):
    """
    Tag each line of the stream with its [section] (or None).

    Iterates over ``stream`` line by line. A line that starts with a
    ``[name]`` header sets the current section to ``name`` and is not
    yielded itself. Every other line is yielded as a ``TaggedLine`` whose
    ``tag`` is the current section name, or ``None`` for lines that appear
    before the first header.
    """
    # Raw string: '\[' and '\]' are not valid string escapes, so a plain
    # literal triggers an invalid-escape warning on newer Python versions.
    # The compiled pattern is unchanged.
    section_pattern = re.compile(r'\[(.*)\]')
    section = None
    for line in stream:
        # match() anchors at the start of the line only.
        matcher = section_pattern.match(line)
        if matcher:
            section = matcher.group(1)
            continue
        yield TaggedLine(line, section)

def splitter(stream):
    """
    Group the tagged lines of the stream by section.

    Returns the ``itertools.groupby`` iterator over ``sections(stream)``,
    keyed on each line's ``tag`` attribute, so it produces
    ``(tag, lines_iterator)`` pairs for runs of consecutive lines that
    share the same tag.
    """
    tag_of = operator.attrgetter('tag')
    tagged_lines = sections(stream)
    return itertools.groupby(tagged_lines, tag_of)

def parsed_sections(stream):
    """
    Yield ``(section, rows)`` for every group produced by ``splitter``.

    ``rows`` is the list of rows obtained by feeding the group's lines to
    ``csv.reader``; each row is a list of the field strings on that line.
    """
    for tag, group in splitter(stream):
        # Consume the group right away: a groupby sub-iterator is only
        # valid until the parent iterator advances to the next group.
        rows = [row for row in csv.reader(group)]
        yield tag, rows

if __name__ == '__main__':
    # Demo: parse 'data.csv' from the current directory and show the first
    # two rows of every section.
    with open('data.csv') as stream:
        for section, data in parsed_sections(stream):
            # A single pre-formatted argument prints the same text whether
            # print is the Python 2 statement or the Python 3 function;
            # the bare statement form is a SyntaxError on Python 3.
            print('section %s' % section)
            pprint.pprint(data[:2])

Save your file as 'data.csv' and the script will run on your data with this output:

section L02
[['g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'w',
  'w',
  'w',
  'w',
  'g',
  'g'],
 ['g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'g',
  'w',
  'w',
  'w',
  'w',
  'w',
  'g',
  'g']]
section L01
[['d',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd'],
 ['d',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd',
  'd']]

这篇关于在python中读取一个多层的csv文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆