Python 编写带有行和列转置的 .csv 文件 [英] Python writing a .csv file with rows and columns transpose

查看:62
本文介绍了Python 编写带有行和列转置的 .csv 文件的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我所拥有的是一长串代码,涉及读取不同的文件,最后将所有内容放入不同的 .csv 中

这是我所有的代码

导入csv导入 os.path#打开文件+readlineswith open("C:/Users/Ivan Wong/Desktop/Placement/Lists of targets/Mouse/UCSC to Ensembl.csv", "r") as f:reader = csv.reader(f, delimiter = ',')#在第一行找到名字的文件对于阅读器中的行:graph_filename = os.path.join("C:/Python27/Scripts/My scripts/Selenoprotein/NMD targets",row[0]+"_nt_counts.txt.png")如果 os.path.exists(graph_filename):y = row[0]+'_nt_counts.txt'r = open('C:/Users/Ivan Wong/Desktop/Placement/fp_mesc_nochx/'+y, 'r')k = r.readlines()关闭德尔 k[:1]k = map(lambda s: s.strip(), k)整数 = 地图(整数,k)导入迭代工具#每3行添加一个数字def grouper(n, iterable, fillvalue=None):石斑鱼(3,'ABCDEFG','x')--> ABC DEF Gxx"args = [iter(iterable)] * nreturn itertools.izip_longest(*args, fillvalue=fillvalue)结果 = 地图(总和,石斑鱼(3,整数,0))e = 行[1]cDNA = open('C:/Users/Ivan Wong/Desktop/Placement/Downloaded seq/Mouse/cDNA.txt', 'r')seq = cDNA.readlines()# 获取所有具有基因名称的行行数 = 0;线基因 = []对于序列中的行:行数 = 行数 +1如果'>'排队:lineGenes.append(str(lineNum))如果 '>'+e 在线:行开始 = 行号cDNA.close# 这是哪个基因index1 = lineGenes.index(str(lineBegin))lineEnd = lineGenes[index1+1]# linebegin 和 lineEnd 现在给你,在哪里寻找你的序列,所有这些# 你要做的就是读取文件中lineBegin和lineEnd之间的行# 并将其变成单个字符串.lineEnd = lineGenes[index1+1]最后一行 = int(lineEnd) -1# 在您的代码中,您已经列出了所有行 (q),首先删除# \n 和其他符号,然后将所有行组合成一大串核苷酸(像这样)qq = seq[lineBegin:Lastline]qq = map(lambda s: s.strip(), qq)字符串 = ''对于范围内的 i(len(qq)):字符串 = 字符串 + qq[i]# 现在你想得到一个三元组的列表,你可以再次使用 for 循环:# 首先获取字符串的长度lenString = len(string);# 这是你的密码子列表listCodon = []对于范围内的 i (0,lenString/3):listCodon.append(string[0+i*3:3+i*3])以 open(e+'.csv','wb') 作为输出文件:outfile.writelines(str(result)+'\n'+str(listCodon))

我的问题是生成的文件如下所示:

 0 0 0'GCA' 'CTT' 'GGT'

我想这样做:

0 GCA0 CTT0 GGT

我可以在我的代码中做什么来实现这一目标?

打印结果:

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 2, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 3, 3, 0, 3, 1, 2, 1, 2, 1, 0, 1, 0, 1, 2, 1, 0, 5, 0, 0, 0, 0, 6, 0, 1, 0, 0, 2, 0, 1, 0, 0, 1, 1, 0, 1, 6, 34, 35, 32, 1, 1, 0, 4, 1, 0, 1, 0, 0, 0, 0, 1, 6, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

打印列表密码子:

['gtt', 'gaa', 'aca', 'gag', 'aca', 'tgt', 'tct', 'gga', 'gat', 'gag', 'ctg', 'tgg', 'gca', 'gaa', 'gga', 'cag', 'gcc', 'taa', 'gca', 'cag', 'gca', 'gca', 'gag', 'ctt', 'tga', 'tct', 'ctt', 'ggt', 'gat', 'cgg', 'tgg', 'ggg', 'atc', 'cgg', 'tgg', 'cct', 'agc', 'ttg', 'tgc', 'caa', 'gga', 'agc', 'tgc', 'tca', 'gct', 'ggg', 'aaa', 'gaa', 'ggt', 'ggc', 'tgt', 'ggc', 'tga', 'cta', 'tgt', 'gga', 'acc', 'ttc', 'tcc', 'ccg', 'agg', 'cac', 'caa', 'gtg', 'ggg', 'cct', 'tgg', 'tgg', 'cac', 'ctg', 'tgt', 'caa', 'cgt', 'ggg', 'ttg', 'cat', 'acc', 'caa', 'gaa', 'gct', 'gat', 'gca', 'tca', 'ggc', 'tgc', 'act', 'gct', 'ggg', 'ggg', 'cat', 'gat', 'cag', 'aga', 'tgc', 'tca', 'cca', 'cta', 'tgg', 'ctg', 'gga', 'ggt', 'ggc', 'cca', 'gcc', 'tgt', 'cca', 'aca', 'caa', 'ctg', 'gtg', 'aga', 'gag', 'aag', 'ccc', 'ttg', 'ccc', 'tct', 'gca', 'ggt', 'ccc', 'att', 'gaa', 'agg', 'aga', 'ggt', 'ttg', 'ctc', 'tct', 'gcc', 'act', 'cat', 'ctg', 'taa', 'ccg', 'tga', 'gct', 'ttt', 'cca', 'ccc', 'ggc', 'ctc', 'ctc', 'ttt', 'gat', 'ccc', 'aga', 'ata', 'atg', 'act', 'ctg', 'aga', 'ctt', 'ctt', 'atg', 'tat', 'gaa', 'taa', 'atg', 'cct', 'ggg', 'cca', 'aaa', 'acc']

左边的图片是Marek的代码帮我实现的,我想做一个改进,让它像右边的图片一样排列

解决方案

您可以使用 zip() 将两个迭代器压缩在一起.所以如果你有

result = [0, 0, 0, 0, 0]listCodons = ['gtt', 'gaa', 'aca', 'gag', 'aca']

那你就可以了

<预><代码>>>>列表(zip(结果,listCodons))[(0, 'gtt'), (0, 'gaa'), (0, 'aca'), (0, 'gag'), (0, 'aca')]

或者,例如:

 with open(e+'.csv','w') 作为输出文件:out = csv.writer(输出文件)out.writerows(zip(result, listCodons))

What I have is a long list of codes that involves reading different files and in the end putting everything into different .csv

This is all my codes

import csv
import os.path
#open files + readlines
with open("C:/Users/Ivan Wong/Desktop/Placement/Lists of targets/Mouse/UCSC to Ensembl.csv", "r") as f:
    reader = csv.reader(f, delimiter = ',')
    #find files with the name in 1st row
    for row in reader:
        graph_filename = os.path.join("C:/Python27/Scripts/My scripts/Selenoprotein/NMD targets",row[0]+"_nt_counts.txt.png")
        if os.path.exists(graph_filename):
            y = row[0]+'_nt_counts.txt'  
            r = open('C:/Users/Ivan Wong/Desktop/Placement/fp_mesc_nochx/'+y, 'r')
            k = r.readlines()
            r.close
            del k[:1]
            k = map(lambda s: s.strip(), k)
            interger = map(int, k)   
            import itertools
            #adding the numbers for every 3 rows
            def grouper(n, iterable, fillvalue=None):
                "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
                args = [iter(iterable)] * n
                return itertools.izip_longest(*args, fillvalue=fillvalue)
            result = map(sum, grouper(3, interger, 0))       
            e = row[1]
            cDNA = open('C:/Users/Ivan Wong/Desktop/Placement/Downloaded seq/Mouse/cDNA.txt', 'r')
            seq = cDNA.readlines()
            # get all lines that have a gene name
            lineNum = 0;
            lineGenes = []
            for line in seq:
                lineNum = lineNum +1
                if '>' in line:
                    lineGenes.append(str(lineNum))
                if '>'+e in line:
                    lineBegin = lineNum

            cDNA.close

            # which gene is this
            index1 = lineGenes.index(str(lineBegin))
            lineEnd = lineGenes[index1+1]           
# linebegin and lineEnd now give you, where to look for your sequence, all that 
# you have to do is to read the lines between lineBegin and lineEnd in the file
# and make it into a single string.            
            lineEnd = lineGenes[index1+1]
            Lastline = int(lineEnd) -1

# in your code you have already made a list with all the lines (q), first delete
# \n and other symbols, then combine all lines into a big string of nucleotides (like this)     
            qq = seq[lineBegin:Lastline]
            qq = map(lambda s: s.strip(), qq)
            string  = ''
            for i in range(len(qq)):
                string = string + qq[i]
# now you want to get a list of triplets, again you can use the for loop:
# first get the length of the string
            lenString = len(string);
# this is your list codons
            listCodon = []
            for i in range(0,lenString/3): 
                listCodon.append(string[0+i*3:3+i*3])
            with open(e+'.csv','wb') as outfile:
                outfile.writelines(str(result)+'\n'+str(listCodon))

My problem here is the file produced looks like this:

 0      0      0        
'GCA'  'CTT'   'GGT'

I want to make it like this:

0  GCA    
0  CTT    
0  GGT

What can I do in my code to achieve this?

print result:

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 2, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 3, 3, 0, 3, 1, 2, 1, 2, 1, 0, 1, 0, 1, 2, 1, 0, 5, 0, 0, 0, 0, 6, 0, 1, 0, 0, 2, 0, 1, 0, 0, 1, 1, 0, 1, 6, 34, 35, 32, 1, 1, 0, 4, 1, 0, 1, 0, 0, 0, 0, 1, 6, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

print listCodon:

['gtt', 'gaa', 'aca', 'gag', 'aca', 'tgt', 'tct', 'gga', 'gat', 'gag', 'ctg', 'tgg', 'gca', 'gaa', 'gga', 'cag', 'gcc', 'taa', 'gca', 'cag', 'gca', 'gca', 'gag', 'ctt', 'tga', 'tct', 'ctt', 'ggt', 'gat', 'cgg', 'tgg', 'ggg', 'atc', 'cgg', 'tgg', 'cct', 'agc', 'ttg', 'tgc', 'caa', 'gga', 'agc', 'tgc', 'tca', 'gct', 'ggg', 'aaa', 'gaa', 'ggt', 'ggc', 'tgt', 'ggc', 'tga', 'cta', 'tgt', 'gga', 'acc', 'ttc', 'tcc', 'ccg', 'agg', 'cac', 'caa', 'gtg', 'ggg', 'cct', 'tgg', 'tgg', 'cac', 'ctg', 'tgt', 'caa', 'cgt', 'ggg', 'ttg', 'cat', 'acc', 'caa', 'gaa', 'gct', 'gat', 'gca', 'tca', 'ggc', 'tgc', 'act', 'gct', 'ggg', 'ggg', 'cat', 'gat', 'cag', 'aga', 'tgc', 'tca', 'cca', 'cta', 'tgg', 'ctg', 'gga', 'ggt', 'ggc', 'cca', 'gcc', 'tgt', 'cca', 'aca', 'caa', 'ctg', 'gtg', 'aga', 'gag', 'aag', 'ccc', 'ttg', 'ccc', 'tct', 'gca', 'ggt', 'ccc', 'att', 'gaa', 'agg', 'aga', 'ggt', 'ttg', 'ctc', 'tct', 'gcc', 'act', 'cat', 'ctg', 'taa', 'ccg', 'tga', 'gct', 'ttt', 'cca', 'ccc', 'ggc', 'ctc', 'ctc', 'ttt', 'gat', 'ccc', 'aga', 'ata', 'atg', 'act', 'ctg', 'aga', 'ctt', 'ctt', 'atg', 'tat', 'gaa', 'taa', 'atg', 'cct', 'ggg', 'cca', 'aaa', 'acc']

picture on the left is what Marek's code helped me to achieve, I want to make an improvement so it arrange like the picture on the right

解决方案

You can use zip() to zip together two iterators. So if you have

result = [0, 0, 0, 0, 0]
listCodons = ['gtt', 'gaa', 'aca', 'gag', 'aca']

then you can do

>>> list(zip(result, listCodons))
[(0, 'gtt'), (0, 'gaa'), (0, 'aca'), (0, 'gag'), (0, 'aca')]

or, for your example:

with open(e+'.csv','w') as outfile:
    out = csv.writer(outfile)
    out.writerows(zip(result, listCodons))

这篇关于Python 编写带有行和列转置的 .csv 文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆