Python 编写带有行和列转置的 .csv 文件 [英] Python writing a .csv file with rows and columns transpose
问题描述
我所拥有的是一长串代码,涉及读取不同的文件,最后将所有内容放入不同的 .csv 中
这是我所有的代码
导入csv导入 os.path#打开文件+readlineswith open("C:/Users/Ivan Wong/Desktop/Placement/Lists of targets/Mouse/UCSC to Ensembl.csv", "r") as f:reader = csv.reader(f, delimiter = ',')#在第一行找到名字的文件对于阅读器中的行:graph_filename = os.path.join("C:/Python27/Scripts/My scripts/Selenoprotein/NMD targets",row[0]+"_nt_counts.txt.png")如果 os.path.exists(graph_filename):y = row[0]+'_nt_counts.txt'r = open('C:/Users/Ivan Wong/Desktop/Placement/fp_mesc_nochx/'+y, 'r')k = r.readlines()关闭德尔 k[:1]k = map(lambda s: s.strip(), k)整数 = 地图(整数,k)导入迭代工具#每3行添加一个数字def grouper(n, iterable, fillvalue=None):石斑鱼(3,'ABCDEFG','x')--> ABC DEF Gxx"args = [iter(iterable)] * nreturn itertools.izip_longest(*args, fillvalue=fillvalue)结果 = 地图(总和,石斑鱼(3,整数,0))e = 行[1]cDNA = open('C:/Users/Ivan Wong/Desktop/Placement/Downloaded seq/Mouse/cDNA.txt', 'r')seq = cDNA.readlines()# 获取所有具有基因名称的行行数 = 0;线基因 = []对于序列中的行:行数 = 行数 +1如果'>'排队:lineGenes.append(str(lineNum))如果 '>'+e 在线:行开始 = 行号cDNA.close# 这是哪个基因index1 = lineGenes.index(str(lineBegin))lineEnd = lineGenes[index1+1]# linebegin 和 lineEnd 现在给你,在哪里寻找你的序列,所有这些# 你要做的就是读取文件中lineBegin和lineEnd之间的行# 并将其变成单个字符串.lineEnd = lineGenes[index1+1]最后一行 = int(lineEnd) -1# 在您的代码中,您已经列出了所有行 (q),首先删除# \n 和其他符号,然后将所有行组合成一大串核苷酸(像这样)qq = seq[lineBegin:Lastline]qq = map(lambda s: s.strip(), qq)字符串 = ''对于范围内的 i(len(qq)):字符串 = 字符串 + qq[i]# 现在你想得到一个三元组的列表,你可以再次使用 for 循环:# 首先获取字符串的长度lenString = len(string);# 这是你的密码子列表listCodon = []对于范围内的 i (0,lenString/3):listCodon.append(string[0+i*3:3+i*3])以 open(e+'.csv','wb') 作为输出文件:outfile.writelines(str(result)+'\n'+str(listCodon))
我的问题是生成的文件如下所示:
0 0 0'GCA' 'CTT' 'GGT'
我想这样做:
0 GCA0 CTT0 GGT
我可以在我的代码中做什么来实现这一目标?
打印结果:
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 2, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 3, 3, 0, 3, 1, 2, 1, 2, 1, 0, 1, 0, 1, 2, 1, 0, 5, 0, 0, 0, 0, 6, 0, 1, 0, 0, 2, 0, 1, 0, 0, 1, 1, 0, 1, 6, 34, 35, 32, 1, 1, 0, 4, 1, 0, 1, 0, 0, 0, 0, 1, 6, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
打印列表密码子:
['gtt', 'gaa', 'aca', 'gag', 'aca', 'tgt', 'tct', 'gga', 'gat', 'gag', 'ctg', 'tgg', 'gca', 'gaa', 'gga', 'cag', 'gcc', 'taa', 'gca', 'cag', 'gca', 'gca', 'gag', 'ctt', 'tga', 'tct', 'ctt', 'ggt', 'gat', 'cgg', 'tgg', 'ggg', 'atc', 'cgg', 'tgg', 'cct', 'agc', 'ttg', 'tgc', 'caa', 'gga', 'agc', 'tgc', 'tca', 'gct', 'ggg', 'aaa', 'gaa', 'ggt', 'ggc', 'tgt', 'ggc', 'tga', 'cta', 'tgt', 'gga', 'acc', 'ttc', 'tcc', 'ccg', 'agg', 'cac', 'caa', 'gtg', 'ggg', 'cct', 'tgg', 'tgg', 'cac', 'ctg', 'tgt', 'caa', 'cgt', 'ggg', 'ttg', 'cat', 'acc', 'caa', 'gaa', 'gct', 'gat', 'gca', 'tca', 'ggc', 'tgc', 'act', 'gct', 'ggg', 'ggg', 'cat', 'gat', 'cag', 'aga', 'tgc', 'tca', 'cca', 'cta', 'tgg', 'ctg', 'gga', 'ggt', 'ggc', 'cca', 'gcc', 'tgt', 'cca', 'aca', 'caa', 'ctg', 'gtg', 'aga', 'gag', 'aag', 'ccc', 'ttg', 'ccc', 'tct', 'gca', 'ggt', 'ccc', 'att', 'gaa', 'agg', 'aga', 'ggt', 'ttg', 'ctc', 'tct', 'gcc', 'act', 'cat', 'ctg', 'taa', 'ccg', 'tga', 'gct', 'ttt', 'cca', 'ccc', 'ggc', 'ctc', 'ctc', 'ttt', 'gat', 'ccc', 'aga', 'ata', 'atg', 'act', 'ctg', 'aga', 'ctt', 'ctt', 'atg', 'tat', 'gaa', 'taa', 'atg', 'cct', 'ggg', 'cca', 'aaa', 'acc']
左边的图片是Marek的代码帮我实现的,我想做一个改进,让它像右边的图片一样排列
您可以使用 zip()
将两个迭代器压缩在一起.所以如果你有
result = [0, 0, 0, 0, 0]listCodons = ['gtt', 'gaa', 'aca', 'gag', 'aca']
那你就可以了
<预><代码>>>>列表(zip(结果,listCodons))[(0, 'gtt'), (0, 'gaa'), (0, 'aca'), (0, 'gag'), (0, 'aca')]或者,例如:
with open(e+'.csv','w') 作为输出文件:out = csv.writer(输出文件)out.writerows(zip(result, listCodons))
What I have is a long list of codes that involves reading different files and in the end putting everything into different .csv
This is all my codes
import csv
import os.path
#open files + readlines
with open("C:/Users/Ivan Wong/Desktop/Placement/Lists of targets/Mouse/UCSC to Ensembl.csv", "r") as f:
reader = csv.reader(f, delimiter = ',')
#find files with the name in 1st row
for row in reader:
graph_filename = os.path.join("C:/Python27/Scripts/My scripts/Selenoprotein/NMD targets",row[0]+"_nt_counts.txt.png")
if os.path.exists(graph_filename):
y = row[0]+'_nt_counts.txt'
r = open('C:/Users/Ivan Wong/Desktop/Placement/fp_mesc_nochx/'+y, 'r')
k = r.readlines()
r.close
del k[:1]
k = map(lambda s: s.strip(), k)
interger = map(int, k)
import itertools
#adding the numbers for every 3 rows
def grouper(n, iterable, fillvalue=None):
"grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
args = [iter(iterable)] * n
return itertools.izip_longest(*args, fillvalue=fillvalue)
result = map(sum, grouper(3, interger, 0))
e = row[1]
cDNA = open('C:/Users/Ivan Wong/Desktop/Placement/Downloaded seq/Mouse/cDNA.txt', 'r')
seq = cDNA.readlines()
# get all lines that have a gene name
lineNum = 0;
lineGenes = []
for line in seq:
lineNum = lineNum +1
if '>' in line:
lineGenes.append(str(lineNum))
if '>'+e in line:
lineBegin = lineNum
cDNA.close
# which gene is this
index1 = lineGenes.index(str(lineBegin))
lineEnd = lineGenes[index1+1]
# linebegin and lineEnd now give you, where to look for your sequence, all that
# you have to do is to read the lines between lineBegin and lineEnd in the file
# and make it into a single string.
lineEnd = lineGenes[index1+1]
Lastline = int(lineEnd) -1
# in your code you have already made a list with all the lines (q), first delete
# \n and other symbols, then combine all lines into a big string of nucleotides (like this)
qq = seq[lineBegin:Lastline]
qq = map(lambda s: s.strip(), qq)
string = ''
for i in range(len(qq)):
string = string + qq[i]
# now you want to get a list of triplets, again you can use the for loop:
# first get the length of the string
lenString = len(string);
# this is your list codons
listCodon = []
for i in range(0,lenString/3):
listCodon.append(string[0+i*3:3+i*3])
with open(e+'.csv','wb') as outfile:
outfile.writelines(str(result)+'\n'+str(listCodon))
My problem here is the file produced looks like this:
0 0 0
'GCA' 'CTT' 'GGT'
I want to make it like this:
0 GCA
0 CTT
0 GGT
What can I do in my code to achieve this?
print result:
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 2, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 3, 3, 0, 3, 1, 2, 1, 2, 1, 0, 1, 0, 1, 2, 1, 0, 5, 0, 0, 0, 0, 6, 0, 1, 0, 0, 2, 0, 1, 0, 0, 1, 1, 0, 1, 6, 34, 35, 32, 1, 1, 0, 4, 1, 0, 1, 0, 0, 0, 0, 1, 6, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
print listCodon:
['gtt', 'gaa', 'aca', 'gag', 'aca', 'tgt', 'tct', 'gga', 'gat', 'gag', 'ctg', 'tgg', 'gca', 'gaa', 'gga', 'cag', 'gcc', 'taa', 'gca', 'cag', 'gca', 'gca', 'gag', 'ctt', 'tga', 'tct', 'ctt', 'ggt', 'gat', 'cgg', 'tgg', 'ggg', 'atc', 'cgg', 'tgg', 'cct', 'agc', 'ttg', 'tgc', 'caa', 'gga', 'agc', 'tgc', 'tca', 'gct', 'ggg', 'aaa', 'gaa', 'ggt', 'ggc', 'tgt', 'ggc', 'tga', 'cta', 'tgt', 'gga', 'acc', 'ttc', 'tcc', 'ccg', 'agg', 'cac', 'caa', 'gtg', 'ggg', 'cct', 'tgg', 'tgg', 'cac', 'ctg', 'tgt', 'caa', 'cgt', 'ggg', 'ttg', 'cat', 'acc', 'caa', 'gaa', 'gct', 'gat', 'gca', 'tca', 'ggc', 'tgc', 'act', 'gct', 'ggg', 'ggg', 'cat', 'gat', 'cag', 'aga', 'tgc', 'tca', 'cca', 'cta', 'tgg', 'ctg', 'gga', 'ggt', 'ggc', 'cca', 'gcc', 'tgt', 'cca', 'aca', 'caa', 'ctg', 'gtg', 'aga', 'gag', 'aag', 'ccc', 'ttg', 'ccc', 'tct', 'gca', 'ggt', 'ccc', 'att', 'gaa', 'agg', 'aga', 'ggt', 'ttg', 'ctc', 'tct', 'gcc', 'act', 'cat', 'ctg', 'taa', 'ccg', 'tga', 'gct', 'ttt', 'cca', 'ccc', 'ggc', 'ctc', 'ctc', 'ttt', 'gat', 'ccc', 'aga', 'ata', 'atg', 'act', 'ctg', 'aga', 'ctt', 'ctt', 'atg', 'tat', 'gaa', 'taa', 'atg', 'cct', 'ggg', 'cca', 'aaa', 'acc']
picture on the left is what Marek's code helped me to achieve, I want to make an improvement so it arrange like the picture on the right
You can use zip()
to zip together two iterators. So if you have
result = [0, 0, 0, 0, 0]
listCodons = ['gtt', 'gaa', 'aca', 'gag', 'aca']
then you can do
>>> list(zip(result, listCodons))
[(0, 'gtt'), (0, 'gaa'), (0, 'aca'), (0, 'gag'), (0, 'aca')]
or, for your example:
with open(e+'.csv','w') as outfile:
out = csv.writer(outfile)
out.writerows(zip(result, listCodons))
这篇关于Python 编写带有行和列转置的 .csv 文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!