在python中按文件类型遍历目录和日志文件 [英] Recurse through directories and log files by file type in python
问题描述
我想调查一组目录并获取以下信息
I want to investigate a set of directories and get the following info
- 按文件类型统计的文件数量
- 按文件类型列出的文件列表(含完整路径)
- Number of files by file type
- List of files by file type, with full path
- 每个子目录的上述第 1、2 项
- Items 1 and 2 per subdirectory
我有以下代码。ext 列表的生成部分没有问题。我卡在了如何为每个扩展名的列表和计数器命名上。我们事先不知道会有哪些扩展名,也不知道会有多少个。也不确定之后还会出现什么其他问题。
I have the following code. The ext list generator is fine. I am stuck on how to assign the name of the list and counter for each extension. We don't know what these will be or how many there are. Not sure what other issues may arise after this.
import os, sys, datetime
# Collect every file under SourceDIR together with the set of extensions seen.
# Written for Python 2; the single-argument print(...) call also runs on Python 3.
top = os.getcwd()  # change to a specific path if required.
RootOutput = top
SourceDIR = top
outDIR = top+"\\workingFiles"  # directory where output is written to. Includes temp files
# END setting base paths
# NOTHING BELOW should need editing.
List = []     # full path of every file found under SourceDIR
extList = []  # distinct extensions (leading dot included), in first-seen order
os.chdir(top)
for root, dirs, files in os.walk(SourceDIR, topdown=False):
    for fl in files:
        currentFile = os.path.join(root, fl)
        # Everything from the last '.' onward, e.g. '.txt'.
        # NOTE: for a name without any dot, rfind returns -1, so this yields
        # the last character of the name rather than an empty extension.
        ext = fl[fl.rfind('.'):]
        if ext not in extList:
            extList.append(ext)
        List.append(currentFile)
print(extList)
# --- The part the question is about: NOT valid Python as written. ---
# Intent: create one counter and one file list per extension. A variable name
# cannot be built with `ext+"Counter"`; the left side of `=` / `+=` below is
# an expression, so these assignments are syntax errors.
for ext in extList:
ext+"Counter"=0
ext+"FileList"=[]
# Intent: tally each collected path under its extension.
for fl in List:
ext=fl[fl.rfind('.'):]
ext+"Counter"+=1
# Parses, but would call .append on the string literal "FileList".
ext+"FileList".append(fl)
# Intent: report the per-extension results. As written, the last two prints
# would output concatenated strings (ext followed by "Counter" / "FileList"),
# not the contents of any variable.
for ext in extList:
print ext
print ext+"Counter"
print ext+"FileList"
根据答案更新代码。 txt文件出现问题,因为它只会创建一个文本文件。
CODE updated as per answer. Issue with txt file as it only creates one text file.
# Report each extension and write one log file per extension.
# NOTE: ext_dict is expected to have been built already, mapping each
# extension to {"Counter": <int>, "FileList": [<paths>]}.
for elem in ext_dict.keys():
    print(elem)
    print(ext_dict[elem]["Counter"])
    print(ext_dict[elem]["FileList"])
    # The log is opened inside the loop so that every extension gets its own
    # file; mode 'a' appends, so repeated runs add to an existing log.
    with open(elem+'_Log.txt', 'a') as log:
        for Num, fl in enumerate(ext_dict[elem]["FileList"], 1):
            log.write(str(Num)+","+str(fl)+"\n")
可供任何人使用的最终脚本。
#-------------------------------------------------------------------------------
# Name: File_Review
# Purpose: Review of all files in directory/subdirectories with report on file type and size
#
# Author: georgec
#
# Created: 25/01/2013
# Copyright: (c) ATGIS 2013
# Licence: Creative Commons 3.0 - BY
#-------------------------------------------------------------------------------
import os, sys, datetime
# ---- Base paths: edit this section to suit the tree being reviewed ----
top = os.getcwd()  # working directory; replace with a specific path if required
RootOutput = top
outDIR = top  # directory where output is written to. Includes temp files
finalDIR = top+"\\final"  # folder for final data only
SourceDIR = r'P:\2013'  # root of the directory tree to investigate
DirLimiterList = ['']  # one investigation is run per entry in this list
# ---- END setting base paths ----
# NOTHING BELOW should need editing.
os.chdir(top)
def InvestigateFiles(SourceDIR, outDIR, DirLimiter):
    """Walk SourceDIR and write one log file per file extension.

    Every file under ``SourceDIR`` whose full path contains ``DirLimiter``
    and whose name has a non-empty extension is grouped by that extension
    (text after the last dot, without the dot). For each extension the
    extension, its file count and its file list are printed, and one line
    ``<n>;<full path>;<size in bytes>`` per file is appended to
    ``<ext>_<tag>_Log.txt`` in the current working directory, where
    ``<tag>`` is the part of ``DirLimiter`` between its first and last
    backslash.

    Args:
        SourceDIR: Root directory to walk.
        outDIR: Accepted for interface compatibility; not used by this
            function (logs are written to the current working directory).
        DirLimiter: Substring that a file's full path must contain. The
            empty string is contained in every path, so it matches all files.
    """
    extList = []   # extensions in first-seen order, for the summary print
    ext_dict = {}  # ext -> {"Counter": int, "FileList": [full paths]}

    for root, _dirs, files in os.walk(SourceDIR, topdown=False):
        for fl in files:
            # rpartition returns an empty separator when the name has no dot,
            # so extensionless files are skipped instead of having their
            # whole name mistaken for an extension.
            _, dot, ext = fl.rpartition('.')
            if not dot or not ext:
                continue
            currentFile = os.path.join(root, fl)
            if DirLimiter not in currentFile:
                continue
            if ext not in ext_dict:
                extList.append(ext)
                ext_dict[ext] = {"Counter": 0, "FileList": []}
            ext_dict[ext]["Counter"] += 1
            ext_dict[ext]["FileList"].append(currentFile)

    print(extList)

    # Portion of DirLimiter between its first and last backslash; used to
    # tell apart the logs produced for different limiters.
    limiterTag = DirLimiter[DirLimiter.find('\\')+1:DirLimiter.rfind('\\')]

    for elem in ext_dict.keys():
        print(elem)
        print(ext_dict[elem]["Counter"])
        print(ext_dict[elem]["FileList"])
        # Mode 'a' appends, so repeated runs add to an existing log; the
        # with-block closes the file even if a write or getsize call fails.
        with open(elem+'_'+limiterTag+'_Log.txt', 'a') as log:
            for Num, fl in enumerate(ext_dict[elem]["FileList"], 1):
                log.write(str(Num)+";"+str(fl)+";"+str(os.path.getsize(fl))+"\n")
# Run one investigation per configured path filter.
for DirLimiter in DirLimiterList:
    InvestigateFiles(SourceDIR, outDIR, DirLimiter)
您应该使用字典来存储数据
you should use a dictionary for storing the data
# Group the collected files by extension in a dictionary instead of trying
# to create one variable per extension.
# Expects extList (extensions) and List (file paths) to exist already.
ext_dict = {}
# Create the dictionary: one entry per known extension
for ext in extList:
    ext_dict[ext] = {"Counter": 0, "FileList": []}
# Populate the dictionary
for fl in List:
    # Same extension expression that was used to build extList.
    ext = fl[fl.rfind('.'):]
    if ext in ext_dict:
        ext_dict[ext]["Counter"] += 1
        ext_dict[ext]["FileList"].append(fl)
# Iterate over dictionary keys
for elem in ext_dict.keys():
    print(elem)
    # The key must match the "Counter" spelling used when the entry was created.
    print(ext_dict[elem]["Counter"])
    print(ext_dict[elem]["FileList"])
这篇关于在python中按文件类型遍历目录和日志文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!