在 Python 中递归遍历目录并按文件类型记录文件 [英] Recurse through directories and log files by file type in Python

查看:127
本文介绍了在python中按文件类型遍历目录和日志文件的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想调查一组目录并获取以下信息

I want to investigate a set of directories and get the following info


  1. 按文件类型划分的文件数量

  2. 按文件类型列出的文件清单(含完整路径)

  1. Number of files per file type
  2. List of files per file type, with full paths

每个子目录都需要第 1 项和第 2 项

Items 1 and 2 are needed for each subdirectory.

我有以下代码。生成扩展名列表的部分没有问题。我卡在如何为每个扩展名对应的列表和计数器命名上:事先并不知道会有哪些扩展名,也不知道有多少个。也不确定之后还会出现什么其他问题。

I have the following code. The ext list generator is fine. I am stuck on how to assign the name of the list and counter for each extension. We don't know what these will be or how many there are. Not sure what other issues may arise after this.

import os, sys, datetime

# Question script (Python 2): walk SourceDIR, collect every file path and the
# distinct extensions, then try to keep one counter and one list per extension.
# NOTE: the per-extension section below is not valid Python -- it is the part
# the question asks about.
top = os.getcwd() # change to a specific path if required.
RootOutput = top
SourceDIR = top
outDIR = top+"\\workingFiles" # directory where output is written to. Includes temp files
# END setting base paths
# NOTHING BELOW should need editing.
List =[]      # full path of every file found by the walk
extList=[]    # distinct extensions, in first-seen order

os.chdir(top)

for root, dirs, files in os.walk(SourceDIR, topdown=False):
    for fl in files:
      currentFile=os.path.join(root, fl)
      # Slice from the last '.', so the extension keeps its leading dot.
      # If the name has no '.', rfind() returns -1 and this yields only the
      # last character of the name.
      ext=fl[fl.rfind('.'):]
      if ext not in extList:
        extList.append(ext)
      List.append(currentFile)

print extList

# INVALID: an expression such as ext+"Counter" cannot be an assignment target
# (SyntaxError). Variable names cannot be built from strings this way.
for ext in extList:
    ext+"Counter"=0
    ext+"FileList"=[]

# INVALID for the same reason: augmented assignment to an expression.
# The .append() line would parse as ext + ("FileList".append(fl)).
for fl in List:
    ext=fl[fl.rfind('.'):]
    ext+"Counter"+=1
    ext+"FileList".append(fl)

# These prints would output the concatenated strings themselves
# (e.g. the extension followed by "Counter"), not the value of any variable.
for ext in extList:
    print ext
    print ext+"Counter"
    print ext+"FileList"

已根据答案更新代码。输出的 txt 文件有问题:只创建了一个文本文件。

Code updated as per the answer. There is an issue with the txt output: only one text file is created.

# Report every extension and append its files to "<extension>_Log.txt"
# in the current working directory, one "<running number>,<path>" line per file.
for elem in ext_dict:
    print(elem)
    print(ext_dict[elem]["Counter"])
    print(ext_dict[elem]["FileList"])
    # `with` closes the log even if a write fails part-way through.
    with open(elem+'_Log.txt', 'a') as log:
        # Number the entries from 1, as the manual counter did.
        for Num, fl in enumerate(ext_dict[elem]["FileList"], 1):
            log.write(str(Num)+","+str(fl)+"\n")

可供任何人使用的最终脚本。

#-------------------------------------------------------------------------------
# Name:    File_Review
# Purpose: Review of all files in directory/subdirectories with report on file type and size
#
# Author:      georgec
#
# Created:     25/01/2013
# Copyright:   (c) ATGIS 2013
# Licence:     Creative Commons 3.0 - BY
#-------------------------------------------------------------------------------

import os, sys, datetime

# --- Base path settings (edit these to suit the machine running the script) ---
top = os.getcwd() # change to a specific path if required.
RootOutput = top  # not referenced in the visible part of this script
SourceDIR = top
# Overrides the assignment above: the tree that is actually scanned.
SourceDIR = r'P:\2013'
outDIR = top # directory where output is written to. Includes temp files
finalDIR = top+"\\final" # folder for final data only
# Each entry is passed to InvestigateFiles() as DirLimiter; only files whose
# full path contains the entry are listed. '' is contained in every path.
DirLimiterList=['']

# END setting base paths
# NOTHING BELOW should need editing.

# Make `top` the working directory (a no-op unless `top` is edited above).
os.chdir(top)

def InvestigateFiles(SourceDIR,outDIR,DirLimiter):
    """Walk SourceDIR and append one log file per file extension to outDIR.

    Every file below SourceDIR that has an extension contributes that
    extension to the report. A file is listed under its extension only when
    DirLimiter occurs somewhere in its full path (the empty string matches
    every path). Files without an extension are ignored.

    For each extension the function prints the extension, the number of
    listed files and the list itself, then appends one line per file,
    "<running number>;<full path>;<size in bytes>", to
    "<ext>_<label>_Log.txt" inside outDIR. <label> is the part of DirLimiter
    between its first and last backslash (empty when DirLimiter is empty).

    Args:
        SourceDIR: root directory of the tree to scan.
        outDIR: directory that receives the log files; created if missing.
            An empty string means the current working directory.
        DirLimiter: substring a file's full path must contain to be listed.

    Returns:
        dict mapping each extension (without the leading dot) to
        {"Counter": number of listed files, "FileList": their full paths}.

    Raises:
        OSError: if a listed file's size cannot be read or a log file
            cannot be written.
    """
    extList = []   # extensions in first-seen order, for deterministic output
    ext_dict = {}

    for root, dirs, files in os.walk(SourceDIR, topdown=False):
        for fl in files:
            # Take the extension from the file name only, exactly once, so
            # dots in directory names can never be mistaken for it.
            ext = os.path.splitext(fl)[1][1:]
            if not ext:
                continue
            if ext not in ext_dict:
                extList.append(ext)
                ext_dict[ext] = {"Counter": 0, "FileList": []}
            currentFile = os.path.join(root, fl)
            if DirLimiter in currentFile:
                ext_dict[ext]["Counter"] += 1
                ext_dict[ext]["FileList"].append(currentFile)

    print(extList)

    # Honour the outDIR argument instead of silently writing to the cwd.
    if outDIR and not os.path.isdir(outDIR):
        os.makedirs(outDIR)
    label = DirLimiter[DirLimiter.find('\\')+1:DirLimiter.rfind('\\')]

    for elem in extList:
        entry = ext_dict[elem]
        print(elem)
        print(entry["Counter"])
        print(entry["FileList"])
        logPath = os.path.join(outDIR, elem+'_'+label+'_Log.txt')
        # Append mode keeps earlier runs; `with` closes the file on error too.
        with open(logPath, 'a') as log:
            for Num, fl in enumerate(entry["FileList"], 1):
                log.write(str(Num)+";"+str(fl)+";"+str(os.path.getsize(fl))+"\n")

    return ext_dict

# Produce one set of per-extension logs for every configured path filter.
for limiter in DirLimiterList:
    InvestigateFiles(SourceDIR, outDIR, limiter)


解决方案

您应该使用字典来存储数据

You should use a dictionary to store the data:

# Keep the per-extension data in one dictionary keyed by extension, instead
# of trying to build variable names from strings.
ext_dict = {}

# Create the dictionary: one record per extension with a running count
# and the list of matching files.
for ext in extList:
    ext_dict[ext] = {"Counter": 0, "FileList": []}

# Populate the dictionary
for fl in List:
    # Same slicing as when extList was built (extension keeps its dot).
    ext = fl[fl.rfind('.'):]
    # Skip paths whose slice is not a known extension, e.g. a dot-less file
    # below a directory whose name contains a dot.
    if ext in ext_dict:
        ext_dict[ext]["Counter"] += 1
        ext_dict[ext]["FileList"].append(fl)

# Iterate over dictionary keys
for elem in ext_dict:
    print(elem)
    print(ext_dict[elem]["Counter"])
    print(ext_dict[elem]["FileList"])

这篇关于在python中按文件类型遍历目录和日志文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆