速度问题 [英] speed problems
问题描述
嗨组,
我前段时间对Python感兴趣并将一个简单的
perl脚本转换为python。该脚本非常简单,它从一些maillog文件中生成一个
发现的病毒列表,以便进一步处理。
我发现这里存在巨大的差异脚本的执行时间,
支持perl,我无法确定出现了什么问题;
perl运行:
0.07实际0.05用户0.01 sys
0.07实际0.05用户0.01 sys
0.07实际0.04用户0.02 sys
python运行:
0.27实际0.23用户0.03 sys
0.28实际0.21用户0.05 sys
0.27实际0.19用户0.06 sys
这是使用一个小的未压缩日志文件(1.4M)测量的。每当需要解压缩东西时,差异
会变得更大。
以下是这两个脚本,请你看看并告诉我我在哪里/>
应该寻找优化吗?
perl:
my(@maillogs)=(
" / home / logs / maillog"," /home/logs/maillog.0.gz" ;,
" /home/logs/maillog.1.gz"," / home / logs /maillog.2.gz",
" /home/logs/maillog.3.gz" ;,
);
my($ gzip)=" / usr / bin / gzip" ;;
my($ bzip2)=" / usr / bin / bzip2" ;;
my($ total)= 0.0;
my(%virstat);
foreach my logfile(@maillogs)
{
if(-f $ logfile)
{
#是压缩的吗?
if($ logfile =〜/\。[bg] z2?$ /)
{
if(!open LF," $ gzip -cd $ logfile | ")
{
打开LF," $ bzip2 -cd $ logfile |"或者
死无法解压'$ logfile''\ nn" ;;
}
}
else
{
open LF,"< $ logfile"或者死无法打开'$ logfile''\ n" ;;
}
while(< LF>)
{
if(/ INFECTED /)
{
#我们只需要病毒名称
$ _ =〜s /.* INFECTED。* \((。*)\)。* / $ 1 / g;
#如果发现多个病毒
if(/,/)
{
#拆分他们
my(@vir)= split /,/,$ _;
foreach my $ v(@vir)
{
chomp $ v;
$ virstat {$ v } ++;
$ total ++;
}
}
else
{
chomp;
$ virstat {$ _} ++;
$ total ++;
}
}
}
关闭LF;
}
#else
#{
#print STDERR"''$ logfile''不存在,跳过它。\ n" ;;
#} < br $>
}
预计我的$ v(排序键%virstat)
{
我的$ p =($ virstat {$ v} / $ total)* 100;
$ p = sprintf"%s:\ t%5.2f %%",$ v,$ p;
print" $ p\\\
" ;;
}
#--- perl脚本结束---
python:
import os
导入字符串
导入重新
maillogs = [
" / home / logs / maillog" ;," /home/logs/maillog.0.gz",
" /home/logs/maillog.1.gz" ;,&home/logs/maillog.2.gz" ;,
" /home/logs/maillog.3.gz"
]
virstat = {}
total = 0.0#keep this float
maillogs中的logfile:
if os.path.isfile(logfile):
#是压缩的吗?
如果logfile [-3:] ==''。gz'':
import gzip
lf = gzip.GzipFile(logfile," r" )
else:
if logfile [-4:] ==''。bz2'':
import bz2
lf = bz2.BZ2File(logfile," r")
else:
#uncompressed
lf = open(logfile, r,
for lf.readlines()中的行:
if string.count(line," INFECTED"):
vname = re.compile(" INFECTED \((。*)\)")。search(line).group(1)
if string.count( vname,","):
for vnam in string.split(vname,","):
如果vnam不在virstat中:
virstat [vnam] = 1
否则:
virstat [vnam] + = 1
总计+ = 1
else:
如果vname不在virstat中:
virstat [vname] = 1
else:
virstat [vname] + = 1
总计+ = 1
lf.close()
#else:
#print" logfile''%s''不存在,跳过它。 %logfile
$ v $ b for vname in virstat.keys():
p =(virstat [vname] / total)* 100
print"%s:%5.2f %%" %(vname,p)
#--- python脚本结束---
感谢您提供的任何帮助,
种类问候,
Axel
gzip)=" / usr / bin / gzip" ;;
my(
bzip2)=" / usr / bin / bzip2" ;;
my(
total)= 0.0;
my(%virstat);
foreach my
Hi group,
I''ve become interested in Python a while ago and just converted a simple
perl script to python. The script is very simple, it generates a list of
found virusses from some maillog files for further processing.
I''ve found that there''s a huge difference in execution time for the scripts,
in favor of perl and I can''t pinpoint what''s going wrong;
perl runs:
0.07 real 0.05 user 0.01 sys
0.07 real 0.05 user 0.01 sys
0.07 real 0.04 user 0.02 sys
python runs:
0.27 real 0.23 user 0.03 sys
0.28 real 0.21 user 0.05 sys
0.27 real 0.19 user 0.06 sys
This was measured with a small uncompressed logfile (1.4M). The difference
grows much bigger whenever it needs to uncompress things.
Here are both scripts, could you please have a look and tell me where I
should look for optimizations?
perl:
my (@maillogs) = (
"/home/logs/maillog", "/home/logs/maillog.0.gz",
"/home/logs/maillog.1.gz", "/home/logs/maillog.2.gz",
"/home/logs/maillog.3.gz",
);
my ($gzip) = "/usr/bin/gzip";
my ($bzip2)= "/usr/bin/bzip2";
my ($total) = 0.0;
my (%virstat);
foreach my $logfile (@maillogs)
{
if ( -f $logfile )
{
# is it compressed?
if ( $logfile =~ /\.[bg]z2?$/ )
{
if ( !open LF, "$gzip -cd $logfile|" )
{
open LF, "$bzip2 -cd $logfile|" or
die "unable to uncompress ''$logfile''\n";
}
}
else
{
open LF, "<$logfile" or die "couldn''t open ''$logfile''\n";
}
while (<LF>)
{
if (/INFECTED/)
{
# we need only the virus name
$_ =~ s/.*INFECTED.*\((.*)\).*/$1/g;
# if multiple virusses found
if (/, /)
{
# split them
my (@vir) = split /, /, $_;
foreach my $v (@vir)
{
chomp $v;
$virstat{$v}++;
$total++;
}
}
else
{
chomp;
$virstat{$_}++;
$total++;
}
}
}
close LF;
}
# else
# {
# print STDERR "''$logfile'' doesn''t exist, skipping it.\n";
# }
}
foreach my $v (sort keys %virstat)
{
my $p = ($virstat{$v}/$total)*100;
$p = sprintf "%s:\t%5.2f%%", $v, $p;
print "$p\n";
}
#---end of perl script ---
python:
import os
import string
import re
maillogs = [
"/home/logs/maillog", "/home/logs/maillog.0.gz",
"/home/logs/maillog.1.gz", "/home/logs/maillog.2.gz",
"/home/logs/maillog.3.gz"
]
virstat={}
total=0.0 # keep this float
for logfile in maillogs:
if os.path.isfile( logfile ):
# is it compressed?
if logfile[-3:] == ''.gz'':
import gzip
lf = gzip.GzipFile( logfile, "r" )
else:
if logfile[-4:] == ''.bz2'':
import bz2
lf = bz2.BZ2File( logfile, "r" )
else:
# uncompressed
lf = open( logfile, "r" )
for line in lf.readlines():
if string.count( line, "INFECTED" ):
vname = re.compile( "INFECTED \((.*)\)" ).search( line ).group(1)
if string.count( vname, ", " ):
for vnam in string.split( vname, ", " ):
if vnam not in virstat:
virstat[vnam] = 1
else:
virstat[vnam] += 1
total += 1
else:
if vname not in virstat:
virstat[vname] = 1
else:
virstat[vname] += 1
total += 1
lf.close()
# else:
# print "logfile ''%s'' doesn''t exist, skipping it." % logfile
for vname in virstat.keys():
p = (virstat[vname]/total)*100
print "%s: %5.2f%%" % (vname, p)
#--- End of python script ---
Thanks for any help you can provide,
Kind regards,
Axel
gzip) = "/usr/bin/gzip";
my (
bzip2)= "/usr/bin/bzip2";
my (
total) = 0.0;
my (%virstat);
foreach my
这篇关于速度问题的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!