检查html文件大小 [英] check html file size
问题描述
有人愿意把下面的 Perl 脚本翻译成 Python 或
Scheme (scsh) 吗?
该脚本接受一个目录路径 inpath,并报告其中所有超过指定
大小的 html 文件。(内嵌图像也计入大小)
同时还会打印一份按大小排序的 html 文件及其大小的报告。
(脚本的副本在这里:
http://xahlee.org/_scripts/check_file_size.pl
)
Xah
xa*@xahlee.org
∑ http://xahlee.org/
#perl >
#Tue Oct 4 14:36:48 PDT 2005
#给一个目录,报告所有html文件的大小。 (计算内嵌图像)
#XahLee.org
use Data::Dumper;
use File::Find;
use File::Basename;
$inpath = '/Users/t/web/mydirectory/';
$sizeLimit = 800 * 1000;
#$ inpath = $ ARGV [0]; #应该给出一条完整的路径;否则
$ File :: Find :: dir不会给出完整的路径。
while($ inpath = ~m @ ^(。+)/ $ @ ){$ inpath = $ 1;}#摆脱拖尾
斜线
die "dir $inpath doesn't exist! $!" unless -e $inpath;
#################################### ##############
#子程序
#getInlineImg($ file_full_path)返回一个数组列表
内联图片。例如,它可能返回(''xx.jpg'',''.. / image.png'')
sub getInlineImg($){$ full_file_name = $ _ [0];
@linx =(); open(FF,"< $ full_file_name")或die"错误:无法打开
$ full_file_name $!" ;;
while(< FF> ){@txt_segs = split(m / img /,$ _);转移@txt_segs;
for $ lin(@txt_segs){if($ lin = ~m @ src\s * = \s * \"([^ \"] +)\" @i){
推@linx,$ 1; } b / b}关闭FF;
返回@linx;
}
#linkFullPath( $ dir,$ locallink)返回一个字符串,它是本地链接的完整路径
。例如,
linkFullPath(''/ Users / t / public_html / a / b'',''.. / image / t.png'')返回
'' 用户/吨/的public_html /一个/图像/ t.png ''。返回的结果不会是
包含双斜杠或''../''字符串。
sub linkFullPath($$){$ result = $ _ [0]。 $ _ [1]; while($ result =〜
s @ \ / \ / @ \ / @){}; while($ result = ~s @ / [^ \ /] + \ / \。\。@@){};返回
$结果;}
#listLocalLinks($ html_file_full_path)返回一个数组,其中每个
元素是本地链接的完整路径html。
sub listLocalLinks($){
my $ htmlfile = $ _ [0];
my($ name) ,$ dir,$ suffix)= fileparse($ htmlfile,(''\\\''));
我的@aa = getlinks($ htmlfile);
@aa = grep(!m / \#/,@ aa);
@aa = grep(!m / ^ mailto:/,@ aa);
@aa = grep(!m / ^ http:/,@ aa);
我的@ linkedFiles =();
foreach my $ lix( @aa){push @ linkedFiles,linkFullPath($ dir,$ lix);}
返回@linkedFiles;
}
#listInlineImg( $ html_file_full_path)返回一个数组,其中每个元素都是html内嵌图像的完整路径。
sub listInlineImg($){
我的$ htmlfile = $ _ [0];
my($ name,$ dir,$ suffix)= fileparse($ htmlfile,(''\\\'')) ;
我的@aa = getInlineImg($ htmlf ile);
我的@ result =();
foreach my $ ele(@aa){push @result,linkFullPath($ dir,$ ele );}
返回@result;
}
############## ####################################
sub checkLink {
if(
-T $ File :: Find :: name
&& $ File :: Find :: name =〜 m@\.html $ @
){
$ total = -s $ File :: Find :: name;
@ h2 = listInlineImg($ File :: Find :: name);
我的$ ln(@ h2){$ total + = -s $ ln;};
if($ total> $ sizeLimit){print" problem:file:
$ File :: Find :: name,size:$ total\\\
" ;;}
push(@result,[$ total,$ File :: Find ::姓名]);
};
}
find(\& checkLink,$ inpath);
@result = sort {$ b-> [0]< => $ a-> [0]} @result;
print Dumper(\ @ result);
print" done reporting。 (上面印有任何大小以上的文件。)" ;;
__END__
Would anyone like to translate the following Perl script to Python or
Scheme (scsh)?
The script takes a directory path (inpath) and reports all HTML files under it
that exceed a certain size (counting inline images).
It also prints a sorted report of the HTML files and their sizes.
(a copy of the script is here:
http://xahlee.org/_scripts/check_file_size.pl
)
Xah
xa*@xahlee.org
∑ http://xahlee.org/
# perl
# Tue Oct 4 14:36:48 PDT 2005
# Given a directory, report the size of every HTML file in it (counting inline images).
# XahLee.org
use Data::Dumper;
use File::Find;
use File::Basename;
# Root directory to scan (trailing slashes are stripped below).
$inpath = '/Users/t/web/mydirectory/';

# Size threshold: checkLink reports a page whose HTML file plus inline
# images total more than this.
$sizeLimit = 800 * 1000;

# $inpath = $ARGV[0]; # should give a full path; else the
# $File::Find::dir won't give full path.

# Get rid of trailing slashes. The pattern needs at least one character
# before the slash, so a lone "/" is left untouched.
while ($inpath =~ m@^(.+)/$@) { $inpath = $1; }

# Abort early when the starting point is missing.
die "dir $inpath doesn't exist! $!" unless -e $inpath;
##################################################
# subroutines
# getInlineImg($file_full_path) returns a list of the inline image
# references found in the file, exactly as written in each src attribute.
# For example, it may return ('xx.jpg', '../image.png').
#
# The file is scanned line by line, so an <img> tag must have its src
# attribute on the same line as the tag name. Only double-quoted src values
# are recognised. Tag and attribute names are matched case-insensitively.
# Dies if the file cannot be opened.
sub getInlineImg ($) {
    my $full_file_name = $_[0];
    my @linx = ();

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode, and the handle cannot clash with other code.
    open(my $fh, '<', $full_file_name)
        or die "error: can not open $full_file_name $!";

    while (my $line = <$fh>) {
        # Match whole <img ...> tags rather than splitting on the text "img";
        # splitting broke on paths that themselves contain "img"
        # (e.g. src="img/pic.gif") and missed upper-case <IMG> tags.
        while ($line =~ m@<img\b[^>]*?\ssrc\s*=\s*"([^"]+)"@gi) {
            push @linx, $1;
        }
    }
    close $fh;

    return @linx;
}
# linkFullPath($dir, $locallink) returns a string that is the full path to
# the local link. For example,
#   linkFullPath('/Users/t/public_html/a/b', '../image/t.png')
# returns '/Users/t/public_html/a/image/t.png'. The returned result will not
# contain a double slash, a '/./' segment, or a resolvable '../' segment.
# $dir may be given with or without a trailing slash.
sub linkFullPath($$) {
    my ($dir, $locallink) = @_;

    # Join with exactly one separator; an empty $dir leaves the link relative.
    my $path = $dir;
    $path .= '/' if length($path) && $path !~ m{/\z};
    $path .= $locallink;

    # Collapse runs of slashes, then drop no-op "./" segments.
    $path =~ s{/+}{/}g;
    1 while $path =~ s{/\./}{/};

    # Resolve "name/.." pairs one at a time. The ".." must be a whole path
    # segment (so "..hidden" is kept) and the segment being removed must not
    # itself be "..".
    1 while $path =~ s{/(?!\.\./)[^/]+/\.\.(?=/|\z)}{};

    return $path;
}
# listLocalLinks($html_file_full_path) returns a list where each element is
# the full path of a local link in the html file.
#
# Links containing a '#' fragment, and mailto:, http: and https: links, are
# dropped; every remaining link is resolved against the directory of the
# html file with linkFullPath.
#
# NOTE(review): getlinks() is not defined in the visible part of this file;
# it must be supplied elsewhere before this sub can be called.
sub listLocalLinks($) {
    my $htmlfile = $_[0];

    # Only the directory part is needed; fileparse returns it with its
    # trailing separator.
    my (undef, $dir) = fileparse($htmlfile);

    my @locals = grep { !m/\#/ && !m/^mailto:/i && !m/^https?:/i }
                 getlinks($htmlfile);

    return map { linkFullPath($dir, $_) } @locals;
}
# listInlineImg($html_file_full_path) returns a list where each element is
# the full path to an inline image referenced by the html file.
#
# The image references come from getInlineImg and are resolved against the
# directory of the html file with linkFullPath.
sub listInlineImg($) {
    my $htmlfile = $_[0];

    # Only the directory part is needed; fileparse returns it with its
    # trailing separator.
    my (undef, $dir) = fileparse($htmlfile);

    return map { linkFullPath($dir, $_) } getInlineImg($htmlfile);
}
##################################################
# checkLink() is the File::Find callback. For every text file whose name ends
# in ".html" it adds the size of the file to the sizes of its inline images
# (as listed by listInlineImg), prints a "problem" line when the total
# exceeds $sizeLimit, and appends [total, path] to the global @result.
# Anything that is not a text .html file is ignored.
sub checkLink {
    my $file = $File::Find::name;
    return unless -T $file && $file =~ m@\.html$@;

    # -s yields a false, non-numeric value for empty or missing files;
    # count those as 0 so the total is always a number.
    my $total = (-s $file) || 0;
    for my $ln (listInlineImg($file)) {
        $total += (-s $ln) || 0;
    }

    if ($total > $sizeLimit) {
        print "problem: file: $file, size: $total\n";
    }
    push(@result, [$total, $file]);
    return;
}
# Visit every entry under $inpath; checkLink records [size, path] pairs
# in @result as it goes.
find( { wanted => \&checkLink }, $inpath );

# Order the report from the largest total to the smallest.
@result = sort { $$b[0] <=> $$a[0] } @result;

# Dump the sorted pairs, followed by the closing message (which is emitted
# without a trailing newline).
print Dumper( \@result ), "done reporting. (any file above size are printed above.)";
__END__
推荐答案
inpath =''/ Users / t / web / mydirectory /'';
inpath = ''/Users/t/web/mydirectory/'';
sizeLimit = 800 * 1000;
#
sizeLimit = 800 * 1000;
#
inpath =
这篇关于检查html文件大小的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!