检查html文件大小 [英] check html file size

查看:121
本文介绍了检查html文件大小的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

有人愿意将以下perl脚本翻译成Python或

Scheme(scsh)吗?


该脚本接收一个目录路径 inpath,并报告其中所有大小(内嵌图像计算在内)超过指定上限的 html 文件。

它还会打印一份按大小排序的 html 文件及其大小的报告。


(脚本的副本在这里:
http://xahlee.org/_scripts/check_file_size.pl )




Xah
xa*@xahlee.org

∑ http://xahlee.org/

#perl

#Tue Oct 4 14:36:48 PDT 2005

#给一个目录,报告所有html文件的大小。 (计算内嵌图像)

#XahLee.org


使用Data :: Dumper;

使用File :: Find;

使用File :: Basename;


$ inpath =''/ Users / t / web / mydirectory /'';

$ sizeLimit = 800 * 1000;


#$ inpath = $ ARGV [0]; #应该给出一条完整的路径;否则

$ File :: Find :: dir不会给出完整的路径。

while($ inpath = ~m @ ^(。+)/ $ @ ){$ inpath = $ 1;}#摆脱拖尾

斜线


死亡dir $ inpath不存在! $"!;除非-e $ inpath;

#################################### ##############

#子程序

#getInlineImg($ file_full_path)返回一个数组列表

内联图片。例如,它可能返回(''xx.jpg'',''.. / image.png'')

sub getInlineImg($){$ full_file_name = $ _ [0];

@linx =(); open(FF,"< $ full_file_name")或die"错误:无法打开

$ full_file_name $!" ;;

while(< FF> ){@txt_segs = split(m / img /,$ _);转移@txt_segs;

for $ lin(@txt_segs){if($ lin = ~m @ src\s * = \s * \"([^ \"] +)\" @i){

推@linx,$ 1; } b / b}关闭FF;

返回@linx;

}


#linkFullPath( $ dir,$ locallink)返回一个字符串,它是本地链接的完整路径

。例如,

linkFullPath(''/ Users / t / public_html / a / b'',''.. / image / t.png'')返回

'' 用户/吨/的public_html /一个/图像/ t.png ''。返回的结果不会是
包含双斜杠或''../''字符串。

sub linkFullPath($$){$ result = $ _ [0]。 $ _ [1]; while($ result =〜

s @ \ / \ / @ \ / @){}; while($ result = ~s @ / [^ \ /] + \ / \。\。@@){};返回

$结果;}

#listLocalLinks($ html_file_full_path)返回一个数组,其中每个

元素是本地链接的完整路径html。

sub listLocalLinks($){

my $ htmlfile = $ _ [0];


my($ name) ,$ dir,$ suffix)= fileparse($ htmlfile,(''\\\''));

我的@aa = getlinks($ htmlfile);

@aa = grep(!m / \#/,@ aa);

@aa = grep(!m / ^ mailto:/,@ aa);

@aa = grep(!m / ^ http:/,@ aa);


我的@ linkedFiles =();

foreach my $ lix( @aa){push @ linkedFiles,linkFullPath($ dir,$ lix);}

返回@linkedFiles;

}

#listInlineImg( $ html_file_full_path)返回一个数组,其中每个元素都是html内嵌图像的完整路径。

sub listInlineImg($){

我的$ htmlfile = $ _ [0];


my($ name,$ dir,$ suffix)= fileparse($ htmlfile,(''\\\'')) ;

我的@aa = getInlineImg($ htmlf ile);


我的@ result =();

foreach my $ ele(@aa){push @result,linkFullPath($ dir,$ ele );}

返回@result;

}


############## ####################################

sub checkLink {

if(

-T $ File :: Find :: name

&& $ File :: Find :: name =〜 m@\.html $ @

){

$ total = -s $ File :: Find :: name;

@ h2 = listInlineImg($ File :: Find :: name);

我的$ ln(@ h2){$ total + = -s $ ln;};

if($ total> $ sizeLimit){print" problem:file:

$ File :: Find :: name,size:$ total\\\
" ;;}

push(@result,[$ total,$ File :: Find ::姓名]);

};

}


find(\& checkLink,$ inpath);


@result = sort {$ b-> [0]< => $ a-> [0]} @result;


print Dumper(\ @ result);

print" done reporting。 (上面印有任何大小以上的文件。)" ;;


__END__

Would anyone like to translate the following Perl script to Python or
Scheme (scsh)?

The script takes a directory path (inpath) and reports every HTML file
under it whose size, counting inline images, is above a certain limit.
It also prints a sorted report of the HTML files and their sizes.

(a copy of the script is here:
http://xahlee.org/_scripts/check_file_size.pl
)

Xah
xa*@xahlee.org
∑ http://xahlee.org/
# perl

# Tue Oct 4 14:36:48 PDT 2005
# Given a directory, report the size of every HTML file in it (counting inline images).
# XahLee.org

use Data::Dumper;
use File::Find;
use File::Basename;

# Root directory to scan, and the size threshold in bytes above which a
# page (the HTML file plus its inline images) is reported as a problem.
my $inpath    = '/Users/t/web/mydirectory/';
my $sizeLimit = 800 * 1000;

# $inpath = $ARGV[0];  # should be a full path; otherwise
#                      # $File::Find::dir won't give a full path.

# Get rid of trailing slashes. The pattern needs at least one character
# before the slash, so a bare "/" is left untouched.
while ($inpath =~ m@^(.+)/$@) { $inpath = $1; }

die "dir $inpath doesn't exist! $!" unless -e $inpath;
##################################################
# subroutines
# getInlineImg($file_full_path) returns the list of inline image
# references found in the given HTML file, in document order, exactly as
# written in the src attribute of each <img> tag.
# For example, it may return ('xx.jpg', '../image.png').
#
# Both double- and single-quoted src values are recognised, and the tag
# name is matched case-insensitively. Dies if the file cannot be opened.
sub getInlineImg ($) {
    my ($full_file_name) = @_;

    # Three-argument open with a lexical handle: a file name that starts
    # with '>' or '|' can no longer be mistaken for an open mode.
    open(my $fh, '<', $full_file_name)
        or die "error: can not open $full_file_name $!";

    # Slurp the file so that tags broken across lines are still seen.
    my $html = do { local $/; <$fh> };
    close $fh;
    $html = '' unless defined $html;

    # Match whole <img ...> tags instead of splitting the text on "img":
    # splitting lost every image whose own path contains "img"
    # (e.g. src="img/a.png") and ignored upper-case <IMG> tags.
    my @linx;
    while ($html =~ m{<img\b[^>]*?\ssrc\s*=\s*(?:"([^"]+)"|'([^']+)')}gi) {
        push @linx, defined $1 ? $1 : $2;
    }

    return @linx;
}

# linkFullPath($dir, $locallink) returns a string that is the full path
# to the local link. For example,
#   linkFullPath('/Users/t/public_html/a/b', '../image/t.png')
# returns '/Users/t/public_html/a/image/t.png'.
# The returned result will not contain a double slash or a
# 'name/..' pair that could be cancelled out.
sub linkFullPath($$) {
    my ($dir, $local_link) = @_;

    # Insert a separator when $dir lacks a trailing slash; without it the
    # example above would glue "b" and ".." together as "b..". An empty
    # $dir is concatenated as-is so a relative link stays relative.
    my $path = ($dir ne '' && $dir !~ m{/\z})
        ? $dir . '/' . $local_link
        : $dir . $local_link;

    # Collapse runs of slashes into one.
    $path =~ s{/{2,}}{/}g;

    # Repeatedly cancel "/name/..". The lookahead makes sure ".." is a
    # whole path segment (so "/a/..b" is left alone), and the negative
    # lookahead stops ".." itself from being taken as the name to cancel.
    1 while $path =~ s{/(?!\.\.(?:/|\z))[^/]+/\.\.(?=/|\z)}{};

    return $path;
}
# listLocalLinks($html_file_full_path) returns a list where each element
# is the full path of a local link in the HTML file.
# Links that contain a '#' and links that start with a URI scheme
# (http:, https:, mailto:, ftp:, ...) are skipped, since they do not
# name a local file.
# NOTE(review): getlinks() is not defined anywhere in this script; it has
# to be provided elsewhere before this sub can be called -- confirm.
sub listLocalLinks($) {
    my ($htmlfile) = @_;

    # Only the directory part is needed; fileparse returns it as the
    # second element of its result list.
    my (undef, $dir) = fileparse($htmlfile);

    my @local = grep { !m/\#/ && !m/^[A-Za-z][A-Za-z0-9+.\-]*:/ }
        getlinks($htmlfile);

    return map { linkFullPath($dir, $_) } @local;
}
# listInlineImg($html_file_full_path) returns a list where each element
# is the full path to an inline image referenced by the HTML file.
# Image references that carry a URI scheme (http:, https:, data:, ...)
# or are protocol-relative ("//host/...") are skipped: they do not
# correspond to a file next to the page, so no local path exists.
sub listInlineImg($) {
    my ($htmlfile) = @_;

    # Only the directory part is needed; fileparse returns it as the
    # second element of its result list.
    my (undef, $dir) = fileparse($htmlfile);

    my @local = grep { !m{^(?:[A-Za-z][A-Za-z0-9+.\-]*:|//)} }
        getInlineImg($htmlfile);

    return map { linkFullPath($dir, $_) } @local;
}

##################################################
# checkLink() is the callback handed to File::Find::find. For every text
# file whose name ends in ".html" it adds the size of the file to the
# sizes of its inline images (as listed by listInlineImg), prints a
# "problem" line when the sum exceeds $sizeLimit, and appends
# [$total, $path] to @result for the final sorted report.
sub checkLink {
    my $file = $File::Find::name;

    # Do the cheap name test first; -T has to open the file and read it.
    return unless $file =~ m@\.html$@ && -T $file;

    my $total = (-s $file) || 0;
    for my $img (listInlineImg($file)) {
        # -s gives undef for an image that does not exist on disk;
        # count such an image as 0 bytes instead of adding undef.
        $total += (-s $img) || 0;
    }

    if ($total > $sizeLimit) {
        print "problem: file: $file, size: $total\n";
    }

    push(@result, [$total, $file]);
    return;
}

# Walk the tree below $inpath; checkLink collects one [size, path] pair
# per HTML file into @result as it goes.
File::Find::find({ wanted => \&checkLink }, $inpath);

# Order the collected pairs by size, largest first.
@result = sort { $b->[0] <=> $a->[0] } @result;

# Dump the sorted report, then the closing note.
print(Dumper(\@result), "done reporting. (any file above size are printed above.)");

__END__

推荐答案

inpath =''/ Users / t / web / mydirectory /'';
inpath = ''/Users/t/web/mydirectory/'';


sizeLimit = 800 * 1000;


sizeLimit = 800 * 1000;

#


inpath =


这篇关于检查html文件大小的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆