Perl,Code片段,代码实例,代码示例,代码片段库

Perl 母亲3翻译博客RSS整理

#!/usr/bin/perl -w

# Rebuild the Mother 3 fan-translation feed with full post bodies.
#
# Downloads the feed XML, fetches every linked blog post in parallel, and
# replaces each item's <content:encoded> element with the post HTML followed
# by the comments found in <div> elements whose style matches "DCB6B6".
# Progress is logged to STDERR; the rewritten feed is printed to STDOUT.

use strict;
use LWP::Simple;
use HTML::TreeBuilder;
use LWP::Parallel::UserAgent;
use XML::TreeBuilder;
use Encode;

my $feed_url = "http://feeds.feedburner.com/Mother3FanTranslation?format=xml";

my $ua = LWP::Parallel::UserAgent->new();
print STDERR "Grabbing feed XML...\n";
my $xml = get($feed_url);

# LWP::Simple::get returns undef on any failure; parsing undef would only
# produce a confusing error later on.
die "Could not download feed XML from $feed_url\n" unless defined $xml;

my $atomfeed = XML::TreeBuilder->new();
$atomfeed->parse($xml);

# Feed <item> elements keyed by the URL of the post they link to.
my %entries;

print STDERR "Scraping links from XML...\n";
foreach my $item ($atomfeed->look_down("_tag", "item")) {
    my $link = $item->look_down("_tag", "link");
    next unless $link;    # an item without a link cannot be fetched

    my $url = $link->as_text();
    $entries{$url} = $item;

    print STDERR "Registering $url...\n";
    $ua->register(HTTP::Request->new(GET => $url));
}

print STDERR "Downloading HTML...";
my $downloads = $ua->wait();
print STDERR "done!\n";

foreach my $entry (values(%$downloads)) {
    my $response = $entry->response();

    unless ($response->is_success()) {
        print STDERR "Skipping failed download: ", $response->status_line(), "\n";
        next;
    }

    my $url = $response->base()->as_string();
    print STDERR "Processing $url...\n";

    # The response base can differ from the feed link (e.g. after a
    # redirect); without a matching item there is nothing to update.
    my $item = $entries{$url};
    unless ($item) {
        print STDERR "\tNo feed item matches $url, skipping...\n";
        next;
    }

    my $description = $item->look_down("_tag", "content:encoded");
    unless ($description) {
        print STDERR "\tItem has no content:encoded element, skipping...\n";
        next;
    }

    print STDERR "\tGrabbing relevant HTML via regular expression...\n";
    my $page = $response->content();
    my ($article) = ($page =~ m{<p class="meta">.*?</p>(.*?)<p class="meta">Posted}s);

    # Leave the feed's original description alone when the page layout is
    # not the expected one, instead of replacing it with nothing.
    unless (defined $article) {
        print STDERR "\tCould not locate the post body, skipping...\n";
        next;
    }

    my $page_tree = HTML::TreeBuilder->new_from_content(decode_utf8($page));

    my $comments = "<br /><br />Mato Comments:<br />";

    print STDERR "\tFinding Mato comments...\n";
    foreach my $comment_div ($page_tree->look_down("_tag", "div", "style", qr/DCB6B6/)) {
        print STDERR "\t\tFound a comment!\n";
        foreach my $paragraph ($comment_div->look_down("_tag", "p")) {
            $comments .= "<blockquote>".$paragraph->as_HTML()."</blockquote>";
        }
    }
    $page_tree->delete();    # free the parsed page as soon as it is no longer needed

    my $article_tree = HTML::TreeBuilder->new_from_content(
        decode_utf8($article.$comments)
    );

    print STDERR "\tCleaning up HTML for Liferea...\n";
    foreach my $hrdiv ($article_tree->look_down("_tag", "div", "class", "hr")) {
        $hrdiv->delete();
    }

    print STDERR "\tDeleting description content...\n";
    $description->delete_content();

    print STDERR "\tPushing content to $url entry...\n";
    $description->push_content($article_tree->as_HTML());
    $article_tree->delete();
}

print $atomfeed->as_XML();

Perl 清理已翻译为HTML的Word文档

#!/usr/local/bin/perl -w
use strict;

#############################################################
#                                                           #
#                                                           #
#                                                           #
#                      NOAH SUSSMAN                         #
#                                                           #
#                     clean up word                         #
#                                                           #
#                Created 5/16/01 at 02:33 PM                #
#                                                           #
# Clean up Word documents that have been translated to HTML #
#                                                           #
#                                                           #
#############################################################

# Files named on the command line are edited in place (a ".bk" backup of each
# is kept). The path below is only a fallback for when no file is given.
my $default_file = "Macintosh HD:NOAH:2001:05-MAY 2001:3-May 15-21:3-Revisions to Corp Site:large number of Word docs:1.2 Services.html";
@ARGV = ($default_file) unless @ARGV;

$^I = ".bk";

local $/ = undef;    # slurp the whole file into $_

while (<>) {

    # Destroy all tags except A, B, IMG, CENTER, P, UL, OL, LI, TABLE, TD, TR,
    # HTML, BODY, HEAD and TITLE. The \b keeps the list exact: without it any
    # tag that merely starts with a listed name (<br>, <link>, ...) survived.
    s{<(?!/?(?:a|b|img|center|p|ul|ol|li|table|td|tr|html|body|head|title)\b)\s*[^>]*>\s*}{}gi;

    # Fix mis-nested tags, if any: <x>..<y>..</x>..</y> becomes
    # <y>..<x>..</x>..</y>. Backreferences (\1, \3) are required here; "$1"
    # inside a pattern interpolates the capture of an earlier match instead.
    s{
        <(\w+)>                  # $1: outer opening tag
        (.*?)                    # $2
        <((?!\1>)\w+)>           # $3: a different opening tag
        ((?:(?!</\3>).)*?)       # $4: text in which the second tag is still open
        </\1>                    # first tag closed too early
        (.*?)                    # $5
        </\3>                    # second tag closed
    }{<$3>$2<$1>$4</$1>$5</$3>}gix;

    print $_;

}

Perl 在目录中列出文件

List text files in the current directory

# Collect every plain file in the current directory (dotfiles included) that
# Perl's -T heuristic classifies as text, and print the names on one line.
my @files = grep { -f $_ && -T $_ } glob '* .*';
print "@files\n";

Perl 从远程HTML页面批量下载代码之间的代码

#!/usr/bin/env perl
#
# grabcode.pl
# Download code between <pre> tags from remote HTML pages
# Takes a list of urls as argument

use strict; use warnings;

use WWW::Mechanize;
use HTML::TreeBuilder::XPath;
use Encode;

my @urls = @ARGV;

# autocheck is switched off so that a failed request reaches the "skip" branch
# below instead of aborting the whole run inside get().
my $browser = WWW::Mechanize->new( autocheck => 0 );
$browser->agent_alias('Linux Mozilla');
#$browser->credentials('uname', 'passwd');

foreach my $url (@urls) {

    my $response = $browser->get($url);
    unless ( $response->is_success() ) {
        # A method call does not interpolate inside a string, so the status
        # line is concatenated explicitly.
        warn "Skipping $url:\n" . $response->status_line() . "\n";
        next;
    }

    my $tree = HTML::TreeBuilder::XPath->new;
    $tree->parse( $browser->content() );
    $tree->eof();    # tell the parser the document is complete

    # Print the text of every <pre> element, UTF-8 encoded, one per line.
    foreach my $node ( $tree->findnodes('//pre') ) {
        print encode( "utf8", $node->as_text() );
        print "\n";
    }

    $tree->delete();    # release the parsed document before the next page
}

Perl 将ISBN文件名转换为BibTeX记录

#!/usr/bin/env perl

use strict; 
use warnings;

use Encoding &quot;utf8&quot;;
use Text::BibTeX;
use WebService::ISBNDB::API::Books;
use Getopt::Long;
use Pod::Usage;

my %options;
GetOptions('usage|?'  => \$options{usage},
           'h|help'   => \$options{help}
          );
pod2usage(1) if $options{usage};
pod2usage(-verbose => 2) if $options{help};

# NOTE(review): a fallback API key is embedded in the source; prefer setting
# ISBNDB_KEY in the environment.
my $api_key = $ENV{ISBNDB_KEY} || 'TMDKWJSX';

my $dir = shift || '.';
my $file = shift || '&STDOUT';
my $bib = Text::BibTeX::File->new('>'.$file)
    or die "Cannot open $file for writing: $!\n";

# A PDF named after an ISBN-10: nine digits plus a check character that is a
# digit or X. (The former class "[x|\d]" also accepted a literal "|".)
my $isbn_pdf = qr{(\d{9}[\dx])\.pdf$}i;

opendir my $dh, $dir
    or die "Cannot open $dir: $!\n";
my @files = grep { -f $_ && $_ =~ m{/$isbn_pdf} }
            map  { "$dir/$_" }
            readdir $dh;
closedir $dh;

foreach my $pdf (@files) {

    # extract isbn from file name
    my ($isbn) = $pdf =~ $isbn_pdf;
    next unless defined $isbn;

    # check database for isbn number, skip the file if nothing is found
    my $book = WebService::ISBNDB::API::Books->find( { api_key => $api_key, isbn => $isbn } );
    next unless $book;

    # set new bibtex entry
    my $entry = Text::BibTeX::Entry->new;
    $entry->set_metatype(BTE_REGULAR);
    $entry->set_type('book');
    $entry->set_key($isbn);

    # set title field
    $entry->set( 'title', $book->get_longtitle || $book->get_title );

    # set author or editor field
    my $authors = $book->get_authors_text;
    if ( defined $authors && length $authors ) {
        # some clean-up
        $authors =~ s/^by //;
        $authors =~ s/,$//;
        $authors =~ s/,\s+/ and /g;
        $authors =~ s/;\s+/ and /g;

        # authors or editors ?
        if ( $authors =~ /^\s*\[?edited by\s+\]?(.*)$/i ) {
            ( my $editors = $1 ) =~ s/with/and/;
            $entry->set( 'editor', $editors );
        }
        elsif ( $authors =~ /\(Editor\)/i ) {
            # Names tagged "(Editor)" are editors; previously the tag was
            # stripped but no field was written at all.
            $authors =~ s/\s*\(Editor\)//gi;
            $entry->set( 'editor', $authors );
        }
        else {
            $entry->set( 'author', $authors );
        }
    }

    # parse publisher and edition fields for publisher and year data
    my $publisher = $book->get_publisher_text;
    $publisher = '' unless defined $publisher;
    if ( $publisher =~ m/^(.*?),\s+c?(\d{4}).?$/ ) {
        $entry->set( 'publisher', $1 );
        $entry->set( 'year', $2 );
    }
    else {
        $entry->set( 'publisher', $publisher ) if length $publisher;
        my $edition = $book->get_edition_info;
        if ( defined $edition && $edition =~ m/(\d{4})/ ) {
            $entry->set( 'year', $1 );
        }
    }

    # miscellaneous fields
    my $notes = $book->get_notes;
    $entry->set( 'notes', $notes ) if $notes;
    my $abstract = $book->get_summary;
    $entry->set( 'abstract', $abstract ) if $abstract;

    $entry->set( 'local-url', $pdf );

    $entry->write($bib);

    # sleep 2;
}

$bib->close;

__END__


=head1 NAME

isbn2bibtex.pl - Convert ISBN file names to BibTeX records 

=head1 SYNOPSIS

isbn2bibtex.pl [-? | --help] | [directory] [outfile.bib]

=head1 DESCRIPTION

Scans a directory for PDF files whose names are ISBN-10 identifiers,
fetches the corresponding book data from isbndb.com, parses the data
fields to get rid of inconsistencies, and finally outputs a BibTeX
file with all fields set accordingly.

	-?             print usage
	-h --help      verbose help message
	
If no directory is given, scans the current directory. Outputs result
to STDOUT, unless a second argument is given.

An API key is required to access isbndb.com services. You can either 
paste it in the source code or set the environment variable ISBNDB_KEY.

=head1 LICENSE

Free to use and modify, under the same terms as Perl itself.

=head1 AUTHOR

i-blis, I&lt;i-blis yandex ru&gt;. 

=cut

Perl 生成随机有效的挪威SSN

#
# random_norwegian_ssn()
#
# Accepts one parameter, a date string in the format DDMMYY, and returns a
# random 11-digit number for that date: the six date digits, a three-digit
# entity number between 000 and 499 (the range used for 1900-1999), and two
# mod-11 control digits.
#
# Dies if the parameter does not start with six digits.
#
sub random_norwegian_ssn {
    my $date = shift;

    die "random_norwegian_ssn: expected a date string in the format DDMMYY\n"
        unless defined $date && $date =~ /\A(\d{6})/;
    my @date_digits = split //, $1;

    # Weights of the two control digits, in digit order.
    my @weights_c1 = (3, 7, 6, 1, 8, 9, 4, 5, 2);
    my @weights_c2 = (5, 4, 3, 2, 7, 6, 5, 4, 3, 2);

    while (1) {
        # rand(500) covers 0..499 inclusive; rand(499) could never yield 499.
        my @digits = (@date_digits, split(//, sprintf("%03d", int(rand(500)))));

        my $c1 = _norwegian_ssn_control_digit(\@weights_c1, \@digits);
        my $c2 = _norwegian_ssn_control_digit(\@weights_c2, [@digits, $c1]);

        # A control value of 10 cannot be written as one digit, so that
        # entity number is unusable for this date: draw another one.
        next if $c1 == 10 || $c2 == 10;

        return join('', @digits, $c1, $c2);
    }
}

# Weighted mod-11 control value: 0 when the weighted sum is divisible by 11,
# otherwise 11 minus the remainder (so the result may be 10).
sub _norwegian_ssn_control_digit {
    my ($weights, $digits) = @_;

    my $sum = 0;
    $sum += $weights->[$_] * $digits->[$_] for 0 .. $#{$weights};

    my $remainder = $sum % 11;
    return $remainder == 0 ? 0 : 11 - $remainder;
}

Perl （Perl）使用WMI获取系统信息

#!/usr/bin/perl -w

# use bin\perl.exe wmi.pl to run.
# have fun!
# 2009/6/17 twitter.com/vinocui
#
# useful links:
# (WMI space definition) http://msdn.microsoft.com/en-us/library/aa394084(VS.85).aspx
# (OLE usage on CPAN)    http://cpan.uwinnipeg.ca/htdocs/Win32-OLE/Win32/OLE.html#Object_methods_and_properties
#

use strict;
use Win32::OLE;

# "WinMgmts:" is the moniker of the WMI service; root/cimv2 is the namespace
# that holds the Win32_* classes queried below.
my $wmi = Win32::OLE->GetObject("WinMgmts://./root/cimv2")
    or die "Failed: GetObject: " . Win32::OLE->LastError() . "\n";

# Print, for every instance of the WMI class $class, a "$label:" heading
# followed by one tab-indented line per property named in @properties.
# Dies if the instances cannot be retrieved.
sub print_instances {
    my ($class, $label, @properties) = @_;

    my $list = $wmi->InstancesOf($class)
        or die "Failed: InstancesOf: " . Win32::OLE->LastError() . "\n";

    foreach my $instance (Win32::OLE::in($list)) {
        print "$label:\n";
        print "\t", $instance->{$_}, "\n" for @properties;
    }
}

print_instances("Win32_Processor",       "CPU", qw(Name Caption));
print_instances("Win32_OperatingSystem", "OS",  qw(Name));

Perl 与Perl模糊字符串匹配

use String::Approx 'amatch';
use Test::More(no_plan);

# Return 1 if the given string approximately matches "homer_simpson" under
# the options listed below, 0 otherwise (including for an undefined argument).
#
# The candidate is passed to amatch() explicitly rather than through $_, so
# the caller's $_ is left untouched, and the call is made in scalar context
# so the result is always a plain boolean, even inside an argument list.
sub fuzm {

  my $candidate = shift;
  return 0 unless defined $candidate;

  my $matched = amatch("homer_simpson", [        # this array sets match options:
                                         "i",    # match case-insensitively
                                         "10%",  # tolerate up to 1 character in 10 being wrong
                                         "S0",   # but no substituting one character for another
                                         "D1",   # although, tolerate up to one deletion
                                         "I2"    # and tolerate up to two insertions
                                        ], $candidate);

  return $matched ? 1 : 0;

}


ok(fuzm("homer_simpson"),         "exact match for 'homer_simpson'");
ok(fuzm("homersimpson"),          "still matches without the underscore");
ok(fuzm("homers_impson"),         "putting the underscore in a different place, still matches");
ok(fuzm("ho_mer_simpson"),        "an extra underscore still matches");
ok(fuzm("ho_mer_simp_son"),       "2 extra underscores still matches");
ok((not fuzm "ho_mersimp_son"),   "2 underscores, both in the wrong places, doesn't match");
ok((not fuzm "ho_mer_sim_ps_on"), "3 extra underscores doesn't match");
ok((not fuzm "homer____simpson"), "3 consecutive extra underscores doesn't match");

Perl Perl关键字搜索（一个班轮）

#!/bin/sh

# Print the lines among the last 5000 of "somefile" that match every keyword
# given on the command line. Keywords are case-insensitive Perl regular
# expressions; with no keywords, every line is printed.
#
# The keywords reach perl as arguments instead of being pasted into the
# program text. The program is single-quoted so the shell does not expand
# "$_", and any number of keywords (not just five) is accepted. The BEGIN
# block empties @ARGV so perl keeps reading from the pipe rather than trying
# to open the keywords as files.
tail -n5000 somefile | perl -ne '
    BEGIN { @patterns = map { qr/$_/i } splice @ARGV }
    $line = $_;
    print $line unless grep { $line !~ $_ } @patterns;
' -- "$@"

Perl 将（裁剪）双页PDF分成两部分

#!/usr/bin/env perl
use strict; use warnings;
use PDF::API2;

# Split every double page of a PDF into two single pages.
#
# Each source page is imported twice into a new document: once cropped to its
# left half and once cropped to its right half. The result is saved next to
# the input as "<name>.clean.<ext>" (or "<name>.clean" if the input file name
# has no extension, so the input is never overwritten).

my $filename = shift || 'test.pdf';
my $oldpdf = PDF::API2->open($filename);
my $newpdf = PDF::API2->new;

# Import page $page_nb of $source into $target and restrict its crop, trim and
# media boxes to the 'left' or 'right' half of the original media box.
sub import_half_page {
  my ($target, $source, $page_nb, $side) = @_;

  my $page = $target->importpage($source, $page_nb);
  my @cropdata = $page->get_mediabox;

  # Split at the horizontal centre of the box, which is also correct when the
  # left edge is not at 0.
  my $middle = ($cropdata[0] + $cropdata[2]) / 2;
  if ($side eq 'left') {
    $cropdata[2] = $middle;
  }
  else {
    $cropdata[0] = $middle;
  }

  $page->cropbox(@cropdata);
  $page->trimbox(@cropdata);
  $page->mediabox(@cropdata);
  return $page;
}

for my $page_nb (1..$oldpdf->pages) {
  import_half_page($newpdf, $oldpdf, $page_nb, 'left');
  import_half_page($newpdf, $oldpdf, $page_nb, 'right');
}

my $newfilename = $filename;
$newfilename .= '.clean'
  unless $newfilename =~ s/(.*)\.(\w+)$/$1.clean.$2/;

# Double quotes are not needed, but single quotes would save to a file that is
# literally named '$newfilename'.
$newpdf->saveas($newfilename);

__END__