Perl 疯狂的Perl代码

$_ = shift;
s/\\/\//g;
print;

################

while(<>) {
   /^(\w+)/;
   print $_.',';
}

#############

if(($_ = 5)==5) {print;}

##############

$x = 7;
if (($x <=> 0)==($x=$x)) {print 'true';}
else{print 'false';}

#######################

print "Hello World" or die "trying";

######################

print eval qw(qw(qw()));

Perl 计算给定目录中的目录条目

#!/usr/bin/perl
$ARGV[0] = '.' unless @ARGV;
for my $dir (@ARGV) {
  opendir DIR, $dir or die "$dir: $!\n";
  $file =~ m:^\.: or ++$count
    while ($file = readdir DIR);
  closedir DIR;
}
print "$count\n";
exit 0;

Perl 备份您自己的snipplr.com片段

#!/usr/bin/perl
use strict;
use LWP::UserAgent;
my $silent = 0; # set to 1 for no output on stdout
my $logindata = {
    'username' => 'fill in your username',
    'password' => 'fill in your password',
                };
my $loginurl = 'http://snipplr.com/login/';
my $backupurl = 'http://snipplr.com/zipbackup.php';
my $backupdir = '.';

my $ua = LWP::UserAgent->new;
$ua->cookie_jar({ file => "cookies.txt" });

print qq{get cookie from frontpage...} unless $silent;
my $devnull = $ua->get('http://snipplr.com/'); #get cookie
print qq{done.\n} unless $silent;

print qq{login as $$logindata{'username'}...} unless $silent;
my $loginresponse = $ua->post($loginurl, $logindata);
print qq{done.\n} unless $silent;

print qq{exporting snipplr_backup.zip...} unless $silent;
my $backupfile = $ua->get($backupurl);
open (KI, ">$backupdir/snipplr_backup.zip");
binmode KI;
print KI $backupfile->content;
close (KI);
print qq{done.\n} unless $silent;

Perl 使用XML :: API :: XHTML在命令行上生成XHTML

perl -e "use XML::API::XHTML; my $d = new XML::API::XHTML(); $d->head_open(); $d->title('hello world!'); $d->script({type => 'text/javascript'}, '/* hello scripts! */'); $d->head_close(); $d->body_open(); $d->h1({style => 'color:red'}, 'Hi nerd!'); print $d;"  | tidy -q -o temp.html

Perl 日期功能

sub date_mysql2sec {
#takes: date in "yyyy-mm-dd hh:mm:ss" format (with some freedom)
#returns: date in seconds since 1970 format
    use Time::Local;# 'timelocal_nocheck';
    my $mysqldate = shift;
    $mysqldate =~ /(\d{4}).(\d{2}).(\d{2}).(\d{2}).(\d{2}).(\d{2})/;
    my ($sec,$min,$hour,$mday,$mon,$year) = ($6,$5,$4,$3,$2,$1);
    if ($mon != 0) {$mon--};
    return timelocal($sec,$min,$hour,$mday,$mon,$year);
}

sub date_sec2mysql {
#takes: date in seconds since 1970 format
#returns: date in yyyy-mm-dd hh:mm:ss format
    my $secdate = shift;
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = localtime($secdate);
    $year += 1900;
    $mon++;
    $mon = $mon < 10 ? "0$mon" : $mon;
    $mday = $mday < 10 ? "0$mday" : $mday;
    $sec = $sec < 10 ? "0$sec" : $sec;
    $min = $min < 10 ? "0$min" : $min;
    $hour = $hour < 10 ? "0$hour" : $hour;    
    return qq{$year-$mon-$mday $hour:$min:$sec};
}

sub date_mysql_now {
#Takes: nothing
#Returns: current date and time in yyyy-mm-dd hh:mm:ss format
    return date_sec2mysql(time);
}

Perl 基于MySQL的功能

sub db_connect {
    my ($dbname, $dbuser, $dbpass) = @_;
    my $dbh = DBI->connect("DBI:mysql:$dbname",$dbuser,$dbpass);
    #$dbh->do(qq{set character set 'utf8';});
    return $dbh;
}

sub do_sql {
# Takes: $dbh, $sql
# Returns: status
    my $dbh = shift || die "Database not connected!\n";
    my $sql = shift || die "Missing SQL statement???\n";
    return $dbh->do($sql);
}

sub execute_sql {
# Takes: $dbh, $sql
# Returns: $result_arrayref
    my $dbh = shift || die "Database not connected!\n";
    my $sql = shift || die "Missing SQL statement???\n";
    my $sth = $dbh->prepare($sql);
    $sth->execute;
    my $result = $sth->fetchall_arrayref({}); # {} => Return arrayref of hashrefs
    return $result;
}

sub do_insert {
#takes: $dbh, $table, $datahash
#returns: status
    my $dbh = shift || die "Database not connected!\n";
    my $table = shift || die "Missing table!\n";
    my $datahash = shift || die "Nothing to insert!\n";
    my $insert = "INSERT INTO $table (" . join(',', keys %$datahash) . ') VALUES (' . join(',', values %$datahash) . ');';
    return &do_sql($dbh, $insert);
}

Perl 在多个文件夹中的文本文件中查找和替换字符串

grep -rl target_string . | xargs perl -pi~ -e 's/target_string/replacement_string/g'

Perl 使用Perl正则表达式替换批量重命名文件

#!/usr/bin/env perl -w
use strict;

# Batch rename files with Perl regex substitutions
# Perl administration files rename
#
# Larry Wall's filename fixer: recipe 9.9 in Perl Cookbook 

$op = shift 
	or die "Usage: $0 expr [files]\n";
chomp(@ARGV = <STDIN>) unless @ARGV;
for (@ARGV) {
    $was = $_;
    eval $op;
    die $@ if $@;
    rename($was, $_) unless $was eq $_;
}

Perl 纽约时报报道RSS

#!/usr/bin/perl -w

use strict;
use LWP::Simple;
use HTML::TreeBuilder;
use LWP::Parallel::UserAgent;
use WWW::Mechanize;
use XML::TreeBuilder;
use Getopt::Long;
use HTTP::Cookies;
use Encode;

my $username;
my $password;
my $feedurl;

GetOptions(	"user=s"	=> \$username,
		"pass=s"	=> \$password,
		"url=s"		=> \$feedurl
);

print STDERR "Getting login page...\n";

my $cookiejar = HTTP::Cookies->new();

my $mech = WWW::Mechanize->new();

$mech->agent_alias('Linux Mozilla');
$mech->cookie_jar($cookiejar);

$mech->get("http://www.nytimes.com/auth/login");

my $loginresponse = $mech->submit_form(
	form_name	=>	'login',
	fields		=>	{
		USERID		=>	$username,
		PASSWORD	=>	$password
	}
);
		

unless ($loginresponse->is_success()) {
	die("Error logging in!\n");
}

print STDERR "Logged in successfully!\n";

my $pua = LWP::Parallel::UserAgent->new();
$pua->cookie_jar($cookiejar);
$pua->redirect(1);


print STDERR "Getting XML...\n";
my $xml = get($feedurl);
my $feed = XML::TreeBuilder->new();
$feed->parse($xml);
my %entries;

print STDERR "Grabbing links...\n";

foreach my $item ($feed->look_down("_tag", "item")) {
	my $link = $item->look_down("_tag", "link")->as_text();
	$link =~ s/\?.*//;

	my $request = HTTP::Request->new();

	print STDERR "Registering $link...\n";
	
	$request->uri($link,);
	$request->method("GET");

	$pua->register($request);

	$entries{$link} = $item;
}

print STDERR "Downloading HTML...\n";
my $html = $pua->wait();

foreach my $entry (values(%$html)) {
	my $response = $entry->response();
	my $url = $response->base()->as_string();
	$url =~ s/\?.*//;

	print STDERR "Processing $url...\n";

	my $item = $entries{$url};

	my $articlehtml = HTML::TreeBuilder->new_from_content(
		decode_utf8($response->content())
	);

	if (my $redirelem = $articlehtml->look_down("_tag", "meta", "http-equiv", "refresh")) {
		print STDERR "Interstitial ad detected, skipping...\n";
		my $newurl = $redirelem->attr_get_i("content");

		($newurl) = ($newurl =~ m/url=(.*?)/);
		$newurl = "http://www.nytimes.com" . $newurl;

		print STDERR "Redirect URL is $newurl...\n";
	}


	# Let's clean this up for Liferea, shall we?
	my $messyelement;

	foreach ($articlehtml->look_down("_tag", "div", "class", "enlargeThis")) {
		$_->delete();
	}
	if ($messyelement = $articlehtml->look_down("_tag", "div", "class", "nextArticleLink")) {
		$messyelement->delete();
	}

	my $description = $entries{$url}->look_down("_tag", "description");
	$description->delete_content();

	foreach ($articlehtml->look_down("_tag", "div", "class", "image")) {
		$description->push_content($_->as_HTML());
	}

	if ($messyelement = $articlehtml->look_down("_tag", "div", "id", "articleInline")) {
		$messyelement->delete();
	}

	my %pages;
	my @pages;
	my $pageua = LWP::Parallel::UserAgent->new();
	$pageua->cookie_jar($cookiejar);

	if (my $pageelem = $articlehtml->look_down("_tag", "div", "id", "pageLinks")) {
		print STDERR "Multiple pages detected...\n";

		foreach ($pageelem->look_down("_tag", "a", "title", qr/^Page/)) {
			my $pageurl = "http://www.nytimes.com" . $_->attr_get_i("href");
			print STDERR "Registering $pageurl...\n";

			push(@pages, $pageurl);
			my $pagerequest = HTTP::Request->new();
			$pagerequest->uri($pageurl);
			$pagerequest->method('GET');

			$pageua->register($pagerequest);			
		}

		print STDERR "Downloading pages...\n";

		$pageelem->delete();
	}

	foreach ($articlehtml->look_down("_tag", "div", "id", "articlebody")) {
		$description->push_content($_->as_HTML());
	}

	my $htmlpages = $pageua->wait();

	if ($htmlpages) {
		print STDERR "Sorting pages...\n";
		foreach (values(%$htmlpages)) {
			my $pageresponse = $_->response();
			my $responseurl = $pageresponse->base()->as_string();

			$pages{$responseurl} = $pageresponse;
		}

		foreach (@pages) {
			print STDERR "Processing $_...\n";

			my $pagehtml = HTML::TreeBuilder->new_from_content(
				decode_utf8($pages{$_}->content())
			);

			# Let's clean this up for Liferea, shall we?

			if ($messyelement = $pagehtml->look_down("_tag", "div", "class", "enlargeThis")) {
				$messyelement->delete();
			}
			if ($messyelement = $pagehtml->look_down("_tag", "div", "class", "nextArticleLink")) {
				$messyelement->delete();
			}

			foreach ($pagehtml->look_down("_tag", "div", "class", "image")) {
				$description->push_content($_->as_HTML());
			}
		
			if ($messyelement = $pagehtml->look_down("_tag", "div", "id", "articleInline")) {
				$messyelement->delete();
			}

			if ($messyelement = $pagehtml->look_down("_tag", "div", "id", "pageLinks")) {
				$messyelement->delete();
			}

			foreach ($pagehtml->look_down("_tag", "div", "id", "articlebody")) {
				my $content = $_->as_HTML();
				$content =~ s/\(Page \d+ of \d+\)//g;
				$description->push_content($content);
			}
		}
	}
}

print $feed->as_XML();

Perl 让你的战争刮到RSS

#!/usr/bin/perl

use HTML::Entities;
use LWP::Simple;

# print a feed header
print "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n".
	"<rdf:RDF\n".
	"xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n".
	"xmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n".
	"xmlns=\"http://my.netscape.com/rdf/simple/0.9/\">\n".
	"<channel>\n".
	"  <title>Get Your War On</title>\n".
	"  <link>http://www.mnftiu.cc/mnftiu.cc/war.html</link>\n".
	"  <description>A webcomic about our 9/11 epilogue.</description>\n".
	"</channel>\n\n";

$html_string = get ("http://www.mnftiu.cc/mnftiu.cc/war.html");

$i = 2;

while ($html_string =~ m/<a href="war(\d|\d\d).html">(\d|\d\d)<\/a>/g)
{
	$i++
}

$url = "http://www.mnftiu.cc/mnftiu.cc/war" . $i . ".html";

$html_string = get ($url);

while ($html_string =~ m/<img src="images\/gywo.(.*?).gif" border=0>/g)
{
	print	"<item>\n".
		"<title>" . $1 . "</title>\n".
	 	"<link>" . $url . "</link>\n".
		"<description><img src=\"http://www.mnftiu.cc/mnftiu.cc/images/gywo." . $1 . ".gif\"></description>\n";

	print "</item>\n\n";
}

print "</rdf:RDF>\n";