#!/usr/bin/perl -w
#
# Scan a list of deleted image pages and report which of their deleted
# "Image talk:" pages used the Rtd template (transcluded or substituted).
#
# Input:  one or more files named on the command line, one record per line,
#         tab-separated.  Judging by the variable names the fields are:
#         title, deletion timestamp, user id, user name, deletion comment
#         (NOTE(review): field meanings inferred from names -- confirm
#         against whatever produces the input).
# Output: "match\t<line>" on STDOUT when the deleted talk page text matches
#         the transclusion pattern, "substed\t<line>" when it matches the
#         substituted-category pattern.  Progress (input line number) goes
#         to STDERR.

use strict;
use warnings;

use LWP::UserAgent;
use XML::Simple;
use Term::ReadKey;
use Getopt::Long;
use Data::Dumper;

my $username = 'Ilmari Karonen';
my $server   = 'en.wikipedia.org';

# At least one input file is required in addition to the optional switches.
GetOptions(
    'username|user|u=s' => \$username,
    'server|s=s'        => \$server,
) and @ARGV
    or die "Usage: $0 [-u <username>] [-s <server>] <file>...\n";

# NOTE(review): the login password is posted to this plain-http URL;
# consider https if the target server supports it.
my $apiURI = "http://$server/w/api.php";

# Prompt for the password with terminal echo switched off.
ReadMode 'noecho';
print STDERR "Password for $username \@ $server: ";
my $pass = <STDIN>;
ReadMode 'restore';    # restore echo before anything below can die
print STDERR "\n";
defined $pass or die "No password given\n";
chomp $pass;

my $ua = LWP::UserAgent->new(
    agent      => "Mozilla/4.0 (compatible; $0)",
    from       => 'vyznev@toolserver.org',
    cookie_jar => {},    # in-memory cookie jar so the login session persists
    parse_head => 0,
);

# apireq(key => value, ...)
#
# POST a request to the API at $apiURI.  The given key/value pairs become
# the form fields; "format => 'xml'" is supplied as a default and can be
# overridden by the caller.
#
# Returns the response body parsed by XML::Simple's XMLin().
#
# On an unsuccessful HTTP response a warning is printed and the request is
# retried after a delay that starts at 1 second and doubles on each
# failure.  There is no retry limit, so this sub only returns on success.
sub apireq {
    my $query = { format => 'xml', @_ };
    my $sleep = 1;
    while (1) {
        my $res = $ua->post($apiURI, $query);
        return XMLin($res->content) if $res->is_success;
        warn "API request failed: ", $res->status_line,
            ". Sleeping $sleep seconds...\n";
        sleep $sleep;
        $sleep *= 2;
    }
}

my $login = apireq(
    action     => 'login',
    lgname     => $username,
    lgpassword => $pass,
);
$login->{login}{result} eq 'Success'
    or die "Login failed: $login->{login}{result}\n";

# Matches an opening "{{Rtd" transclusion, optionally prefixed with "msg:"
# and/or "Template:", followed by "}}" or "|".
my $transcl = qr/\{\{[ _]*(msg[ _]*:[ _]*)?(?i:Template[ _]*:[ _]*)?[Rr]td[ _]*(\}\}|\|)/;

# Matches a link to the category "Talk pages of (the) deleted replaceable
# fair use images", followed by "]]" or "|".
my $substed = qr/\[\[[ _]*(?i:Category[ _]*:[ _]*T)alk[ _]+pages[ _]+of[ _]+(the[ _]+)?deleted[ _]+replaceable[ _]+fair[ _]+use[ _]+images[ _]*(\]\]|\|)/;

$| = 1;    # unbuffered STDOUT so results appear as they are found

while (my $line = <>) {
    chomp $line;
    my ($title, $deltime, $uid, $uname, $comment) = split /\t/, $line;

    # Lines without a timestamp field cannot be queried; an undefined
    # timestamp would compare below the cutoff anyway, so skip quietly.
    next if !defined $deltime;
    next if $deltime lt '20061205125500';    # creation date of Template:Rtd
    next if defined $comment && $comment =~ /g8/i;    # did these already

    # Progress indicator: current input line number.
    print STDERR "\r", $., "\t";

    # Fetch the content of a single deleted revision of the talk page,
    # starting from the deletion timestamp.
    my $data = apireq(
        action  => 'query',
        list    => 'deletedrevs',
        titles  => "Image talk:$title",
        drstart => $deltime || "",
        drprop  => 'content',
        drlimit => '1',
    );
    # exists $data->{query}{deletedrevs}{page}{revisions}
    #     or die "No deleted revisions returned for $title ($deltime)!\n",
    #     Data::Dumper->new([$data])->Dump, "\n";

    # No (non-empty) deleted content for this title: nothing to classify.
    my $text = $data->{query}{deletedrevs}{page}{revisions}{rev}{content}
        or next;

    if ($text =~ $transcl) {
        print "match\t$line\n";
    }
    elsif ($text =~ $substed) {
        print "substed\t$line\n";
    }
}