#!/usr/bin/perl -w
#
# Copyright (c) 2009, Ilmari Karonen
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use strict;
use warnings;
use CGI::Carp qw'fatalsToBrowser warningsToBrowser';
use CGI qw'escapeHTML';
use URI::Escape qw'uri_escape_utf8';
use DBI;
use POSIX qw'strftime';

use constant NS_FILE   => 6;    # from MediaWiki
use constant LIMIT_DEF => 10;   # limit used when the request gives no usable one
use constant LIMIT_MAX => 100;  # upper bound the requested limit is clamped to

# MySQL option file handed to DBI (mysql_read_default_file) when connecting.
my $conf = "/home/vyznev/.my.cnf";
# File to which the requested titles are appended on every request.
my $logfile = "/home/vyznev/checkusage.log";

# Print HTTP/CGI headers:
binmode STDOUT, ":utf8";
print "Content-type: text/html;charset=utf-8\n\n";
# The header is out, so CGI::Carp may now emit its queued warnings.
warningsToBrowser(1);

# print HTML head and preamble:
print <<"ENDOFHTML";
Quick and dirty Wikimedia file usage check

Quick and dirty Wikimedia file usage check

ENDOFHTML

# Parse request parameters:
my $q = CGI->new;

# "limit" bounds how many using pages are listed per file per wiki.  A value
# is accepted only if it contains a digit and nothing but digits, dots and
# whitespace; otherwise the default applies.  The result is truncated to an
# integer and clamped to 0 .. LIMIT_MAX.
my $limit = $q->param("limit");
$limit = LIMIT_DEF
    unless defined($limit) and $limit =~ /\d/ and $limit !~ /[^\s\d.]/;
$limit = int($limit);
$limit = 0 if $limit < 0;
$limit = LIMIT_MAX if $limit > LIMIT_MAX;

# "files" holds one title per line; lines without any character other than
# whitespace or underscore are dropped.
my @files = grep /[^\s_]/, map split(/\n/), $q->param("files");
my (@ucfiles, @lcfiles);  # kluge for Wiktionary and other case-sensitive projects
for my $title (@files) {
    # strip invisible Unicode chars; this runs on the raw UTF-8 bytes
    # (E2 80 8E/8F = U+200E/U+200F, E2 80 AA..AE = U+202A..U+202E)
    $title =~ s/\xE2\x80[\x8E\x8F\xAA-\xAE]//g;
    # collapse spaces
    $title =~ s/[\s_]+/_/g;
    $title =~ s/^_//;
    $title =~ s/_$//;
    # convert to upper case (decode first so ucfirst/lcfirst see characters):
    utf8::decode($title);
    my $uc = ucfirst($title);
    my $lc = lcfirst($title);
    push @ucfiles, $uc;
    push @lcfiles, $lc if $lc ne $uc;
}
@files = @ucfiles;

# Log requested titles.  A lexical handle is used so it cannot clash with any
# other bareword handle and is closed automatically if we leave early.
if (open my $log, ">>:utf8", $logfile) {
    print {$log} join(" ", strftime("%Y-%m-%d %H:%M:%S:", gmtime), @files, "(limit=$limit)\n");
    close $log
        or alert("Could not close $logfile: $!");
} else {
    alert("Could not open $logfile for append: $!");
}

# Print query form:
my $selfURL = escapeHTML($q->url);
$selfURL =~ s/%0[AD]//ig;  # XXX KLUGE WTF?
my $filesArea = escapeHTML(join "\n", @files);
$filesArea =~ tr/_/ /;  # show titles with spaces rather than underscores

# NOTE(review): neither $selfURL, $filesArea nor $limit is interpolated in the
# heredoc text that follows -- the form markup appears to be missing; confirm
# against the original script.
print <<"ENDOFHTML";

Enter names of files to be checked below, one per line, without the "File:" / "Image:" prefix.


(Show max. results per wiki per file.)
ENDOFHTML

# Nothing to check: finish the page and stop.
print("\n"), exit unless @files;

# Read list of wikis from the __DATA__ section.  Names are split on
# whitespace, so the list may be one name per line or several per line; no
# chomp is needed afterwards.  ilwikimedia and the wikis matched by the
# second pattern are skipped.
my %private = qw();
my @wikis = grep {
       !/^ilwikimedia$/
    && !/^(?:board|chair|grants|internal|office|wikimaniateam|exec|wg_en|\w+com|arbcom_\w+|otrs_wiki)wiki$/
} map { split ' ' } <DATA>;

# Database names whose host name cannot be derived by the generic rule below.
my %hostname = (
    foundationwiki => "wikimediafoundation.org",
    mediawikiwiki  => "www.mediawiki.org",
    sourceswiki    => "wikisource.org",
);

# "<name>wiki" databases that live under wikimedia.org instead of
# wikipedia.org.
my %wikimedia_org = map +($_ => $_), qw(commons meta advisory incubator collab usability strategy);

foreach my $dbname (@wikis) {
    next if exists $hostname{$dbname};

    # Split e.g. "enwikibooks" into language "en" and project "wikibooks".
    my ($lang, $project) = ($dbname =~ /^([_a-z0-9]+)(wiki(?:books|media|news|quote|source|versity)?|wiktionary)$/);
    if ($lang and $project) {
        if ($project eq "wiki") {
            # mixed bag :(
            if (exists $wikimedia_org{$lang} or $lang =~ /^wikimania\d+$/) {
                $project = "wikimedia";
            } else {
                $project = "wikipedia";
            }
        }
        # Database names use "_" where host names use "-"; a trailing
        # "-labs" and any "-us" become sub-domain separators instead.
        for ($lang) {
            tr/_/-/;
            s/-labs$/.labs/;
            s/-us/.us/;
        }
        $hostname{$dbname} = "$lang.$project.org";
    } else {
        alert("Cannot determine host name for $dbname");
    }
}

# Main CheckUsage loop:
my $i = 0;        # index of the wiki currently being processed
my $n = @wikis;   # total number of wikis
my $cu_sql;       # SQL query to find file uses, cached
my %list_sql;     # per-file SQL query listing the using pages, cached
my (%wikicount, %pagecount);  # per file: wikis using it / total pages using it
my @failed;       # host names of wikis that could not be queried

print '

Checking ', (@files>1 ? @files." files" : "one file"), "...

\n";

# Query every wiki in turn.  A wiki whose connection or usage query fails is
# reported via alert(), remembered in @failed and skipped.
foreach my $dbname (@wikis) {
    $i++;
    my $hostname = $hostname{$dbname} or next;

    # DEBUG:
    #use Socket;
    #alert("Invalid hostname $hostname obtained for $dbname") unless defined gethostbyname($hostname);

    # Show JS progress indicator:
    # NOTE(review): $hostname_safe is not used again in the visible code and
    # the print below emits only a newline -- the progress markup appears to
    # be missing; confirm against the original script.
    (my $hostname_safe = $hostname) =~ tr/-.a-zA-Z0-9//cd;
    print qq(\n);
    # Assigning a true value to $| flushes STDOUT right away; "local"
    # restores the previous buffering mode when the block ends.
    { local $| = 1; }

    # Connect to database (name and host are derived from the wiki's
    # database name; credentials come from the option file in $conf):
    my $database = $dbname . "_p";
    my $dbhost = $dbname . "-p.db.toolserver.org";
    $database =~ tr/-/_/;
    $dbhost =~ tr/_/-/;
    my $data_source = "DBI:mysql:database=$database;host=$dbhost;mysql_read_default_group=client;mysql_read_default_file=$conf";
    my $dbh = eval { DBI->connect($data_source, undef, undef, { RaiseError => 1 }) }
        or alert("SQL connect to $dbname failed: $@") and push(@failed, $hostname) and next;
    $dbh->do("SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED");

    # Construct and execute query (no need to rebuild this for each DB, so
    # it is built once with the first handle and reused):
    $cu_sql ||=
        ("SELECT il_to, COUNT(*) FROM page, imagelinks" .
         " WHERE page_id = il_from AND il_to IN (" .
         join(", ", map $dbh->quote($_), @files, @lcfiles) .
         ") GROUP BY il_to");
    my $cu_rows = eval { $dbh->selectall_arrayref($cu_sql) }
        or alert("SQL query for $dbname failed: $@") and push(@failed, $hostname) and next;
    next unless @$cu_rows;

    # Fetch lists and display results:
    print "\n\n", escapeHTML($hostname), "\n\n\n";
    foreach my $cu_row (@$cu_rows) {
        my ($file, $count) = @$cu_row;
        utf8::decode($file);
        $pagecount{$file} += $count;
        $wikicount{$file}++;
        print "\n\n\n",
            wikilink($hostname, NS_FILE, $file),
            " is used on ",
            ($limit>0 && $count>$limit ? "$count pages (first $limit shown)" :
             $count>1 ? "$count pages" : "one page"),
            ($limit>0?":":"."),
            "\n\n\n";
        next unless $limit > 0;

        # no need to rebuild this either
        $list_sql{$file} ||=
            ("SELECT page_namespace, page_title FROM page, imagelinks" .
             " WHERE page_id = il_from AND il_to = " .
             $dbh->quote($file) .
             " ORDER BY page_namespace, page_title LIMIT $limit");
        my $list_rows = eval { $dbh->selectall_arrayref($list_sql{$file}) }
            or alert("SQL query for $file at $dbname failed: $@") and next;
        # NOTE(review): $list_rows is fetched but never printed in the
        # visible code -- the page list output appears to be missing;
        # confirm against the original script.
        print "\n";
    }
}

# Display summary of results:
print qq(

Summary:

\n);
# NOTE(review): the per-file totals collected in %wikicount / %pagecount are
# not printed anywhere in the visible code; confirm against the original.
print qq(\n);
print "\n";

# List the wikis that could not be queried, with links to each file's
# description page there so the user can check usage by hand.  The lower-case
# title variants are linked only for wiktionary hosts.
if (@failed) {
    print qq(\n\nI was unable to determine usage on the following projects:\n\n    \n);
    foreach my $hostname (@failed) {
        print "\n  • ",
            escapeHTML($hostname), ": ",
            join(", ",
                 map wikilink($hostname, NS_FILE, $_, "#filelinks"),
                 @files, ($hostname =~ /\.wiktionary\.org$/ ? @lcfiles : ())),
            "\n  • \n";
    }
    print qq(\n\nYou may use the links above to check them yourself.\n\n\n);
}

# End of program:
print "\n";
exit;

# Misc. subroutines:

# alert(@message)
# Prints the HTML-escaped message parts to STDOUT, surrounded by blank
# lines.  Always returns 1 so that it can be chained with "and".
sub alert {
    print "\n\n", map(escapeHTML($_), @_), "\n\n\n";
    return 1;
}

# Namespace names indexed by namespace number: "" (main), "Talk", then each
# listed name followed by its "_talk" counterpart.
use constant NSNAMES => ["", "Talk", map +($_, $_."_talk"), qw"User Project File MediaWiki Template Help Category"];

# wikilink($host, $ns, $title, $append)
# Returns an HTML link to a wiki page.
#   $host   - host name of the wiki
#   $ns     - namespace number of the page
#   $title  - page title without namespace prefix; underscores are shown as
#             spaces in the link text
#   $append - optional string appended verbatim to the URL (e.g. a fragment)
# For namespace numbers not covered by NSNAMES the link points at a search
# restricted to that namespace instead of at the page itself.
sub wikilink {
    my ($host, $ns, $title, $append) = @_;
    my ($href, $text);
    if ($ns < @{+NSNAMES}) {
        my $prefix = NSNAMES->[$ns];
        $prefix .= ":" if $prefix;
        $text = $prefix . $title;
        $href = "http://$host/w/index.php?title=" . uri_escape_utf8($text);
    } else {
        # XXX: oh dear, a custom namespace :(
        # can't make a proper link, have to kluge it
        $text = "{{ns:$ns}}:$title";
        $href = "http://$host/wiki/Special:Search?ns$ns=1&fulltext=1&search=" . uri_escape_utf8($title);
    }
    $text =~ tr/_/ /;
    $href .= $append if defined $append;
    # The URL is escaped as well: it ends up inside a double-quoted
    # attribute and may contain "&".
    return '<a href="' . escapeHTML($href) . '">' . escapeHTML($text) . '</a>';
}

__DATA__
aawiki aawikibooks aawiktionary abwiki abwiktionary advisorywiki afwiki afwikibooks afwikiquote afwiktionary akwiki akwikibooks akwiktionary alswiki alswikibooks alswikiquote alswiktionary amwiki amwikiquote amwiktionary angwiki angwikibooks angwikiquote angwikisource angwiktionary anwiki anwiktionary arcwiki arwiki arwikibooks arwikinews arwikiquote arwikisource arwiktionary arzwiki astwiki astwikibooks astwikiquote astwiktionary aswiki aswikibooks aswiktionary avwiki avwiktionary aywiki aywikibooks aywiktionary azwiki azwikibooks azwikiquote azwikisource azwiktionary barwiki bat_smgwiki bawiki bawikibooks bawiktionary betawikiversity bewiki bewikibooks bewikiquote bewiktionary be_x_oldwiki bgwiki bgwikibooks bgwikinews bgwikiquote bgwikisource bgwiktionary bhwiki bhwiktionary biwiki biwikibooks biwiktionary bmwiki bmwikibooks bmwikiquote bmwiktionary bnwiki bnwikibooks bnwiktionary boardwiki bowiki bowikibooks bowiktionary bpywiki brwiki brwiktionary bswiki bswikibooks bswikinews bswikiquote bswikisource bswiktionary bugwiki bxrwiki cawiki cawikibooks cawikinews cawikiquote cawikisource cawiktionary cbk_zamwiki cdowiki cebwiki cewiki chairwiki chapcomwiki chowiki chrwiki chrwiktionary chwiki chwikibooks chwikimedia chwiktionary chywiki commonswiki cowiki cowikibooks 
cowikiquote cowiktionary crwiki crwikiquote crwiktionary csbwiki csbwiktionary cswiki cswikibooks cswikiquote cswikisource cswiktionary cuwiki cvwiki cvwikibooks cywiki cywikibooks cywikiquote cywikisource cywiktionary dawiki dawikibooks dawikiquote dawikisource dawiktionary dewiki dewikibooks dewikinews dewikiquote dewikisource dewikiversity dewiktionary diqwiki dvwiki dvwiktionary dzwiki dzwiktionary eewiki elwiki elwikibooks elwikiquote elwikisource elwiktionary emlwiki enwiki enwikibooks enwikinews enwikiquote enwikisource enwikiversity enwiktionary eowiki eowikibooks eowikiquote eowiktionary eswiki eswikibooks eswikinews eswikiquote eswikisource eswikiversity eswiktionary etwiki etwikibooks etwikiquote etwikisource etwiktionary euwiki euwikibooks euwikiquote euwiktionary fawiki fawikibooks fawikiquote fawikisource fawiktionary ffwiki fiu_vrowiki fiwiki fiwikibooks fiwikiquote fiwikisource fiwiktionary fjwiki fjwiktionary foundationwiki fowiki fowikisource fowiktionary frpwiki frwiki frwikibooks frwikinews frwikiquote frwikisource frwikiversity frwiktionary furwiki fywiki fywikibooks fywiktionary gawiki gawikibooks gawikiquote gawiktionary gdwiki gdwiktionary glkwiki glwiki glwikibooks glwikiquote glwikisource glwiktionary gnwiki gnwikibooks gnwiktionary gotwiki gotwikibooks grantswiki guwiki guwikibooks guwikiquote guwiktionary gvwiki gvwiktionary hawiki hawiktionary hawwiki hewiki hewikibooks hewikinews hewikiquote hewikisource hewiktionary hiwiki hiwikibooks hiwikiquote hiwiktionary howiki hrwiki hrwikibooks hrwikiquote hrwikisource hrwiktionary hsbwiki htwiki htwikisource huwiki huwikibooks huwikiquote huwikisource huwiktionary hywiki hywikibooks hywikiquote hywiktionary hzwiki iawiki iawikibooks iawiktionary idwiki idwikibooks idwikiquote idwikisource idwiktionary iewiki iewikibooks iewiktionary igwiki iiwiki ikwiki ikwiktionary ilowiki ilwikimedia incubatorwiki internalwiki iowiki iowiktionary iswiki iswikibooks iswikiquote iswikisource iswiktionary 
itwiki itwikibooks itwikinews itwikiquote itwikisource itwiktionary iuwiki iuwiktionary jawiki jawikibooks jawikinews jawikiquote jawikisource jawiktionary jbowiki jbowiktionary jvwiki jvwiktionary kawiki kawikibooks kawikiquote kawiktionary kgwiki kiwiki kjwiki kkwiki kkwikibooks kkwikiquote kkwiktionary klwiki klwiktionary kmwiki kmwikibooks kmwiktionary knwiki knwikibooks knwikiquote knwikisource knwiktionary kowiki kowikibooks kowikiquote kowikisource kowiktionary krwiki krwikiquote kshwiki kswiki kswikibooks kswikiquote kswiktionary kuwiki kuwikibooks kuwikiquote kuwiktionary kvwiki kwwiki kwwikiquote kwwiktionary kywiki kywikibooks kywikiquote kywiktionary ladwiki lawiki lawikibooks lawikiquote lawikisource lawiktionary lbewiki lbwiki lbwikibooks lbwikiquote lbwiktionary lgwiki lijwiki liwiki liwiktionary lmowiki lnwiki lnwikibooks lnwiktionary lowiki lowiktionary ltwiki ltwikibooks ltwikiquote ltwikisource ltwiktionary lvwiki lvwikibooks lvwiktionary map_bmswiki mediawikiwiki metawiki mgwiki mgwikibooks mgwiktionary mhwiki mhwiktionary miwiki miwikibooks miwiktionary mkwiki mkwikibooks mkwikisource mkwiktionary mlwiki mlwikibooks mlwikiquote mlwikisource mlwiktionary mnwiki mnwikibooks mnwiktionary mowiki mowiktionary mrwiki mrwikibooks mrwikiquote mrwiktionary mswiki mswikibooks mswiktionary mtwiki mtwiktionary muswiki mywiki mywikibooks mywiktionary mznwiki nahwiki nahwikibooks nahwiktionary napwiki nawiki nawikibooks nawikiquote nawiktionary nds_nlwiki ndswiki ndswikibooks ndswikiquote ndswiktionary newiki newikibooks newiktionary newwiki ngwiki nlwiki nlwikibooks nlwikimedia nlwikinews nlwikiquote nlwikisource nlwiktionary nnwiki nnwikiquote nnwiktionary nostalgiawiki novwiki nowiki nowikibooks nowikinews nowikiquote nowikisource nowiktionary nrmwiki nvwiki nywiki nzwikimedia ocwiki ocwikibooks ocwiktionary officewiki omwiki omwiktionary orwiki orwiktionary oswiki pagwiki pamwiki papwiki pawiki pawikibooks pawiktionary pdcwiki pihwiki piwiki piwiktionary 
plwiki plwikibooks plwikimedia plwikinews plwikiquote plwikisource plwiktionary pmswiki pswiki pswikibooks pswiktionary ptwiki ptwikibooks ptwikinews ptwikiquote ptwikisource ptwiktionary quwiki quwikibooks quwikiquote quwiktionary rmwiki rmwikibooks rmwiktionary rmywiki rnwiki rnwiktionary roa_rupwiki roa_rupwiktionary roa_tarawiki rowiki rowikibooks rowikinews rowikiquote rowikisource rowiktionary ruwiki ruwikibooks ruwikinews ruwikiquote ruwikisource ruwiktionary rwwiki rwwiktionary sawiki sawikibooks sawiktionary scnwiki scnwiktionary scowiki scwiki scwiktionary sdwiki sdwikinews sdwiktionary searchcomwiki sewiki sewikibooks sgwiki sgwiktionary shwiki shwiktionary simplewiki simplewikibooks simplewikiquote simplewiktionary siwiki siwikibooks siwiktionary skwiki skwikibooks skwikiquote skwikisource skwiktionary slwiki slwikibooks slwikiquote slwikisource slwiktionary smwiki smwiktionary snwiki snwiktionary sourceswiki sowiki sowiktionary spcomwiki specieswiki sqwiki sqwikibooks sqwikiquote sqwiktionary srwiki srwikibooks srwikinews srwikiquote srwikisource srwiktionary sswiki sswiktionary stwiki stwiktionary suwiki suwikibooks suwikiquote suwiktionary svwiki svwikibooks svwikinews svwikiquote svwikisource svwiktionary swwiki swwikibooks swwiktionary tawiki tawikibooks tawikinews tawikiquote tawiktionary testwiki tetwiki tewiki tewikibooks tewikiquote tewikisource tewiktionary tgwiki tgwikibooks tgwiktionary thwiki thwikibooks thwikinews thwikiquote thwikisource thwiktionary tiwiki tiwiktionary tkwiki tkwikibooks tkwikiquote tkwiktionary tlwiki tlwikibooks tlwiktionary tnwiki tnwiktionary tokiponawiki tokiponawikibooks tokiponawikiquote tokiponawiktionary towiki towiktionary tpiwiki tpiwiktionary trwiki trwikibooks trwikiquote trwikisource trwiktionary tswiki tswiktionary ttwiki ttwikibooks ttwikiquote ttwiktionary tumwiki twwiki twwiktionary tywiki udmwiki ugwiki ugwikibooks ugwikiquote ugwiktionary ukwiki ukwikibooks ukwikinews ukwikiquote ukwikisource 
ukwiktionary urwiki urwikibooks urwikiquote urwiktionary uzwiki uzwikibooks uzwikiquote uzwiktionary vecwiki vewiki viwiki viwikibooks viwikiquote viwikisource viwiktionary vlswiki vowiki vowikibooks vowikiquote vowiktionary warwiki wawiki wawikibooks wawiktionary wikimania2005wiki wikimania2006wiki wikimania2007wiki wikimaniateamwiki wowiki wowikiquote wowiktionary wuuwiki xalwiki xhwiki xhwikibooks xhwiktionary yiwiki yiwikisource yiwiktionary yowiki yowikibooks yowiktionary zawiki zawikibooks zawikiquote zawiktionary zeawiki zh_classicalwiki zh_min_nanwiki zh_min_nanwikibooks zh_min_nanwikiquote zh_min_nanwikisource zh_min_nanwiktionary zh_yuewiki zhwiki zhwikibooks zhwikinews zhwikiquote zhwikisource zhwiktionary zuwiki zuwikibooks zuwiktionary kabwiki tawikisource itwikiversity rswikimedia hakwiki auditcomwiki pa_uswikimedia qualitywiki bnwikisource hsbwiktionary wikimania2008wiki otrs_wikiwiki fiwikinews hywikisource brwikiquote bclwiki liwikiquote execwiki elwikiversity stqwiki crhwiki dsbwiki wg_enwiki de_labswikimedia en_labswikimedia arbcom_enwiki sewikimedia wikimania2009wiki huwikinews myvwiki extwiki ganwiki jawikiversity cswikinews hifwiki kaawiki mdfwiki sahwiki szlwiki srnwiki liwikisource cswikiversity ptwikiversity collabwiki nomcomwiki arbcom_dewiki arbcom_nlwiki nowikimedia ukwikimedia pntwiki fiwikiversity usabilitywiki wikimania2010wiki ruwikimedia mhrwiki strategywiki uawikimedia cowikimedia ckbwiki pnbwiki mwlwiki acewiki trwikinews