<?php
# Use the full "<?php" open tag: the short "<?" form is only recognised when
# the short_open_tag ini setting is enabled, and otherwise the whole script
# would be sent to the output as plain text.

# Flag this run as an administrative script before the shared bootstrap is
# loaded (presumably WSInit.php checks WS_ADMIN -- TODO confirm).
define("WS_ADMIN",true);
require_once( "../common/WSInit.php" );
#-- Config ----------------------------------------------
#$wsgWikiListDB= 'toolserver';
#$wsgWikiListTable= 'wiki';
#FIXME: add is_private flag
#NOTE: hard-coded special cases
# Per-database overrides, keyed by database name without the "_p" suffix.
#  - an array value supplies fields that makeWikiInfo() would otherwise derive
#    from the database name (lang, family, domain, is_* flags, ...)
#  - a value of false makes makeWikiInfo() skip the database entirely
# Every key must appear only once: PHP keeps just the last value of a
# repeated key in an array literal, silently dropping the earlier entry.
$wikis= array(
    'closed_zh_twwiki' => array(
        'lang' => 'zh',
        'domain' => NULL, #'zh-tw.wikipedia.org',
        'is_closed' => 1,
    ),
    'tlhwiki' => array(
        'lang' => 'tlh',
        'domain' => NULL, #'tlh.wikipedia.org',
        'is_closed' => 1,
    ),
    'pa_uswikimedia' => array(
        'lang' => 'pa-us',
        'domain' => 'pa.us.wikimedia.org',
        'is_meta' => 1,
    ),
    'be_x_oldwiki' => array(
        'lang' => 'be-x-old',
        'domain' => 'be-x-old.wikipedia.org',
    ),
    'betawikiversity' => array(
        'lang' => 'en',
        'family' => 'wikiversity',
        'domain' => 'beta.wikiversity.org',
        'is_multilang' => 1,
        'is_meta' => 0, #well, it's both, meta and incubator...
    ),
    'nostalgiawiki' => array(
        'lang' => 'en',
        'domain' => 'nostalgia.wikipedia.org',
        'is_closed' => 1,
    ),
    'sep11wiki' => array(
        'lang' => 'en',
        'family' => 'memoriam',
        'domain' => 'sep11.wikipedia.org',
    ),
    'commonswiki' => array(
        'lang' => 'en',
        'family' => 'commons',
        'domain' => 'commons.wikimedia.org',
        'is_multilang' => 1,
        'is_meta' => 0,
        'root_category' => 'CommonsRoot', #TODO: make this 'Categories' eventually...
        'server' => 2,
    ),
    'advisorywiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'advisory.wikimedia.org',
        'is_multilang' => 1,
        'is_meta' => 1,
    ),
    'sourceswiki' => array(
        'lang' => 'en',
        'family' => 'wikisource',
        'domain' => 'wikisource.org',
        'is_multilang' => 1,
    ),
    'specieswiki' => array(
        'lang' => 'en',
        'family' => 'wikispecies',
        'domain' => 'species.wikimedia.org',
        'is_multilang' => 1,
    ),
    'metawiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'meta.wikimedia.org',
        'is_meta' => 1,
    ),
    'boardwiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'board.wikimedia.org',
        'is_meta' => 1,
        'is_closed' => 1,
    ),
    'foundationwiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'wikimediafoundation.org',
        'is_meta' => 1,
    ),
    'qualitywiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'quality.wikimedia.org',
        'is_meta' => 1,
    ),
    'mediawikiwiki' => array(
        'lang' => 'en',
        'family' => 'mediawiki',
        'domain' => 'www.mediawiki.org',
        'is_meta' => 1,
    ),
    'grantswiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'grants.wikimedia.org',
        'is_meta' => 1,
    ),
    'collabwiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'collab.wikimedia.org',
        'is_meta' => 1,
    ),
    'usabilitywiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'usability.wikimedia.org',
        'is_meta' => 1,
    ),
    'outreachwiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'outreach.wikimedia.org',
        'is_meta' => 1,
    ),
    /*
    'wikimaniawiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'wikimania2005.wikimedia.org',
        'is_meta' => 1,
    ),
    'wikimania2006wiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'wikimania2006.wikimedia.org',
        'is_meta' => 1,
    ),
    'wikimania2007wiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'wikimania2007.wikimedia.org',
        'is_meta' => 1,
    ),
    'wikimania2008wiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'wikimania2008.wikimedia.org',
        'is_meta' => 1,
    ),
    */
    'wikimaniateamwiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'wikimaniateam.wikimedia.org',
        'is_meta' => 1,
    ),
    'incubatorwiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'incubator.wikimedia.org',
        'is_meta' => 0,
        'is_multilang' => 1,
    ),
    'chwikimedia' => array(
        'lang' => 'de',
        'family' => 'wikimedia',
        'domain' => 'ch.wikimedia.org',
        'is_meta' => 1,
        'is_multilang' => 1,
    ),
    'nzwikimedia' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'nz.wikimedia.org',
        'is_meta' => 1,
    ),
    /*
    'chapcomwiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'chapcom.wikimedia.org',
        'is_meta' => 1,
    ),
    'comcomwiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'comcom.wikimedia.org',
        'is_meta' => 1,
    ),
    */
    'testwiki' => array(
        'lang' => 'en',
        'family' => 'wikimedia',
        'domain' => 'test.wikipedia.org',
        'is_meta' => 1,
    ),

    # databases that are skipped entirely
    'arbcom_enwiki' => false,
    'internalwiki' => false,
    'devwikiinternal' => false,
    'wikimaniawiki' => false,
    'zh_cnwiki' => false,
    'minnanwiki' => false,
    'demowikinews' => false,
    'quotewiki' => false,
    'zh_twwiki' => false,
    'ru_sibwiki' => false,
    'nomcomwiki' => false,
    'l10nwiki' => false,

    # closed wikis that need no other special handling
    'aawikibooks' => array('is_closed' => true),
    'aawiki' => array('is_closed' => true),
    'aawiktionary' => array('is_closed' => true),
    'abwiktionary' => array('is_closed' => true),
    'akwikibooks' => array('is_closed' => true),
    'akwiktionary' => array('is_closed' => true),
    'angwikisource' => array('is_closed' => true),
    'aswikibooks' => array('is_closed' => true),
    'aswiktionary' => array('is_closed' => true),
    'avwiktionary' => array('is_closed' => true),
    'aywikibooks' => array('is_closed' => true),
    'bawikibooks' => array('is_closed' => true),
    'bhwiktionary' => array('is_closed' => true),
    'biwikibooks' => array('is_closed' => true),
    'biwiktionary' => array('is_closed' => true),
    'bmwikibooks' => array('is_closed' => true),
    'bmwikiquote' => array('is_closed' => true),
    'bmwiktionary' => array('is_closed' => true),
    'bowikibooks' => array('is_closed' => true),
    'bowiktionary' => array('is_closed' => true),
    'chowiki' => array('is_closed' => true),
    'chwikibooks' => array('is_closed' => true),
    'chwiktionary' => array('is_closed' => true),
    'crwikiquote' => array('is_closed' => true),
    'crwiktionary' => array('is_closed' => true),
    'dkwiktionary' => array('is_closed' => true),
    'dzwiktionary' => array('is_closed' => true),
    'gawikibooks' => array('is_closed' => true),
    'gawikiquote' => array('is_closed' => true),
    'gnwikibooks' => array('is_closed' => true),
    'gotwikibooks' => array('is_closed' => true),
    'guwikibooks' => array('is_closed' => true),
    'howiki' => array('is_closed' => true),
    'htwikisource' => array('is_closed' => true),
    'hzwiki' => array('is_closed' => true),
    'iiwiki' => array('is_closed' => true),
    'kjwiki' => array('is_closed' => true),
    'krwikiquote' => array('is_closed' => true),
    'krwiki' => array('is_closed' => true),
    'kswikibooks' => array('is_closed' => true),
    'kswikiquote' => array('is_closed' => true),
    'kwwikiquote' => array('is_closed' => true),
    'lnwikibooks' => array('is_closed' => true),
    'mhwiki' => array('is_closed' => true),
    'mhwiktionary' => array('is_closed' => true),
    'miwikibooks' => array('is_closed' => true),
    'mowiki' => array('is_closed' => true),
    'mowiktionary' => array('is_closed' => true),
    'muswiki' => array('is_closed' => true),
    'mywikibooks' => array('is_closed' => true),
    'nahwikibooks' => array('is_closed' => true),
    'nawikiquote' => array('is_closed' => true),
    'ndswikiquote' => array('is_closed' => true),
    # 'nostalgiawiki' is deliberately not repeated here: it has a full entry
    # near the top, and a second entry would replace it (losing lang/domain).
    'orwiktionary' => array('is_closed' => true),
    'piwiktionary' => array('is_closed' => true),
    'quwikibooks' => array('is_closed' => true),
    'quwikiquote' => array('is_closed' => true),
    'rmwikibooks' => array('is_closed' => true),
    'rmwiktionary' => array('is_closed' => true),
    'scwiktionary' => array('is_closed' => true),
    'sewikibooks' => array('is_closed' => true),
    'snwiktionary' => array('is_closed' => true),
    'tkwikiquote' => array('is_closed' => true),
    # 'tlhwiki' is deliberately not repeated here: it has a full entry near
    # the top of this array.
    'tokiponawikibooks' => array('is_closed' => true),
    'tokiponawikiquote' => array('is_closed' => true),
    'tokiponawiki' => array('is_closed' => true),
    'tokiponawiktionary' => array('is_closed' => true),
    'towiktionary' => array('is_closed' => true),
    'ttwikiquote' => array('is_closed' => true),
    'twwiktionary' => array('is_closed' => true),
    'ugwikibooks' => array('is_closed' => true),
    'ugwikiquote' => array('is_closed' => true),
    'vowikiquote' => array('is_closed' => true),
    'xhwiktionary' => array('is_closed' => true),
    'yowiktionary' => array('is_closed' => true),
    'zawikiquote' => array('is_closed' => true),
    #'idwikisource' => false, #FIXME: why is this not accessible?!
    #'zh_cnwiki' => false, #FIXME: why is this not accessible?!
);
#if (!isset($brokenWikis)) $brokenWikis= array();

# Map of wiki domain => root category name; filled in further down when root
# category detection is requested, and read by makeWikiInfo().
$rootCategories= array();
#--------------------------------------------------------
/**
 * Build the metadata record for a single wiki database.
 *
 * The language, family and domain are derived from the database name and can
 * be overridden per database through the global $wikis table. The wiki's size
 * is read from the ss_good_articles column of its site_stats table.
 *
 * @param string $dbname database name, with or without the "_p" suffix
 * @param object $db     open connection used to query "$dbname.site_stats";
 *                       must offer query(), fetchRow(), freeResult(),
 *                       lastError() and lastErrno()
 * @param int    $server cluster number, stored as $info['server']; this
 *                       replaces any 'server' value configured in $wikis
 * @return array|false   the info record, or false if the name is not a
 *                       recognised wiki database, the wiki is disabled in
 *                       $wikis, it is a test wiki, or its size could not be
 *                       determined
 */
function makeWikiInfo( $dbname, $db, $server = 1 ) {
    global $wikis;
    global $rootCategories;

    $dbname= strtolower($dbname);

    # the "_p" suffix is not part of the wiki's name
    $m= array();
    $basename= $dbname;
    if (preg_match('/^(.*)_p$/',$basename,$m)) $basename= $m[1];

    # Derive language / family (and for some families the domain) from the
    # name. Every accepted pattern consists of word characters only, so the
    # name can be put into the SQL statement below as it is.
    $domain = NULL;
    if (preg_match('/^(wikimania\d+)(wiki)?$/',$basename,$m)) {
        $lang = "en";
        $family = "wikimania";
        $domain = $m[1].".wikimedia.org";
    }
    else if (preg_match('/^(\w+com)wiki$/',$basename,$m)) {
        $lang = "en";
        $family = "wikimedia";
        $domain = $m[1].".wikimedia.org";
    }
    else if (preg_match('/^(\w+)(wiki(\w*)|wikt(ionary)?)$/',$basename,$m)) {
        $lang = $m[1];
        $family = $m[2];
        if (preg_match('/^(\w+)_labs/', $lang, $m)) {
            $family = "labs";
            $lang = $m[1];
            $domain = "$lang.labs.wikimedia.org";
        }
    }
    else {
        return false;
    }

    # Hard-coded overrides: a missing (or NULL) entry means "no overrides",
    # an entry of false means "skip this database".
    $info= isset($wikis[$basename]) ? $wikis[$basename] : array();
    if ($info===false) return false;

    wsfLog("inspecting database $dbname ($basename)",LL_DEBUG);
    $info['dbname'] = $dbname;

    if (!isset($info['lang'])) {
        $info['lang']= str_replace('_','-',$lang);
        $subdomain= $info['lang'];
        if (strpos($info['lang'],'test')!==false) {
            return false;
        }
        # the subdomain stays "simple", only the language code changes
        if ($info['lang']=='simple') $info['lang']= 'en-simple';
    }
    else $subdomain= $info['lang'];

    if (!isset($info['family'])) {
        $info['family']= $family;
        if ($info['family']=='wiki') $info['family']= 'wikipedia';
        else if ($info['family']=='wikt') $info['family']= 'wiktionary';
    }

    if (!isset($info['domain'])) {
        if ($domain) $info['domain']= $domain;
        else if (!empty($info['is_closed'])) $info['domain']= NULL;
        else $info['domain']= $subdomain.'.'.$info['family'].'.org';
    }

    if (!isset($info['is_meta'])) {
        if ($info['family']=='wikimedia') $info['is_meta']= 1;
        else if ($info['family']=='wikimania') $info['is_meta']= 1;
        else if ($info['family']=='labs') $info['is_meta']= 1;
        else $info['is_meta']= 0;
    }

    if (!isset($info['is_multilang'])) {
        if ($info['family']=='wikimania') $info['is_multilang']= 1;
        else $info['is_multilang']= 0;
    }

    if (!isset($info['is_sensitive'])) {
        if ($info['family'] === 'wiktionary') $info['is_sensitive']= 1;
        else $info['is_sensitive']= 0;
    }

    if (!isset($info['is_closed'])) {
        # A wiki without a domain counts as closed. (This used to be
        # overwritten with 0 unconditionally because of a missing "else".)
        if (!$info['domain']) $info['is_closed']= 1;
        else $info['is_closed']= 0;
    }

    if (!isset($info['root_category'])) {
        # root categories are collected per domain by the main script
        if ($info['domain'] !== NULL && isset($rootCategories[$info['domain']])) {
            $info['root_category']= $rootCategories[$info['domain']];
        }
        else $info['root_category']= NULL;
    }

    if (!isset($info['script_path'])) {
        $info['script_path']= '/w/';
    }

    # the cluster we were called for always wins over a configured value
    $info['server'] = $server;

    $sql = "SELECT ss_good_articles from $dbname.site_stats";
    try {
        $res= $db->query($sql,"makeWikiInfo($dbname)");
        if ( $res && $row= $db->fetchRow($res) ) {
            $info['size']= $row[0];
        }
        else {
            $info['size']= false;
            wsfLog("failed to determine size of wiki {$info['domain']} from $dbname (cluster $server)! ".$db->lastError(), LL_ERROR);
        }
        if ($res) $db->freeResult($res);
    }
    catch (Exception $ex) {
        wsfLog("failed to determine size of wiki {$info['domain']} from $dbname (cluster $server)! ".$ex->getMessage(), LL_ERROR);
        $info['size']= false;
        $db->lastErrno(); //reset??
    }

    # a wiki whose size cannot be determined is not listed at all
    if ( $info['size']===false ) {
        return false;
    }
    return $info;
}
/**
 * Print one wiki info record as a single line of tab separated values.
 *
 * The columns are, in this order: dbname, lang, family, domain, size,
 * script_path, root_category, is_meta, is_multilang, is_closed,
 * is_sensitive, server. The line is terminated by a newline.
 *
 * @param array $info record holding all of the fields listed above
 */
function printWikiInfo( $info ) {
    $columns= array(
        'dbname', 'lang', 'family', 'domain', 'size', 'script_path',
        'root_category', 'is_meta', 'is_multilang', 'is_closed',
        'is_sensitive', 'server',
        #'is_broken',
    );

    $cells= array();
    foreach ($columns as $column) {
        $cells[]= $info[$column];
    }

    print implode("\t", $cells) . "\n";
}
/**
 * Store one wiki info record in the table named by the global $targetTable.
 *
 * A new row is inserted; if a row with the same key exists already, only its
 * size is replaced, and its root_category is filled in if it was NULL.
 *
 * @param object $db   connection to the database holding the wiki list
 * @param array  $info wiki info record; entries with integer keys are
 *                     dropped by wsfStripIntKeys() before the row is built
 * @return mixed whatever $db->query() returns for the statement
 */
function updateWikiInfo( &$db, $info ) {
    global $targetTable;

    # one "column = value" pair per field: numbers go in verbatim, booleans
    # become 1/0, everything else is quoted by the connection
    $assignments= array();
    foreach (wsfStripIntKeys($info) as $column => $value) {
        if (is_int($value) || is_float($value)) {
            $literal= $value;
        }
        else if (is_bool($value)) {
            $literal= $value ? '1' : '0';
        }
        else {
            $literal= $db->addQuotes($value);
        }
        $assignments[]= "$column = " . $literal;
    }
    $values= implode(', ', $assignments);

    $sql = "INSERT INTO $targetTable
SET $values
ON DUPLICATE KEY UPDATE
size=VALUES(size),
root_category=IF(root_category IS NULL, VALUES(root_category), root_category);
";
    return $db->query($sql,'WikiList::updateWikiInfo');
}
#--------------------------------------------------------
# Main script. $options and $args are not set in this file; presumably the
# bootstrap include provides them from the command line -- TODO confirm.
$update= isset($options['update']);
$csv= isset($options['csv']) ? $options['csv'] : NULL;
$from= isset($options['from']) ? $options['from'] : NULL;
$fastMode= isset($options['fast']);
$touchy= isset($options['touchy']);
$dbpattern= isset($options['dbpattern']) ? $options['dbpattern'] : NULL;
$dbantipattern= isset($options['dbantipattern']) ? $options['dbantipattern'] : NULL;
$truncate= isset($options['truncate']) ? $options['truncate'] : NULL;
$top= NULL;
$show= NULL;
$rootsWiki= NULL;
$recycle = true; //reuse db connection
$thedb = NULL;

# --roots names the wiki to read root categories from; updates always use
# enwiki unless told otherwise
if (isset( $options['roots'])) {
    $rootsWiki= $options['roots'];
    if ($rootsWiki===true || $rootsWiki===1) $rootsWiki= 'enwiki';
}
else if ($update) $rootsWiki= 'enwiki';

if (isset( $options['ll'])) {
    $wsgLogLevel= $options['ll'];
}
if (isset( $options['top'])) {
    $top= $options['top'];
}
if (isset( $options['show'])) {
    $show= $options['show'];
}

$targetDB= $wsgWikiListDB;
$targetTable= $wsgWikiListTable;
if (isset($options['db'])) $targetDB= $options['db'];
if (isset($options['table'])) $targetTable= $options['table'];

# a plain database name is looked up on the default wiki database server
$u= $targetDB;
if (strpos($u,':')===false && strpos($u, '%')!==0) $u= "$wsgWikiDB/$u";
$db= openConnection($u);
if (!$db) {
    wsfLog("failed to connect to database!",LL_ERROR);
    exit(1); # report the failure to the calling shell
}

if ($rootsWiki) {
    wsfLog("finding root categories...",LL_INFO);
    require_once('WikiAccess.php');

    #hardwired - we have nowhere to look this up yet.
    $en= new stdclass();
    $en->lang='en';
    $en->family='wikipedia';
    $en->domain= "{$en->lang}.{$en->family}.org";
    $en->script_path='/w/';
    $en->baseURL="http://{$en->domain}{$en->script_path}index.php";
    $en->dbname=$rootsWiki;
    $en->is_multilang=false;
    $en->ucFirst=true;
    $en->is_sensitive=false;
    $en->root_category=NULL;
    $en->slurpNamespaces=true;
    $en->server = 1;
    if (isset($wsgWikiClusterDBs) && is_array($wsgWikiClusterDBs) && !empty($wsgWikiClusterDBs)) {
        $en->dbURL= $wsgWikiClusterDBs[$en->server] . "/$rootsWiki";
    } else {
        $en->dbURL= "$wsgWikiDB/$rootsWiki";
    }

    $enwiki= WikiAccess::newInstance($en);
    if ($enwiki) {
        # the links found on the "Categories" category page that carry a
        # language prefix name the root category of the respective wiki
        $e= $enwiki->loadInfo( 'Categories', NS_CATEGORY );
        if ($e && $e['text']) {
            wsfLog("Extracting links...", LL_DEBUG);
            $links= $enwiki->extractLinks($e['text']);
            $rootCategories['en.wikipedia.org']= 'Categories';
            wsfLog("using root category for en.wikipedia.org: Categories", LL_VERBOSE);
            foreach ($links as $lnk) {
                if (!$lnk['lang'] || $lnk['escaped']) continue;
                $domain= str_replace('_','-',$lnk['lang']).'.wikipedia.org';
                $root= $lnk['link'];
                $root= preg_replace('/^.*?:\s*/', '', $root); # strip the prefix up to the first colon
                $root= $enwiki->asDBKey($root); #FIXME: target wiki may have different ucFirst-setting!
                $rootCategories[$domain]= $root;
                wsfLog("found root category for $domain: $root", LL_VERBOSE);
            }
        }
        $enwiki->close();
        wsfLog("root categories complete", LL_VERBOSE);
    }
}

if ($top) {
    # list the largest wikis; the limit is forced to an integer before it
    # goes into the statement
    $sql= "select * from $targetTable order by size desc limit ".(int)$top.";";
    $res= $db->query($sql,'WikiList: show top');
    if ($res) {
        while ( $row= $db->fetchRow($res) ) {
            printWikiInfo($row);
        }
        $db->freeResult($res);
    }
    else {
        wsfLog("failed to query $targetTable! ".$db->lastError(),LL_ERROR);
    }
}
else if ($show) {
    # show the entry for a single domain
    $sql= "select * from $targetTable where domain = ".$db->addQuotes($show);
    $res= $db->query($sql,'WikiList: show entry');
    if (!$res) {
        wsfLog("failed to query $targetTable! ".$db->lastError(),LL_ERROR);
    }
    else {
        if ( $row= $db->fetchRow($res) ) {
            printWikiInfo($row);
        }
        else {
            wsfLog("not found: $show",LL_WARN);
        }
        $db->freeResult($res);
    }
}
else if ($update || $csv) {
    if ($truncate && $update && $db) {
        wsfLog("truncating $targetTable", LL_INFO);
        $db->query("TRUNCATE $targetTable");
    }

    if ($args) {
        wsfLog("processing ".sizeof($args)." wikis", LL_VERBOSE);
        foreach ($args as $w) {
            # this mode is disabled; the code below the throw is kept as a
            # reminder of what it is supposed to do once it is repaired
            throw new Exception("this mode is currently borken");
            $info= makeWikiInfo($w); #FIXME: broken, needs db connection and cluster id!
            if (!$info) {
                if (!$csv) wsfLog("skipping: $w",LL_WARN);
            }
            else if ($csv) printWikiInfo($info);
            else {
                updateWikiInfo($db,$info);
                wsfLog("updated {$w}",LL_INFO);
            }
        }
    }
    else {
        # without a cluster map, the default server is treated as cluster 1
        $clusters = !empty($wsgWikiClusterDBs) ? $wsgWikiClusterDBs : array( 1 => $wsgWikiDB );

        foreach ($clusters as $serverNr => $serverURL) {
            $serverDB = openConnection($serverURL."/toolserver");
            if (!$serverDB) {
                wsfLog("failed to connect to $serverNr: $serverURL", LL_ERROR);
                continue;
            }

            wsfLog("listing databases for cluster $serverNr (pattern: $dbpattern)", LL_VERBOSE);
            $sql= "show databases";
            #if ($dbpattern) $sql .= " like ".$serverDB->addQuotes($dbpattern)." ";
            $sql .= ";";
            $res= $serverDB->query($sql,'WikiList: show databases');
            #$serverDB->ignoreErrors( !$touchy );

            if ($res) {
                while ( $row= $serverDB->fetchRow($res) ) {
                    # --from, --dbpattern and --dbantipattern narrow down the
                    # databases to look at; the patterns are regular expressions
                    if ( $from && $row[0] < $from ) continue;
                    if ( $dbpattern && !preg_match('/'.$dbpattern.'/i', $row[0]) ) continue;
                    if ( $dbantipattern && preg_match('/'.$dbantipattern.'/i', $row[0]) ) continue;

                    $info= makeWikiInfo($row[0], $serverDB, $serverNr);
                    if (!$info) {
                        if (!$csv) wsfLog("skipping: {$row[0]}",LL_WARN);
                    }
                    else if ($csv) printWikiInfo($info);
                    else {
                        updateWikiInfo($db,$info);
                        wsfLog("updated {$row[0]}",LL_INFO);
                    }
                }
                $serverDB->freeResult($res);
            }
            else {
                wsfLog("failed to list databases of cluster $serverNr! ".$serverDB->lastError(),LL_ERROR);
            }

            # NOTE(review): "!=" compares the two objects by value, not by
            # identity -- confirm this is what openConnection() calls for
            if ($serverDB != $db) $serverDB->close();
        }
    }
}
else {
    wsfLog("nothing to do. Use --top, --show, --csv, or --update",LL_WARN);
}

$db->close();

# No closing PHP tag on purpose: anything after it, even stray text or a
# blank line, would be sent to the output and end up in the CSV listing.

