<?php
# Console self-test entry point: when this file is run from the command line
# with `--test=WikiAccess` (no REQUEST_URI present and WS_WEB not yet defined),
# mark the run as a console test and load the common initialisation script.
if (@$argv[1]==='--test=WikiAccess' && !@$_SERVER['REQUEST_URI'] && !defined('WS_WEB')) {
define('WS_CONSOLE',1);
define('WS_TESTING',1);
define('WS_TEST_WIKIACCESS',1);
require_once('WSInit.php');
}
# Abort unless WS_WEB has been defined by now.
# NOTE(review): presumably WSInit.php (or the including script) defines WS_WEB -- confirm.
if (!defined('WS_WEB')) die('not a valid entry point!');
require_once("WikiSense.php");
require_once("includes/Sanitizer.php");
require_once("includes/Revision.php");
require_once("includes/HistoryBlob.php");
# External text storage support is only loaded when it is both allowed and configured.
if ($wsgAllowExternalStorage && $wgExternalServers) {
require_once("includes/ExternalStore.php");
require_once("includes/ExternalStoreDB.php");
}
# Regex for a (non-nested) template call: group 1 is the template name,
# group 2 the last "|parameter" chunk.
define ('TEMPLATE_PATTERN', '/\{\{([^\|\}]+)(\|[^}]*)*\}\}/'); #FIXME: incomplete. handle nested templates/parameters...
# Regex for a redirect line (multi-line, case-insensitive): group 1 is the link target.
define ('REDIRECT_PATTERN', '/^ *#REDIRECT(?:ION)? ?:? *\[\[([^\]\|]+)[^\]]*\]\].*/mi'); #FIXME: #REDIRECT may be translated!
class WikiAccess {
# language code of the wiki (first constructor argument)
var $language;
# multi-language flag; false by default, newInstance() copies it from the wiki info
var $multilang;
# host name, "<lang>.<family>.org" by default
var $domain;
# URL of the wiki's index.php
var $baseURL;
# database connection used for wiki queries, or NULL when none is attached
var $wikiDB;
# database name, "<lang><family>" by default
var $dbname;
# if true, asTitle()/asDBKey() upper-case the first letter of titles
var $ucFirst;
# language object created lazily by initTranslations()
var $translations;
# project family (second constructor argument)
var $family;
# site name, ucfirst(<family>) by default; also used as the project namespace name
var $sitename;
# suffix getNsIndex() appends to the site name to recognise the project talk namespace
var $nstalk;
# map of lower-cased namespace alias => namespace index, consulted first by getNsIndex()
var $shortcuts;
var $templateClasses;
# map of namespace index => namespace name, consulted by getNsText()/getNsIndex()
var $namespaces;
# text cache connection; NULL until initCache() ran, false when no cache is available
var $cacheDB;
# name of the text cache table for this wiki (set by initCache())
var $cacheTable;
var $boxPattern;
var $boxNormalizer;
# regex for wiki links, built lazily by getLinkPattern()
var $linkPattern;
# regex fragment for the link trail, used by getLinkPattern()
var $linkTrail;
var $root_category;
var $stopwords;
# stopword database connection; closed by close()
var $stopwordDB;
# flag handed to getCustomNsText()/getCustomNsIndex()
var $slurpNamespaces;
var $tagCategories; #maps canonical to local tag categories
var $tagsByCategory; #cache of canonical type -> array-of-tags
# regex template used by formatDate() to strip the year; "{!y!}" is replaced by the year
var $stripYearPattern;
/**
 * Constructor (PHP 4 style): set up defaults for a wiki identified by
 * language code and project family. Nothing is connected or loaded here;
 * database handles, patterns and translations are filled in later.
 *
 * @param string $lang   language code, e.g. "de"
 * @param string $family project family, e.g. "wikipedia"
 */
function WikiAccess($lang, $family) {
	# identity of the wiki
	$this->language = $lang;
	$this->family = $family;
	$this->multilang = false;
	$this->sitename = ucfirst($family);
	$this->nstalk = 'talk';
	$this->ucFirst = true;

	# locations derived from language and family
	$host = "$lang.$family.org";
	$this->domain = $host;
	$this->baseURL = "http://$host/w/index.php";
	$this->dbname = $lang.$family;

	# everything below is resolved lazily or injected by configuration
	$this->wikiDB = $this->cacheDB = $this->stopwordDB = NULL;
	$this->cacheTable = NULL;
	$this->root_category = $this->shortcuts = NULL;
	$this->templateClasses = $this->namespaces = NULL;
	$this->boxPattern = $this->boxNormalizer = NULL;
	$this->linkPattern = $this->linkTrail = NULL;

	$this->slurpNamespaces = false;
	$this->tagCategories = false;
	$this->stopwords = array();
	$this->tagsByCategory = array();

	# "{!y!}" is substituted with the actual year by formatDate()
	$this->stripYearPattern = "![-,;/_|\\\\]?\\s*{!y!}[.]?[\\s.]*$|^{!y!}[-.,;/_|\s\\\\]*!";
}
/**
 * Return the regular expression used to match wiki links, building and
 * caching it on first use. The link trail is taken from the 'linktrail'
 * message of the translations object when one is loaded and the message
 * has the expected shape; otherwise '\pL+' is used.
 *
 * @return string PCRE pattern
 */
function getLinkPattern() {
	if (!$this->linkPattern) {
		if (!$this->linkTrail && $this->translations) {
			$message = $this->translations->getMessage('linktrail');
			$found = array();
			# the message is itself a regex of the form /^(TRAIL)(.*)$/flags
			if (preg_match('!^/\^\((.*)\)\(\.\*\)\$/[sDu]*$!', $message, $found)) {
				$this->linkTrail = $found[1];
			}
		}
		if (!$this->linkTrail) {
			$this->linkTrail = '\pL+';
		}
		$this->linkPattern = '!\[\[([^\|\]]+)(\|[^\]]*)*\]\]('.$this->linkTrail.')?!sDu';
	}
	return $this->linkPattern;
}
/**
 * Lazily create the language object for this wiki and store it in
 * $this->translations. Does nothing if it already exists.
 *
 * While the language class is loaded, the globals $wgMetaNamespace and
 * $wgSitename are temporarily replaced by values derived from
 * $this->sitename and restored afterwards. $wgCanonicalNamespaceNames is
 * modified and NOT restored.
 */
function initTranslations() {
global $IP, $wsgLangVars, $wsgLangAliases;
if ($this->translations) return;
# normalise the language code: lower case with '-', resolve aliases,
# then turn it into the "Xx_yy" form used in class and variable names
$lang= $this->language;
$lang= $this->lc( str_replace('_','-',$lang) );
if (isset($wsgLangAliases[$lang])) $lang= $wsgLangAliases[$lang];
$lang= ucfirst( str_replace('-','_',$lang) );
# import the per-language and the English variant of every configured
# language variable into this function's scope as globals
foreach ($wsgLangVars as $v) {
$var= "$v$lang";
global $$var;
$var= "{$v}En";
global $$var;
}
global $wgMetaNamespace, $wgSitename, $wgMemc, $wgCanonicalNamespaceNames;
# remember the global values; they are restored at the end of this method
$globalMetaNamespace= $wgMetaNamespace;
$globalSitename= $wgSitename;
$wgMetaNamespace= str_replace( ' ', '_', $this->sitename);
$wgSitename= str_replace( '_', ' ', $this->sitename);
$wgCachedMessageArrays = false; #NOTE: NOT global!
# NOTE(review): $wgDefaultSkin is not declared global here either, so this
# assignment is local to the method -- confirm that is intended.
$wgDefaultSkin= 'dummy';
include_once("$IP/includes/Hooks.php");
include_once("$IP/includes/MagicWord.php");
include_once("$IP/languages/Language.php");
#include_once("$IP/languages/LanguageUtf8.php");
$nsvarname= "wgNamespaceNames$lang";
#NOTE: force $wgCanonicalNamespaceNames entries to use "local" $wgMetaNamespace
#XXX: wtf? why?!
$wgCanonicalNamespaceNames[NS_PROJECT]= $wgMetaNamespace;
$wgCanonicalNamespaceNames[NS_PROJECT_TALK]= $wgMetaNamespace . '_talk';
# patch the localised namespace table (if one was imported above) as well
if (isset($$nsvarname)) {
$nsnames= &$$nsvarname;
$nsnames[NS_PROJECT]= $wgMetaNamespace;
$nsnames[NS_PROJECT_TALK]= $wgMetaNamespace . '_' . ( isset($nsnames[NS_TALK]) ? $nsnames[NS_TALK] : $wgCanonicalNamespaceNames[NS_TALK] );
#FIXME: other order/separator, different name...
#print "PROJECT TALK: ".$nsnames[NS_PROJECT_TALK]."\n";
}
$langClass = 'Language' . str_replace( '-', '_', $lang );
# the language file may not exist; warnings from the include are suppressed
wfSuppressWarnings();
include_once("$IP/languages/$langClass.php");
wfRestoreWarnings();
if( ! class_exists( $langClass ) ) {
# Default to English/UTF-8
# No dedicated class: define a trivial subclass of Language at runtime.
# substr(..., 8) strips the leading "Language" from the class name.
$lc = strtolower(substr($langClass, 8));
$snip = "
class $langClass extends Language {
function getVariants() {
return array(\"$lc\");
}
}";
eval($snip);
}
$this->translations= new $langClass();
$this->translations->initEncoding();
$this->translations->load();
$this->translations->namespaceNames[NS_PROJECT] = $this->sitename;
#TODO: NS_PROJECT_TALK will be set to $wgMetaNamespaceTalk if that variable is ...
#print_r( $this->translations->namespaceNames );
# strcasecmp() is non-zero when the instantiated class differs from the requested one
if ( strcasecmp(get_class($this->translations), $langClass) ) {
wsfLog("unable to find language class for {$this->language}, using using class ".get_class($this->translations) , LL_WARN );
}
#disable message cache
#TODO: only do this if the cache was freshly initalized
global $wgDBpassword, $wgUseDatabaseMessages, $wgMessageCache;
if ((!$wgDBpassword || !$wgUseDatabaseMessages) && isset($wgMessageCache) && !is_null($wgMessageCache)) {
$wgMessageCache->mDisable= true;
}
# restore the globals overridden above
$wgMetaNamespace= $globalMetaNamespace;
$wgSitename= $globalSitename;
wsfLog("initialized language object for ".$this->language." ($lang), using class ".get_class($this->translations) , LL_DEBUG );
}
/**
 * Format a timestamp as a localised date string.
 *
 * @param mixed $ts       timestamp in any format accepted by wfTimestamp()
 * @param bool  $showYear when false, the year is stripped from the result
 *                        using $this->stripYearPattern (a regex hack)
 * @return string formatted date
 */
function formatDate($ts, $showYear= true) {
	$this->initTranslations();

	$mwTimestamp = wfTimestamp(TS_MW, $ts);
	$formatted = $this->translations->date($mwTimestamp, false, false, false);
	if ($showYear) {
		return $formatted;
	}

	#UGLY HACK! cut the localised year (plus adjacent separators) out of the string
	$year = $this->translations->sprintfDate("Y", $mwTimestamp);
	$pattern = str_replace("{!y!}", $year, $this->stripYearPattern);
	return preg_replace($pattern, '', $formatted);
}
/**
 * Factory: create and configure a WikiAccess (or subclass) object.
 *
 * @param mixed $domain wiki domain name, or an already resolved wiki info object
 * @param mixed $db     false: do not attach a database; NULL: connect using the
 *                      info object's dbURL; string: connection URL to open;
 *                      object: an existing connection to use as-is
 * @param mixed $url    not used by this method
 * @return mixed the configured wiki object, or the (empty) info value if the
 *               wiki could not be resolved; returned by reference
 */
function &newInstance( $domain, $db = NULL, $url = NULL ) {
if (is_object($domain)) $info= $domain;
else $info= getWikiInfoFromDomain($domain);
if (!$info) {
wsfLog("ERROR: no wiki info for $domain!" , LL_ERROR );
return $info;
}
$class= NULL;
# NOTE(review): $lang is computed here but not used afterwards; the calls
# below pass $info->lang directly.
$lang= $info->lang;
if (!is_array($lang)) $lang= explode('*',$lang);
$lang= $lang[0];
#wsfLog("crating WikiAccess for $domain (url: $url)" , LL_DEBUG );
# pick the most specific implementation class and instantiate it
$class= wsfGetExpertClass( 'WikiAccess', $info->dbname, $info->domain, $info->family, $info->lang, $info->is_multilang );
$wiki= new $class($info->lang, $info->family);
# apply configuration in order: expert config, then per-domain overrides,
# then the per-domain site name
$conf= wsfLoadExpertConfig( 'WikiConfig', 'wiki', $info->dbname, $info->domain, $info->family, $info->lang, $info->is_multilang );
wsfConfigureObject($wiki, $conf);
global $wsgWikis, $wsgSitenames;
if (isset($wsgWikis[$info->domain])) wsfConfigureObject($wiki, $wsgWikis[$info->domain], true);
if (isset($wsgSitenames[$info->domain])) $wiki->sitename= $wsgSitenames[$info->domain];
# values from the info object always win over configured ones
$wiki->info=& $info;
$wiki->language= $info->lang;
$wiki->multilang= @$info->is_multilang;
$wiki->domain= $info->domain;
$wiki->dbname= $info->dbname;
$wiki->dbURL= $info->dbURL;
$wiki->dbServerId= $info->server;
$wiki->baseURL= "http://{$info->domain}{$info->script_path}/index.php";
if (isset($info->ucFirst)) $wiki->ucFirst= $info->ucFirst;
if (isset($info->root_category)) $wiki->root_category= $info->root_category;
# attach a database connection unless the caller passed $db === false
if ($db!==false) {
if ($db === NULL) $db= $info->dbURL;
if ( $db !== NULL && !is_object($db) ) {
# $db is a connection URL: open it and remember the URL on the wiki object
$u= $db;
$db= openConnection($db);
if (!$db) wsfLog("Failed to connect to database!" , LL_ERROR );
$wiki->dbURL= $u;
}
if ($db) {
$wiki->wikiDB= $db;
#UGLY HACK!
# install a global load balancer wrapping this connection if none exists yet
global $wgLoadBalancer;
if (!isset($wgLoadBalancer) || !$wgLoadBalancer) $wgLoadBalancer= new PseudoLoadBalancer( $db );
#NOW DONE BY openConnection:
#global $wsgTransactionMode;
#if (isset($wsgTransactionMode) && $wsgTransactionMode !== NULL) $wiki->setTransactionMode($wsgTransactionMode);
}
}
return $wiki;
}
/**
 * Close the wiki and stopword database connections (if any) and drop the
 * references to them and to the translations object.
 */
function close( ) {
	$handles = array($this->wikiDB, $this->stopwordDB);
	foreach ($handles as $handle) {
		if ($handle) {
			$handle->close();
		}
	}
	$this->wikiDB = $this->stopwordDB = $this->translations = NULL;
}
/**
 * Set the transaction isolation level of the wiki database session.
 *
 * @param mixed $level true for 'REPEATABLE READ', false for 'READ UNCOMMITTED',
 *                     or a literal isolation level name, which is inserted
 *                     into the SQL statement unchanged
 */
function setTransactionMode($level) {
	if ($level === true) {
		$level = 'REPEATABLE READ';
	}
	elseif ($level === false) {
		$level = 'READ UNCOMMITTED';
	}
	$this->wikiDB->query("SET SESSION TRANSACTION ISOLATION LEVEL $level");
}
/**
 * Resolve a logical table name to the name to use in SQL.
 *
 * The name is first mapped through the global $wsgTableNames (if it has an
 * entry), then passed through the database's tableName(), or wrapped in
 * backticks when no database is attached.
 *
 * @param string $table     logical table name
 * @param bool   $qualified prefix the result with "<dbname>." when a dbname is known
 * @return string
 */
function getTableName($table, $qualified=false) {
	global $wsgTableNames;

	$name = isset($wsgTableNames[$table]) ? $wsgTableNames[$table] : $table;
	$name = $this->wikiDB ? $this->wikiDB->tableName($name) : '`'.$name.'`';

	if ($qualified && $this->dbname) {
		return $this->dbname.'.'.$name;
	}
	return $name;
}
/**
 * Guess the domain an interwiki language prefix points to, based on this
 * wiki's own domain.
 *
 * The family is taken from "<lang>.<family>.org" in $this->domain; wikis in
 * the 'wikimedia' family as well as 'commons' and 'species' link to
 * Wikipedia. If the own domain cannot be parsed, Wikipedia is assumed.
 *
 * @param string $prefix interwiki (language) prefix, e.g. "fr"
 * @return string domain name, "<prefix>.<family>.org"
 */
function getInterwikiDomain($prefix) {
	#FIXME: ugly hack!
	# defaults for an unparsable domain (the original read an undefined
	# variable $en here instead of the literal 'en')
	$family = 'wikimedia';
	$lang = 'en';

	$m = array();
	if ( preg_match('/([\w\d]+)\.([\w\d]+)\.org/', $this->domain, $m) ) {
		$lang = $m[1];
		$family = $m[2];
	}

	if ($family == 'wikimedia' || $lang == 'commons' || $lang == 'species') {
		$family = 'wikipedia';
	}
	return "$prefix.$family.org";
}
/**
 * Lower-case a string. May be called statically.
 *
 * Uses the translations object when called on an instance that has one,
 * else mb_strtolower() if available, else plain strtolower() (which is
 * not multibyte safe).
 *
 * @param string $s
 * @return string empty values are returned unchanged
 */
function lc($s) {
	if (!$s) {
		return $s;
	}
	if (isset($this) && $this->translations) {
		return $this->translations->lc($s);
	}
	return function_exists('mb_strtolower')
		? mb_strtolower($s,'utf8')
		: strtolower($s); #warning: destructive.
}
/**
 * Upper-case the first character of a string. May be called statically.
 *
 * Uses the translations object when called on an instance that has one,
 * else the mb_* functions if available, else PHP's built-in ucfirst()
 * (which is not multibyte safe).
 *
 * @param string $s
 * @return string empty values are returned unchanged
 */
function ucfirst($s) {
	if (!$s) {
		return $s;
	}
	if (isset($this) && $this->translations) {
		return $this->translations->ucfirst($s);
	}
	if (function_exists('mb_strtoupper')) {
		$head = mb_substr( $s, 0, 1, 'utf8' );
		$tail = mb_substr( $s, 1, mb_strlen($s)-1, 'utf8' );
		return mb_strtoupper( $head, 'utf8' ) . $tail;
	}
	return ucfirst($s); #warning: destructive.
}
/**
 * Lower-case the first character of a string. May be called statically.
 *
 * Uses the translations object when called on an instance that has one,
 * else the mb_* functions if available, else byte-wise string functions
 * (which are not multibyte safe).
 *
 * @param string $s
 * @return string empty values are returned unchanged
 */
function lcfirst($s) {
	if (!$s) {
		return $s;
	}
	if (isset($this) && $this->translations) {
		return $this->translations->lcfirst($s);
	}
	if (function_exists('mb_strtolower')) {
		$head = mb_substr( $s, 0, 1, 'utf8' );
		$tail = mb_substr( $s, 1, mb_strlen($s)-1, 'utf8' );
		return mb_strtolower( $head, 'utf8' ) . $tail;
	}
	#warning: destructive
	return strtolower( substr( $s, 0, 1 ) ) . substr( $s, 1, strlen($s)-1 );
}
/**
 * Normalise a page name into display-title form: URL-decoded (if it contains
 * '%'), character references decoded, trimmed, first letter upper-cased when
 * $this->ucFirst is set, and underscores replaced by spaces.
 *
 * @param string $title
 * @return string
 * @throws Exception if $title is not a string
 */
function asTitle($title) {
	if (!is_string($title)) {
		throw new Exception("title must be a string");
	}
	#HACKish... anything containing '%' is assumed to be URL-encoded
	$decoded = (strpos($title,'%') !== false) ? urldecode($title) : $title;
	$decoded = trim( Sanitizer::decodeCharReferences( $decoded ) );
	if ( $this->ucFirst ) {
		$decoded = $this->ucfirst( $decoded );
	}
	return str_replace('_',' ',$decoded);
}
/**
 * Turn a page name into a plain-text label: URL-decoded (if it contains '%'),
 * character references decoded, trimmed, underscores replaced by spaces and
 * wiki markup removed via stripMarkup().
 *
 * @param string $title
 * @return string
 */
function asLabel($title) {
	#HACKish... anything containing '%' is assumed to be URL-encoded
	$label = (strpos($title,'%') !== false) ? urldecode($title) : $title;
	$label = trim( Sanitizer::decodeCharReferences( $label ) );
	$label = str_replace('_',' ',$label);
	return stripMarkup($label);
}
/**
 * Normalise a section name: URL-decoded (if it contains '%'), character
 * references decoded, trimmed, and underscores replaced by spaces.
 * Unlike asTitle(), the case of the first letter is left alone.
 *
 * @param string $title
 * @return string
 */
function asSectionTitle($title) {
	#HACKish... anything containing '%' is assumed to be URL-encoded
	$section = (strpos($title,'%') !== false) ? urldecode($title) : $title;
	$section = trim( Sanitizer::decodeCharReferences( $section ) );
	return str_replace('_',' ',$section);
}
/**
 * Normalise a page name into database-key form: trimmed, URL-decoded (if it
 * contains '%'), character references decoded, trimmed again, first letter
 * upper-cased when $this->ucFirst is set, and every run of whitespace
 * replaced by a single underscore.
 *
 * @param string $title
 * @return string
 */
function asDBKey($title) {
	$key = trim($title);
	#HACKish... anything containing '%' is assumed to be URL-encoded
	if (strpos($key,'%') !== false) {
		$key = urldecode($key);
	}
	$key = trim( Sanitizer::decodeCharReferences( $key ) );
	if ( $this->ucFirst ) {
		$key = $this->ucfirst( $key );
	}
	return preg_replace('/\s+/','_',$key);
}
/**
 * Build the full page name ("Namespace:Title" in database-key form) for use
 * in URLs.
 *
 * @param mixed  $ns     namespace index or name; empty/0 means no prefix
 * @param string $title  page title without namespace
 * @param bool   $encode pass the result through urlencodeTitle()
 * @return string
 */
function asUrlName($ns, $title, $encode = false) {
	$name = $this->asDBKey($title);
	if ($ns) {
		# resolve namespace names to their numeric index first
		if (!(is_int($ns) || is_numeric($ns))) {
			$ns = $this->getNsIndex($ns);
		}
		$name = $this->getNsText($ns) . ':' . $name;
	}
	return $encode ? urlencodeTitle($name) : $name;
}
/**
 * Return the local name of a namespace. May be called statically.
 *
 * Lookup order on an instance: custom namespace names (getCustomNsText),
 * $this->namespaces, then the translations object. Without a translations
 * object (or when called statically) the canonical namespace names are used.
 *
 * The instance checks use isset($this), like lc()/ucfirst()/lcfirst() do;
 * the former "@$this" does not suppress the Error PHP 7.1+ raises when
 * $this is read outside object context.
 *
 * @param int $ns namespace index
 * @return mixed namespace name, '' for the main namespace, or an empty
 *               value/NULL if the name is unknown
 */
function getNsText($ns) {
	if ($ns == NS_MAIN) return '';
	#else if ($ns==NS_PROJECT && @$this) return $this->sitename;
	#else if ($ns==NS_PROJECT_TALK && @$this) return $this->sitename.'_'.$this->nstalk;
	#FIXME: apperently still broken for project talk on cswiki !!!!!!!!!!!!!!!!!!!!!!!!!!!!!
	$hasInstance = isset($this);
	$t = NULL; # defined up front so the static code path never reads an unset variable

	if ($hasInstance) {
		$t = getCustomNsText($this, $ns, $this->slurpNamespaces);
		if ($t) return $t;
	}
	if ($hasInstance && $this->namespaces) {
		if (isset($this->namespaces[$ns])) $t = $this->namespaces[$ns];
		if ($t) return $t;
	}
	if ($hasInstance && $this->translations) {
		$t = $this->translations->getNsText($ns);
		if ($t) return $t;
	}
	else {
		global $wgCanonicalNamespaceNames;
		if (isset($wgCanonicalNamespaceNames[$ns])) return $wgCanonicalNamespaceNames[$ns];
		//else if (isset($wgNamespaceNamesEn[$ns])) return $wgNamespaceNamesEn[$ns];
		return $t;
	}
	# translations are loaded but do not know this namespace
	return NULL;
}
/**
 * Resolve a namespace name to its numeric index. May be called statically.
 *
 * Lookup order: $this->shortcuts, canonical names (MWNamespace), custom
 * namespaces (getCustomNsIndex), $this->namespaces, the translations object,
 * and finally the site name / site name + talk suffix for the project
 * namespaces. Instance-only steps are skipped on static calls.
 *
 * The instance checks use isset($this), like lc()/ucfirst()/lcfirst() do;
 * the former "@$this" does not suppress the Error PHP 7.1+ raises when
 * $this is read outside object context.
 *
 * @param mixed $ns namespace name or index
 * @return mixed integer index; 0 for empty input; NULL/false if unknown
 */
function getNsIndex($ns) {
	if (!$ns) return 0;
	if (is_int($ns) || is_numeric($ns)) return (int)$ns;

	$hasInstance = isset($this);

	# normalise: trimmed, lower case, spaces as underscores
	if ($hasInstance) $ns = str_replace(' ','_',$this->lc(trim($ns)));
	else $ns = str_replace(' ','_',WikiAccess::lc(trim($ns)));

	if ($hasInstance && isset($this->shortcuts[$ns])) return $this->shortcuts[$ns];

	$n = MWNamespace::getCanonicalIndex( $ns );
	#print "==> canonical index of $ns: $n\n";
	if (($n===NULL || $n===false) && $hasInstance) {
		$n = getCustomNsIndex($this, $ns, $this->slurpNamespaces);
	}
	if (($n===NULL || $n===false) && $hasInstance && $this->namespaces) {
		foreach ($this->namespaces as $i => $s) {
			$s = str_replace(' ','_',$this->lc(trim($s)));
			#print "NS: $s ?= $ns\n";
			if ($s == $ns) return $i;
		}
	}
	if (($n===NULL || $n===false) && $hasInstance && $this->translations) {
		$n = $this->translations->getNsIndex($ns);
		#print "==> local index of $ns: $n\n";
	}
	if (($n===NULL || $n===false) && $hasInstance) {
		if ($ns==$this->lc($this->sitename)) $n = NS_PROJECT;
		else if ($ns==$this->lc($this->sitename.'_'.$this->nstalk)) $n = NS_PROJECT_TALK;
	}
	return $n;
}
/**
 * Return the full name of the talk page belonging to a page.
 *
 * @param string $title full page name
 * @return mixed false for interlanguage links; $title itself if the
 *               namespace index is odd (already a talk page); otherwise
 *               "<talk namespace>:<page name>"
 */
function getTalkPage($title) {
	$entry = $this->makeLinkEntry($title); #FIXME: sometimes, the split does not work right!
	if ($entry['lang']) {
		return false;
	}
	$nsIndex = $entry['namespacenum'];
	if (($nsIndex % 2) > 0) {
		return $title;
	}
	# the talk namespace is the subject namespace's index plus one
	$talkNs = $this->getNsText( $nsIndex + 1 );
	return $talkNs.':'.$entry['link'];
}
/**
 * URL of the raw wikitext of a page (the page URL plus "&action=raw").
 *
 * @param string $title page title without namespace
 * @param mixed  $ns    namespace index or name
 * @return string
 */
function makeSourceLink( $title, $ns = NS_MAIN ) {
	$pageUrl = $this->makePageLink($title, $ns);
	return $pageUrl . '&action=raw';
}
/**
 * URL of a page on this wiki, in "index.php?title=..." form.
 *
 * @param string $title page title without namespace
 * @param mixed  $ns    namespace index or name
 * @return string
 */
function makePageLink( $title, $ns = NS_MAIN ) {
	$encodedName = $this->asUrlName( $ns, $title, true );
	return $this->baseURL . '?title=' . $encodedName;
}
/**
 * Render the HTML for one category entry with a "[+]" control that calls
 * the JavaScript function bonsaiExpand(), followed by a link to the
 * category page.
 *
 * @param string $cat  category name
 * @param string $t    link text; defaults to the category's title form
 * @param string $tag  name of the wrapping HTML element
 * @param string $attr raw attribute string for the wrapping element
 *                     (inserted as-is, NOT escaped)
 * @return string HTML
 */
function makeExpandableCategoryItemHTML($cat, $t = NULL, $tag = 'div', $attr = "") {
	$key = $this->asDBKey($cat);
	$href = $this->makePageLink($cat, NS_CATEGORY);
	if (!$t) {
		$t = $this->asTitle($cat);
	}

	$toggle = "[<a href='javascript:void(0)' onclick=\"bonsaiExpand(this, '".urlencode($this->domain)."', '".urlencode($key)."');\" title=\"load\">+</a>]";
	$label = " <span><a href=\"$href\">".escapeHtml($t)."</a></span>";

	return "<$tag $attr>" . $toggle . $label . "</$tag>";
}
/**
 * Create the text cache table for this wiki if it does not exist yet.
 * Logs a warning and does nothing when no cache database is available.
 */
function createCache() {
	if (!$this->initCache()) {
		wsfLog("no cache database, not creating table!", LL_WARN );
		return;
	}

	$ddl = array(
		'CREATE TABLE IF NOT EXISTS `'.$this->cacheTable.'` (',
		' page_id INT(8) NOT NULL, ',
		' page_namespace INT(11) NOT NULL, ',
		' page_title VARCHAR(255) binary NOT NULL, ',
		' page_len INT(8) NOT NULL, ',
		' rev_id INT(8) NOT NULL, ',
		' rev_timestamp VARCHAR(14) NOT NULL, ',
		' text_id INT(8) NOT NULL, ',
		' text MEDIUMTEXT binary NOT NULL, ',
		' PRIMARY KEY (rev_id), ',
		' UNIQUE KEY (text_id), ',
		' KEY (page_id,rev_timestamp), ',
		' KEY (page_namespace,page_title,rev_timestamp) ',
		') ENGINE=MyISAM DEFAULT CHARSET=binary',
	);
	$this->cacheDB->query(implode('', $ddl),'createCache');

	wsfLog("created cache table ".$this->cacheTable, LL_INFO );
}
/**
 * Fetch the newest cache row whose column $field equals $value.
 *
 * @param string $field     column name (inserted into the SQL between backticks)
 * @param mixed  $value     value to match
 * @param object $otherwiki if given, read from the cache table configured
 *                          for that wiki's domain in $wsgTextCacheTables
 * @return mixed the row as returned by fetchRow() after wsfStripIntKeys(),
 *               or false when no cache or cache table is available
 */
function getFromCache( $field, $value, $otherwiki = NULL ) {
	if (!$this->initCache()) {
		return false;
	}

	if (!$otherwiki) {
		$table = $this->cacheTable;
	}
	else {
		global $wsgTextCacheTables;
		$table = @$wsgTextCacheTables[$otherwiki->domain];
		if (!$table) {
			return false;
		}
	}

	$sql = implode('', array(
		' SELECT * FROM '.$table,
		' WHERE `'.$field.'` = '.$this->cacheDB->addQuotes($value),
		' ORDER BY rev_timestamp DESC LIMIT 1',
	));

	$result = $this->cacheDB->query($sql,'getFromCache');
	$row = $this->cacheDB->fetchRow($result);
	$this->cacheDB->freeResult($result);

	wsfStripIntKeys($row);
	return $row;
}
/**
 * Insert a page revision and its text into the text cache (INSERT IGNORE,
 * so an existing row is left untouched).
 *
 * @param array  $entry     record with the keys page_id, page_namespace,
 *                          page_title, page_len, rev_id, rev_timestamp, text_id
 * @param string $text      revision text
 * @param object $otherwiki if given, write to the cache table configured
 *                          for that wiki's domain in $wsgTextCacheTables
 * @return mixed false when no cache or cache table is available; no value otherwise
 */
function storeInCache($entry, $text, $otherwiki = NULL) {
	if (!$this->initCache()) {
		return false;
	}

	if (!$otherwiki) {
		$table = $this->cacheTable;
	}
	else {
		global $wsgTextCacheTables;
		$table = @$wsgTextCacheTables[$otherwiki->domain];
		if (!$table) {
			return false;
		}
	}

	$columns = array('page_id', 'page_namespace', 'page_title', 'page_len', 'rev_id', 'rev_timestamp', 'text_id');

	$sql = ' INSERT IGNORE INTO '.$table.' SET ';
	foreach ($columns as $column) {
		$sql .= ' '.$column.' = '.$this->cacheDB->addQuotes($entry[$column]).', ';
	}
	$sql .= ' text = '.$this->cacheDB->addQuotes($text);

	$this->cacheDB->query($sql,'storeInCache');
}
/**
 * Make sure the wiki connection has the given database selected.
 *
 * @param mixed $db database name; an object with a dbname property (e.g.
 *                  another wiki); NULL for this wiki's own database;
 *                  false to do nothing
 * @return mixed name of the database that was selected before the call
 *               (to be passed back later for restoring), or false if $db was false
 */
function selectDB($db = NULL) {
	if ($db && is_object($db)) {
		$db = $db->dbname;
	}
	if ($db === false) {
		return false;
	}
	$wanted = ($db === NULL) ? $this->dbname : $db;

	$previous = $this->wikiDB->getProperty('mDBname');
	if ($previous != $wanted) {
		$this->wikiDB->selectDB($wanted);
	}
	return $previous;
}
/**
 * Load the raw text of a page over HTTP.
 *
 * One of three access paths is used, in this order of preference:
 *  - $wsgWikiProxy: a proxy script that is given wiki domain, title,
 *    revision and (optionally) a token;
 *  - $wsgDirectAccessHost: a direct request via cURL, with the wiki's domain
 *    sent as Host header (disabled globally if cURL is not available);
 *  - the wiki's own index.php with action=raw.
 *
 * @param string $title     page title without namespace
 * @param mixed  $ns        namespace index or name
 * @param mixed  $rev       revision id, or empty for the current revision
 * @param object $otherwiki load from this wiki instead of $this
 * @return mixed the response body as returned by curl_exec() or
 *               file_get_contents() (false on failure)
 */
function loadTextHTTP($title, $ns=NULL, $rev=NULL, $otherwiki = NULL) {
	global $wsgWikiProxy, $wsgWikiProxyToken, $wsgDirectAccessHost;

	$title = $this->asUrlName($ns, $title, false);

	if ($wsgDirectAccessHost && !function_exists('curl_init')) {
		wsfLog('cURL not supported, disabling direct access (aka quid bypass)',LL_WARN);
		$wsgDirectAccessHost = NULL;
	}

	# the wiki the page is read from
	$source = $otherwiki ? $otherwiki : $this;
	$useCurl = false;

	if ($wsgWikiProxy) {
		$url = $wsgWikiProxy . '?wiki='.urlencode($source->domain).'&title='.urlencodeTitle($title);
		if ($rev) $url .= '&rev='.urlencode($rev);
		if ($wsgWikiProxyToken) $url .= '&token='.$wsgWikiProxyToken;
	}
	else {
		if ($wsgDirectAccessHost) {
			$url = $wsgDirectAccessHost;
			$domain = $source->domain;
			$useCurl = true;
		}
		else {
			$url = $source->baseURL;
		}
		$url .= '?title='.urlencodeTitle($title);
		if ($rev) $url .= '&oldid='.urlencode($rev);
		$url .= '&action=raw';
	}

	if (!$useCurl) {
		wsfLog("loading page $title from URL $url" , LL_DEBUG );
		wfSuppressWarnings();
		$text = file_get_contents( $url );
		wfRestoreWarnings();
		#NOTE: empty results happen quite often...
		#FIXME: does a 404 give back NULL or what?!
		return $text;
	}

	wsfLog("(cURL) loading page $title from URL $url" , LL_DEBUG );
	$handle = curl_init();
	curl_setopt($handle, CURLOPT_URL, $url); #fetch from $url
	curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1); #capture and return the data
	curl_setopt($handle, CURLOPT_TIMEOUT, 10); #timeout after 10 sec
	curl_setopt($handle, CURLOPT_USERAGENT, ini_get('user_agent')); #set User-Agent string
	curl_setopt($handle, CURLOPT_HTTPHEADER, array(
		"Host: $domain", #use HOST header to select Virtual Host
	));
	$text = curl_exec($handle);
	if (curl_errno($handle)) {
		wsfLog("curl returned an error: ".curl_error($handle), LL_WARN);
	}
	curl_close($handle);
	return $text;
}
/**
 * Make sure the text cache connection is set up.
 *
 * On the first call this looks up the cache table for this wiki's domain in
 * $wsgTextCacheTables and opens a connection to $wsgTextCacheDB. A failure
 * is remembered by setting $this->cacheDB to false, so later calls return
 * immediately.
 *
 * @return bool true if the cache can be used
 */
function initCache() {
	global $wsgTextCacheDB, $wsgTextCacheTables;

	if (!$wsgTextCacheDB || $this->cacheDB === false) {
		return false;
	}
	if (is_object($this->cacheDB)) {
		return true;
	}

	$this->cacheTable = @$wsgTextCacheTables[$this->domain];

	# only connect when a cache table is configured for this wiki
	$connection = $this->cacheTable ? openConnection($wsgTextCacheDB) : false;
	$this->cacheDB = $connection ? $connection : false;

	return $this->cacheDB !== false;
}
/**
 * Fill in the 'text' field of the given entry, using the text cache if
 * possible and updating it if necessary.
 *
 * The returned record carries a '_cache' key telling where the text came
 * from: 'hit' (read from cache), 'miss' (loaded via HTTP and stored in the
 * cache) or 'bypassed' (loaded via HTTP, no cache available).
 *
 * @param array  $entry     page/revision record; rev_id, page_title and
 *                          page_namespace are used for the lookup
 * @param object $otherwiki wiki to load from instead of $this
 * @return mixed the record including text, or false if the text could not be loaded
 */
function aquireCacheRecord( $entry, $otherwiki = NULL ) {
	$cacheReady = $this->initCache();
	$cached = NULL;

	if ($cacheReady) {
		$cached = $this->getFromCache('rev_id', $entry['rev_id'], $otherwiki);
		if ($cached) wsfLog("got revision {$entry['rev_id']} from cache" , LL_DEBUG );
	}
	else {
		wsfLog("no caching configured, aquireCacheRecord fails" , LL_DEBUG );
	}

	if ($cached) {
		$cached['_cache'] = 'hit';
		return $cached;
	}

	# not cached: fetch the text over HTTP
	$text = $this->loadTextHTTP($entry['page_title'], $entry['page_namespace'], $entry['rev_id'], $otherwiki);
	if ($text === NULL || $text === false) {
		return false;
	}
	$entry['text'] = $text;

	if (!$cacheReady) {
		$entry['_cache'] = 'bypassed';
		return $entry;
	}

	$this->storeInCache($entry, $text, $otherwiki);
	wsfLog("stored revision {$entry['rev_id']} in cache" , LL_DEBUG );
	$entry['_cache'] = 'miss';
	return $entry;
}
/*
* Fetch a full record (page, revision and text fields) for a page specified
* by full title, or by title and namespace. If no revision is specified, the
* newest revision is returned.
*
* Without a wiki database the text is loaded over HTTP and a minimal record
* is built. With a database, the row comes from fetchWikiRecord(); if the
* row has text flags, the text is resolved with Revision::getRevisionText()
* (for compressed text, stub objects and - if allowed - external storage),
* falling back to aquireCacheRecord() / loadTextHTTP() when that fails.
*
* The returned array carries a '_cache' key describing where the text came from.
* Returns false if the page cannot be found or the revision is deleted.
*/
function fetchPageRecord( $title, $ns=NULL, $rev=NULL, $otherwiki = NULL ) {
global $wsgAllowExternalStorage, $wsgExternalStorageDbExpression, $wgExternalServers, $wgExternalLoadBalancers;
#print "==> fetchPageRecord\n";
# normalise "no value" markers to NULL; resolve namespace names to an index
if ($title===false || $title==='') $title= NULL;
if ($rev===false || $rev==='') $rev= NULL;
if ($ns && is_string($ns) && !is_numeric($ns)) $ns= $this->getNsIndex($ns);
if (!$this->wikiDB) {
# no database attached: HTTP is the only source, and ids are unknown
if ($title === NULL) return false; #no title given, no db to look up the revision -> give up.
$text= $this->loadTextHTTP( $title, $ns, $rev, $otherwiki );
if ($text===false || $text===NULL) return false;
$info['page_title']= $title;
$info['page_namespace']= $ns ? $ns : 0; #FIXME: split title if neccessary...
$info['text']= $text;
$info['rev_timestamp']= wfTimestampNow();
$info['page_id']= NULL;
$info['rev_id']= $rev;
$info['text_id']= NULL;
$info['_cache']= 'remote (no wiki db)';
return $info;
}
//$olddb= $this->selectDB($otherwiki);
$e= $this->fetchWikiRecord( $title, $ns, $rev, $otherwiki );
if (!$e) {
wsfLog("no entry found for page $title (ns=$ns, rev=$rev, wiki=".($otherwiki?$otherwiki->domain:'-').")", LL_DEBUG);
//$this->selectDB($olddb);
return false;
}
if ($e['rev_deleted']) {
wsfLog("access denied to revision $rev", LL_DEBUG);
//$this->selectDB($olddb);
return false;
}
$rec = NULL;
# non-empty flags mean the stored text needs processing before use
if ($e['text_flags']) {
$external = preg_match('/(^|,)(external)($|,)/',$e['text_flags']);
# externally stored text is only resolved locally when that is allowed and configured
if (!$external || ($wsgAllowExternalStorage && $wgExternalServers)) {
$skip = false;
try {
#HACK: mangle external store config, so it applies to the right server name. updating global config for a single call is icky :(
if ($external) {
#HACK: extract cluster name
$url = trim($e['text']);
if (preg_match('!^DB://([^/]+)/!', $url, $m)) { #if DB store
$cluster = $m[1];
#HACK: guess db name
$extDbName = $otherwiki ? $otherwiki->dbname : $this->dbname;
# NOTE(review): eval() of a configured PHP expression; its result replaces
# the guessed name. Safe only as long as the configuration is trusted.
if ($wsgExternalStorageDbExpression) $extDbName = eval($wsgExternalStorageDbExpression);
#HACK: set db name for all external storage servers.
$servers = @$wgExternalServers[$cluster];
if (!$servers) {
wsfLog("missing entry for $cluster in \$wgExternalServers", LL_WARN);
$skip = true;
}
else {
foreach($servers as $idx => $srv) {
$wgExternalServers[$cluster][$idx]['dbname'] = $extDbName;
}
}
#HACK: select appropriate dbname in all existing dbconnections in $wgExternalLoadBalancers
$balancer = @$wgExternalLoadBalancers[$cluster];
if ($balancer) {
foreach ($balancer->mConnections as $i => $conn) {
if (!$conn->isOpen() || !$conn->getProperty("mConn")) {
wsfLog("warning: external store db connection (".$conn->getProperty("mServer")."/".$conn->getProperty("mDBname").") closed! reopening.", LL_WARN);
$conn->open(
$conn->getProperty("mServer"),
$conn->getProperty("mUser"),
$conn->getProperty("mPassword"),
$conn->getProperty("mDBname")
);
}
$conn->selectDB($extDbName);
}
}
}
}
#FIXME: may or may not be able to handle 'object' flag...
if (!$skip) {
# build the minimal row object Revision::getRevisionText() is given
$r= new stdClass();
$r->old_flags =& $e['text_flags'];
$r->old_text =& $e['text'];
$text= Revision::getRevisionText($r);
if ($text===NULL || $text===false) {
//NOTE: this may happen because something is broken, or because the text storage server is lagging.
wsfLog("failed to resolve revision text", LL_DEBUG);
}
else {
#print "<br/>got text (flags: {$e['text_flags']}; spec: {$e['text']}; rev: {$e['rev_id']}; dbname: $extDbName)<br/>";
$rec= $e;
$rec['text']= $text;
$rec['_cache']= 'local (processed: '.$e['text_flags'].')';
}
}
}
catch (DBError $ex) {
# a database failure is not fatal; the HTTP/cache fallback below takes over
wsfLog("failed to resolve revision text: ".$ex->getText(), LL_DEBUG);
}
}
# local resolution was not possible or failed: go through the cache or plain HTTP
if (!$rec) {
if (!$this->initCache()) {
$text= $this->loadTextHTTP($e['page_title'],$e['page_namespace'],$e['rev_id'],$otherwiki);
$rec= $e;
$rec['text']= $text;
$rec['_cache']= 'remote (external, no cache)';
}
else {
$rec= $this->aquireCacheRecord($e, $otherwiki);
}
}
}
else {
# no flags: the text column holds the plain text already
$rec= $e;
$rec['_cache']= 'local (db)';
}
//$this->selectDB($olddb);
return $rec;
}
/**
 * Database-qualified table name ("<dbname>.<table>").
 *
 * For this wiki the table name is passed through the connection's
 * tableName(); for another wiki the raw name is used.
 *
 * @param string $table     table name
 * @param object $otherwiki wiki whose database the table lives in
 * @return string
 */
function tableName($table, $otherwiki = NULL ) {
	if (!$otherwiki) {
		return $this->dbname . '.' . $this->wikiDB->tableName($table);
	}
	//FIXME: table-prefix for otherwiki!
	return $otherwiki->dbname . '.' . $table;
}
/*
* Fetch a record from the wiki database, for the page specified by full
* title, or by title and namespace, and/or by revision id.
* This does NOT try to resolve externalized text in any form.
* If no revision is specified, the newest revision is returned.
*
* Returns the row as an array (page_*, rev_*, text_id, text, text_flags),
* or false if neither a title nor a usable revision id is given, if the
* title is an interlanguage link, or - presumably, depending on what
* fetchRow() yields - when no row matches.
*/
function fetchWikiRecord( $title, $ns=NULL, $rev=NULL, $otherwiki = NULL ) {
	if ($title===false || $title==='') $title= NULL;
	if ($rev===false || $rev==='') $rev= NULL;

	# No title and no usable revision id. A falsy, non-NULL $rev (0, '0') used
	# to slip through here and produce a query without WHERE clause, which
	# returned the latest revision of an arbitrary page.
	if ($title === NULL && !$rev) return false;

	if ($ns === NULL && $title !== NULL) {
		# split a full title into namespace and page name
		$lnk= $this->makeLinkEntry($title);
		if ($lnk['lang']) return false;
		$ns= $this->getNsIndex($lnk['namespace']);
		$title= $lnk['link'];
	}
	if ($title !== NULL) $title= $this->asDBKey($title);

	wsfLog("fetchWikiRecord($title, $ns, $rev, ".($otherwiki?$otherwiki->domain:'-').");", LL_TRACE);

	$sql= "
SELECT page_id, page_namespace, page_title, page_len, page_touched, page_is_redirect,
rev_id, rev_timestamp, rev_deleted,
old_id as text_id, old_text as text, old_flags as text_flags
FROM " . $this->tableName('text', $otherwiki) . "
JOIN revision ON rev_text_id = old_id
";
	# with a revision id, join the page the revision belongs to;
	# without one, join on the page's latest revision
	if ($rev) $sql .= " JOIN " . $this->tableName('page', $otherwiki) . " ON rev_page = page_id ";
	else $sql .= " JOIN " . $this->tableName('page', $otherwiki) . " ON rev_id = page_latest ";

	$where = array();
	if ($title !== NULL) {
		$where[] = "page_title = " . $this->wikiDB->addQuotes( $title );
		$where[] = "page_namespace = " . (int)$ns;
	}
	if ($rev) $where[] = "rev_id = " . (int)$rev;
	$where[] = "rev_deleted < 2"; // 0=normal, 1=content-hidden; see http://www.mediawiki.org/wiki/Bitfields_for_rev_deleted
	$sql .= " WHERE " . implode(" AND ", $where);

	# run the query with the target wiki's database selected, then switch back
	$olddb= $this->selectDB($otherwiki);
	$res= $this->wikiDB->query($sql,'fetchWikirecord');
	$rec= $this->wikiDB->fetchRow( $res );
	wsfStripIntKeys($rec);
	$this->wikiDB->freeResult($res);
	$this->selectDB($olddb);
	return $rec;
}
/**
 * Compatibility wrapper for fetchPageRecord(): fetches the newest revision
 * of a page and adds the legacy keys 'title', 'namespace', 'id',
 * 'timestamp' and 'url' (link to the raw page source) to the record.
 *
 * @param string $title     page title
 * @param mixed  $ns        namespace index or name
 * @param object $otherwiki wiki to load from instead of $this
 * @return mixed the record, or false if the page could not be fetched
 */
function loadInfoFromDB( $title, $ns = NS_MAIN, $otherwiki = NULL ) {
	$info = $this->fetchPageRecord($title, $ns, NULL, $otherwiki);
	if (!$info) {
		return false;
	}

	# legacy key => key in the fetched record
	$aliases = array(
		'title' => 'page_title',
		'namespace' => 'page_namespace',
		'id' => 'page_id',
		'timestamp' => 'rev_timestamp',
	);
	foreach ($aliases as $alias => $column) {
		$info[$alias] = $info[$column];
	}

	if (!$otherwiki) {
		$info['url'] = $this->makeSourceLink( $info['title'], $info['namespace'] );
		return $info;
	}

	# the source link must point at the other wiki's index.php
	$pageName = $this->asUrlName($ns, $info['title']);
	$info['url'] = $otherwiki->baseURL.'?title='.urlencodeTitle($pageName).'&action=raw';
	return $info;
}
/**
 * Convenience loader for a page's text and basic metadata.
 *
 * The source is chosen as follows: the given file if $file is set; plain
 * HTTP if no wiki database is attached; otherwise loadInfoFromDB().
 *
 * @param string $title page title
 * @param mixed  $ns    namespace index or name
 * @param string $file  optional path of a local file holding the page text
 * @return mixed array with title, namespace, url, text, timestamp and id
 *               (or the loadInfoFromDB() result); false if the file cannot be read
 */
function loadInfo( $title, $ns = NS_MAIN, $file=NULL ) {
	$info = array(
		'title' => $title,
		'namespace' => $ns,
		'url' => $this->makeSourceLink( $title, $ns ),
	);

	if (!$file && $this->wikiDB) {
		return $this->loadInfoFromDB($title, $ns);
	}

	if ($file) {
		wsfLog("loading page $title from file $file" , LL_DEBUG );
		$text = file_get_contents( $file );
		if ($text === false) {
			return false;
		}
		$timestamp = wfTimestamp( TS_MW, filemtime( $file ) );
	}
	else {
		$text = $this->loadTextHTTP( $title, $ns );
		$timestamp = wfTimestampNow();
	}

	$info['text'] = $text;
	$info['timestamp'] = $timestamp;
	$info['id'] = 0;
	return $info;
}
/**
 * Query title and timestamp of the current revision of pages in the main
 * namespace, ordered by page id.
 *
 * @param int   $max   maximum number of rows; values that are not a
 *                     positive integer mean "no limit"
 * @param int   $from  only pages with page_id >= $from
 * @param mixed $since only pages whose current revision is newer than this
 *                     timestamp (any format accepted by wfTimestamp())
 * @return mixed result cursor of the query, or false if no database is
 *               connected; returned by reference
 */
function & queryPages( $max = NULL, $from = NULL, $since = NULL ) {
	# a by-reference function has to return a variable, not a literal
	$res = false;

	if ( !$this->wikiDB ) {
		wsfLog("can't list pages, no database connected!" , LL_WARN );
		return $res;
	}

	wsfLog("listing pages from $from, since $since, limit $max..." , LL_DEBUG );

	$trevision= $this->tableName( 'revision' );
	$tpage= $this->tableName( 'page' );

	$sql= "
SELECT page_title, rev_timestamp
FROM $trevision
JOIN $tpage ON rev_id = page_latest
WHERE page_namespace = 0
";
	if ( $from ) $sql .= " AND page_id >= " . $this->wikiDB->addQuotes( $from ) ;
	if ( $since ) $sql .= " AND rev_timestamp > " . $this->wikiDB->addQuotes( wfTimestamp(TS_MW, $since) ) ;
	$sql .= " ORDER BY page_id ";

	# never put the raw parameter into the statement
	$limit = (int)$max;
	if ( $limit > 0 ) $sql .= " LIMIT $limit ";

	$res= $this->wikiDB->query( $sql, 'WikiAccess::queryPages' );
	wsfLog("got listing cursor." , LL_DEBUG );
	return $res;
}
/**
 * Call the wiki's web API (api.php next to index.php) and return the
 * decoded response.
 *
 * @param array $params request parameters; array values are joined with
 *                      '|'; 'format' is always forced to 'php'
 * @return mixed decoded response, or false if the request returned nothing
 *               or the API reported an error
 */
function webApi($params) {
	$params['format'] = 'php';

	$pairs = array();
	foreach ($params as $key => $value) {
		if (is_array($value)) {
			$value = implode('|', $value);
		}
		$pairs[] = $key . '=' . urlencodeTitle($value);
	}
	$url = dirname($this->baseURL) . '/api.php' . '?' . implode('&', $pairs);

	$raw = file_get_contents($url);
	if (!$raw) {
		return false;
	}

	# NOTE(review): unserialize() on data received from a remote server can
	# instantiate arbitrary classes; consider the API's JSON format instead.
	$data = unserialize($raw);

	$error = @$data['error'];
	if ($error) {
		wsfLog("web api returned an error: code=\"{$error['code']}\" info=\"{$error['info']}\"", LL_ERROR);
		return false;
	}
	return $data;
}
# Runs an action=query list request through webApi() and gathers the list entries.
#
# $titles   - value for the 'titles' parameter
# $list     - name of the list module; also the key read from the 'query' part of each reply
# $params   - optional extra parameters; 'prop' defaults to 'info'
# $continue - name of the continuation parameter; when given, requests are repeated
#             for as long as the reply carries a 'query-continue' value for it
#
# Returns the merged entries, or NULL when the first request fails.
function webApiList($titles, $list, $params = NULL, $continue = false) {
    $params['action'] = 'query';
    $params['titles'] = $titles;
    $params['list'] = $list;
    if (!isset($params['prop'])) $params['prop'] = 'info';

    $data = NULL;
    for (;;) {
        $res = $this->webApi($params);
        if (!$res) break;

        $batch = $res['query'][$list];
        $data = $data ? array_merge($data, $batch) : $batch;

        # stop unless the caller asked for continuation and the reply offers one
        if (!$continue) break;
        if (!@$res['query-continue'][$list][$continue]) break;
        $params[$continue] = $res['query-continue'][$list][$continue];
    }
    return $data;
}
# Builds a normalized descriptor for one wiki link.
#
# $link  - link target; may carry a leading ':' escape, a "prefix:" part and a "#section" suffix
# $label - optional link text
# $trail - optional text that is appended to the label
#
# Returns an array with the keys 'lang', 'escaped', 'namespace', 'namespacenum',
# 'link', 'section' and 'label'. Calls wfDebugDieBacktrace() when $link is not a string.
function makeLinkEntry($link, $label = NULL, $trail = NULL) {
global $wsgLanguages;
if (!is_string($link)) wfDebugDieBacktrace("paramer link must be a string! found: ".wsfToString($link));
$idx= 0;
$lang= NULL;
$ns= NULL;
$section= NULL;
$escaped= false;
# character references are only decoded when the text contains an '&' at all
if ($link && strpos($link,'&')!==false) $link= Sanitizer::decodeCharReferences( $link );
if ($label && strpos($label,'&')!==false) $label= Sanitizer::decodeCharReferences( $label );
if ($trail && strpos($trail,'&')!==false) $trail= Sanitizer::decodeCharReferences( $trail );
# split at first ':'
# $p[2] is an optional leading ':', $p[3] the text before the first ':' and $p[4] the remainder
$p= array();
preg_match('/^((:)?([^:]+):)?(.*)/',$link,$p);
if ( $p[1] && $p[3] ) {
$n= $this->lc($p[3]);
$n= trim($n);
$l= trim($p[4]);
$idx= $this->getNsIndex( $n );
# NOTE(review): the `$idx != $n` test presumably rejects a lookup that merely echoes its input - confirm against getNsIndex()
if ( $idx && $idx != $n ) {
#if ($n=='en' || $n=='de') print "INDEX OF $n: $idx";
# Ordinary namespace
$ns = $n;
$link= $l;
if ($p[2]) $escaped= true;
} elseif( in_array($n,$wsgLanguages) ) { #TODO: check interwiki-table?! --> use title object...
# prefix is one of the configured language codes: treat it as a language link
$lang = $n;
$link= $l;
if ($p[2]) $escaped= true;
}
#else {
# wsfLog("colon in title, not namespace, not interwiki: $link",LL_DEBUG);
#}
#FIXME: handle non-language interwikis (wikitionary, etc)
}
if ( $ns ) $ns= $this->lc($ns);
# the label is trimmed, except when it is exactly ' ' on a non-escaped category link
if ($label && ($idx != NS_CATEGORY || $escaped || $label!==' ')) {
$label= trim($label);
}
# any label other than ' ' gets the trail appended and its markup stripped; an empty result becomes NULL
if ($label!==' ') {
if ( !empty($trail) ) {
if ( empty($label) ) $label= $link;
$label .= $trail;
}
if ( $label ) $label= stripMarkup( $label );
if ( $label===false || $label==='' ) $label= NULL;
}
# split at first '#'
$p= explode('#',$link,2);
if ( sizeof($p) > 1 ) {
$section= trim($p[1]);
$link= trim($p[0]);
#wsfLog("hash in title (section link): $link # $section",LL_DEBUG);
}
# without a label the (section-less) target doubles as label
if ($label===NULL || $label==='') $label= $link;
$link= $this->asTitle( $link );
# labels of category links are not passed through asLabel()
if ($label && $label!==' ' && $idx!=NS_CATEGORY) $label= $this->asLabel( $label );
if ($section!==NULL) $section= $this->asSectionTitle( $section );
return array(
'lang' => $lang,
'escaped' => $escaped,
'namespace' => $ns,
'namespacenum' => $idx,
'link' => $link,
'section' => $section,
'label' => $label,
);
}
# Finds every match of getLinkPattern() in $text and turns each one into a
# makeLinkEntry() descriptor. Returns the descriptors in match order.
function extractLinks($text) {
    $found= array();
    preg_match_all( $this->getLinkPattern(), $text, $found, PREG_SET_ORDER );

    $entries= array();
    foreach ( $found as $m ) {
        $target= trim($m[1]);
        # group 2 is optional; when present its first character is dropped
        # (presumably the '|' separator - confirm against getLinkPattern())
        $label= @$m[2];
        $label= $label ? substr($label,1) : $label;
        $entries[]= $this->makeLinkEntry($target, $label, @$m[3]);
    }
    return $entries;
}
# Tells whether any template classes are configured (templateClasses is non-empty).
function hasTemplateClasses() {
    return $this->templateClasses ? true : false;
}
# Returns the name of the first template class whose pattern matches $title,
# or NULL when no classes are configured or none matches.
# A leading template-namespace prefix (local name or "msg") is removed before matching.
function getTemplateClass($title) {
    if (!$this->templateClasses) return NULL;
    $title= $this->asDBKey($title);
    # the namespace name is data, not a pattern: quote it (including the '!' delimiter)
    $nst= preg_quote($this->getNSText(NS_TEMPLATE), '!');
    $title= preg_replace('!^('.$nst.'|msg):!i','',$title);
    foreach ($this->templateClasses as $cls => $pattern) {
        if (preg_match($pattern,$title)) return $cls;
    }
    return NULL;
}
# True when at least one classification source (template classes or tag categories) is available.
function canClassifyTags() {
    if ($this->hasTemplateClasses()) return true;
    return $this->hasTagCategories();
}
# Determines the class of a template ("tag").
#
# $template   - template name
# $tocheck    - optional list of class names to test; by default all known ones are tried
# $useClasses - when false, the pattern based templateClasses are only consulted
#               if tag categories are not available
#
# Tag categories are tried first, then the templateClasses patterns.
# Returns the first matching class name, or NULL.
#
#TODO: look up the categories of the tag when none are given, and cache them.
#TODO: cache tag class!
function classifyTag($template, $tocheck = NULL, $useClasses = true) {
    $byCategory= $this->hasTagCategories();
    if ($byCategory) {
        $check= $tocheck;
        # hasTagCategories() only rules out false, so tagCategories may still be
        # something other than an array; array_keys() must not see that
        if (!$check) $check= is_array($this->tagCategories) ? array_keys($this->tagCategories) : array();
        foreach ($check as $cls) {
            if ($this->isTagInCategory($template, $cls)) return $cls;
        }
    }
    if ($this->hasTemplateClasses() && ($useClasses || !$byCategory)) {
        $check= $tocheck;
        if (!$check) $check= array_keys($this->templateClasses);
        foreach ($check as $cls) {
            $pattern= @$this->templateClasses[$cls];
            if ($pattern && preg_match($pattern,$template)) return $cls;
        }
    }
    return NULL;
}
# Reduces wiki markup to plain text by applying a fixed sequence of regex rewrites,
# optionally followed by stripMarkup(), and trims the result.
#
# $text  - wiki text
# $strip - when true (default) the rewritten text is also passed through stripMarkup()
function wikiToText( $text, $strip = true ) {
    global $wgCanonicalNamespaceNames;
    # ordered rules, array( pattern, replacement ); every rule works on the output of the previous one
    $rules= array(
        array('!__\w+__!', ''),                              # __WORD__ tokens
        array('!^---+!smi', ''),                             # runs of dashes at line start
        array('!\[\[([^|\]]+)\|([^\]]+)\]\]!', '\2'),        # [[target|label]] -> label
        array('!\[\[([^\]]+:)?([^\]]+)\]\]!', '\2'),         # [[prefix:target]] -> target
        array('!\[\w+:[^\s]+\]!', ''),                       # [scheme:address] without label
        array('!\[\w+:[^\s]+\s+([^]]+)\]!', '\1'),           # [scheme:address label] -> label
        array('!^=+\s*(.*?)\s*=+\s*$!smi', "\$ \\1\n"),      # == heading == -> "$ heading"
        array('!^(:+)!smi', "\t"),                           # leading colons -> tab
        array('~(?<!\n)\n(?![\n:*#<])~smi', " "),            # join single line breaks
    );
    # (removal of '''bold''' and ''italic'' quotes is not part of these rules)
    foreach ($rules as $rule) {
        $text= preg_replace($rule[0], $rule[1], $text);
    }
    if ($strip) $text= stripMarkup($text);
    return trim($text);
}
# Removes "boxes" and other non-prose blocks from wiki text and returns what is left, trimmed.
# Dropped are: wiki tables ({| ... |}), templates ({{ ... }}), <div>, <gallery> and <center>
# elements, HTML comments, __WORD__ tokens, horizontal rules, image and category links,
# and lines that consist only of prefixed links such as [[xx:Title]].
# Every use of $this is guarded by isset($this), so the code also runs without an instance;
# boxNormalizer, boxPattern and the local namespace names are only used when an instance exists.
function stripBoxes( $text ) {
global $wgCanonicalNamespaceNames;
#print "\n\n$text\n\n";
#NOTE: normalize header/footer templates, etc
if (isset($this) && $this->boxNormalizer) {
$f= $this->boxNormalizer;
$text= $f($text);
}
$text= preg_replace('/<!--.*-->|__\w+__/','',$text); #HACK: strip one-line comments. Hack to get comments before {|
# split at every opening/closing delimiter; the delimiters themselves are kept as separate chunks
$t= preg_split('~(^\{\||\n\{\||\n\|\}|\{\{|\}\}|<div.*?>|<gallery.*?>|<center.*?>|</div\s*>|</gallery\s*>|</center\s*>|<!--|-->)~si',$text,-1,PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
/*$t= preg_split('~</?(div|gallery)(\s+[^>]*)?>~si',$text,-1,PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
*/
#print_r($t);
if (sizeof($t)>=2) {
# $stack holds the closer pattern of every box that is currently open, $top is the innermost one
$stack= array();
$top= NULL;
foreach ($t as $i => $s) {
# decide whether this chunk opens a box; $push is the pattern of the matching closer
$push= NULL;
if (preg_match('~<div(\s+[^>]*)?>~',$s)) $push= '</div\s*>';
else if (preg_match('~<gallery(\s+[^>]*)?>~',$s)) $push= '</gallery\s*>';
else if (preg_match('~<center(\s+[^>]*)?>~',$s)) $push= '</center\s*>';
# deliberately empty statement: chunks longer than 5 bytes skip the short-opener checks below
else if (strlen($s)>5);
else if ($s==="\n{|" || ($i<2 && $s==="{|")) $push= '\n\|\}';
else if ($s==="{{") $push= '\}\}';
else if ($s==="<!--") $push= "-->";
#print " - $s\n";
if ($push) {
$top= $push;
$stack[]= $push;
#print "--> push: $push\n";
}
# drop the chunk while any box is open; whitespace-only chunks are dropped as well
if ($stack || trim($s)==='') {
#print "## ignore[$i]: ".$t[$i]." ## ignored ##\n";
unset($t[$i]);
#$t[$i]= "## ignored($top) ##";
}
else {
#print "--> keep.\n";
continue;
}
# an opener cannot close anything
if ($push) continue;
# a chunk that matches the innermost closer pops one nesting level
if (preg_match("~^$top\$~",$s)) {
$old= array_pop($stack);
if (!$stack) $top= NULL;
else $top= $stack[sizeof($stack)-1];
#print "--> pop: $old; new: $top\n";
}
}
#print_r($t);
$text= implode('',$t);
}
#print "=========================================================================\n";
if (isset($this) && $this->boxPattern) $text= preg_replace($this->boxPattern,'',$text);
/*
$text= preg_replace('"<!--.*?-->"si','',$text);
$text= preg_strip('~\{\{(\{(?!\{)|\}(?!\})|[^\{\}]*)*\}\}~s',$text);
$text= preg_strip('~(^|\n)\{\|[^\n]*(\n(?!(\|\}|\{\|))[^\n]*)*\n\|\}~',$text);
$text= preg_strip('~<div(\s+[^>]*)?>(<(?!/?div[^<>]*>)|[^<]*)*</div\s*>~si',$text);
*/
# lines made of three or more dashes
$text= preg_replace('!^---+ *$!mi','',$text);
if (isset($wgCanonicalNamespaceNames)) {
#print "NS_IMAGE: ".$this->getNsText(NS_IMAGE).";\n";
# links into the image and category namespaces, by canonical name and (with an instance) by local name
if (isset($this)) $ptrn= $wgCanonicalNamespaceNames[NS_IMAGE].'|'.$wgCanonicalNamespaceNames[NS_CATEGORY].'|'.$this->getNsText(NS_IMAGE).'|'.$this->getNsText(NS_CATEGORY);
else $ptrn= $wgCanonicalNamespaceNames[NS_IMAGE].'|'.$wgCanonicalNamespaceNames[NS_CATEGORY];
$ptrn= '!\[\[ *('.$ptrn.') *:(?>[^\|\]]+)(\|((?>[^\[\]]+)|\[\[(?>[^\]]+)\]\]|\[(?>[^\]]+)\])*)?\]\]!i';
#print "PTRN: $ptrn;\n";
$text= preg_replace($ptrn,'',$text);
#print "removed images.\n";
}
#print "strip interwiki\n";
# lines that hold nothing but label-less [[prefix:target]] links
$text= preg_replace('!^\s*(\[\[\s*[-\w]+\s*:\s*[^\|\]]*?\]\]\s*)+$!m','',$text);
#print "removed interwiki.\n";
$text= trim($text);
return $text;
}
# Converts wiki text to plain text: stripBoxes() first, then wikiToText() on its result.
function asPlainText( $text ) {
    return $this->wikiToText( $this->stripBoxes($text) );
}
# Derives a comparison form of a title: a trailing "(...)" qualifier is cut off,
# underscores and punctuation become blanks, separator runs collapse to one blank,
# the text is lower-cased via lc() and finally transliterated to US-ASCII.
function asCanonicalTitle( $text ) {
    $m= array();
    $hasQualifier= preg_match('/^(.*?)[\s_]*\([^)]*\)\s*$/',$text,$m);
    if ($hasQualifier) $text= $m[1];

    $spaced= str_replace('_',' ',$text);
    $spaced= preg_replace('/\pP/u',' ',$spaced);
    $spaced= preg_replace('/\pZ+/u',' ',$spaced);

    return iconv('utf-8','us-ascii//TRANSLIT',$this->lc($spaced));
}
# Reports the replication lag of this wiki's database.
#
# $html - when true, the result is the markup produced by wsfGetReplagHTML()
#
# The direct query against recentchanges is disabled (kept below for reference), so
# without $html there is no value to report and NULL is returned.
function getSecondsSinceLastEdit( $html = false ) {
    # start from a defined value so the non-HTML path does not read an unset variable
    $lag= NULL;
    /*
    $sql= 'SELECT time_to_sec(timediff(now(),rc_timestamp)) FROM ' . $this->tableName('recentchanges') . ' ORDER BY rc_timestamp DESC LIMIT 1;';
    $res= $this->wikiDB->query($sql, 'getSecondsSinceLastEdit');
    $row = $this->wikiDB->fetchRow($res);
    if (!$row) return false;
    $lag= (int)$row[0];
    */
    if ($html) $lag= wsfGetReplagHTML($this->dbServerId, 'estimated lag for '.$this->domain);
    return $lag;
}
# Loads the row of the image table for the given file name.
#
# $name - file name; normalized with asDBKey()
#
# Returns the row as an object, or false when there is no such row or no
# database is connected.
function getImageRecord( $name ) {
    # without a database there is nothing to look up; callers treat false as "unknown image"
    if ( !$this->wikiDB ) return false;
    $name= $this->asDBKey($name);
    $sql= "SELECT *\n"
        . "FROM " . $this->tableName('image') . "\n"
        . "WHERE img_name = " . $this->wikiDB->addQuotes( $name );
    $res= $this->wikiDB->query($sql, 'getImageRecord');
    $info= $this->wikiDB->fetchObject($res);
    if ( !$info ) $info= false;
    $this->wikiDB->freeResult($res);
    return $info;
}
# Collects the URLs and thumbnail dimensions for an image.
#
# $entry     - file name (looked up with getImageRecord()), a record object, or an array;
#              the name is read from the first of 'img_name', 'name', 'title', 'image',
#              'page_title' that is set, the size from 'img_width'/'width' and 'img_height'/'height'
# $maxwidth  - maximum thumbnail width; when both limits are missing, 120 is used
# $maxheight - maximum thumbnail height; a missing limit is copied from the other one
#
# Returns NULL when no name can be found, otherwise an array with 'name', 'title',
# 'pageURL', 'fileURL' and 'thumbURL'. For jpg/gif/png/svg files of known size it also
# holds 'thumbWidth' and 'thumbHeight'; for everything else 'thumbURL' is NULL.
function getThumbnailInfo($entry, $maxwidth=NULL, $maxheight=NULL) {
    global $wsgThumbnailRedirector;
    if (is_string($entry)) {
        $e= $this->getImageRecord($entry);
        if ($e) $entry= $e;
        else $entry= array( 'img_name' => $entry );
    }
    if (is_object($entry)) $entry= get_object_vars($entry);
    #NOTE: use default thumb size, as for mediawiki galleries.
    if (!$maxwidth && !$maxheight) $maxwidth= 120;
    if ($maxwidth && !$maxheight) $maxheight= $maxwidth;
    else if ($maxheight && !$maxwidth) $maxwidth= $maxheight;

    # the first key that is present supplies the file name
    $name= NULL;
    foreach (array('img_name', 'name', 'title', 'image', 'page_title') as $key) {
        if (isset($entry[$key])) {
            $name= $entry[$key];
            break;
        }
    }
    if ($name===NULL) return NULL;

    if (isset($entry['img_width'])) $width= $entry['img_width'];
    else if (isset($entry['width'])) $width= $entry['width'];
    else $width= 0;
    if (isset($entry['img_height'])) $height= $entry['img_height'];
    else if (isset($entry['height'])) $height= $entry['height'];
    else $height= 0;

    $title= $this->getNSText(NS_IMAGE) . ':' . $name;
    $lnk= $this->baseURL . "?title=" . urlencodeTitle( $title );
    # the redirector takes the wiki as "domain=<host>", exactly as in the thumbnail URL below
    if ($wsgThumbnailRedirector) $flnk= $wsgThumbnailRedirector . '?f=' . urlencodeTitle( $name ) . '&domain=' . $this->domain;
    else $flnk= $this->baseURL . "?title=Special:Filepath/" . urlencodeTitle( $name );

    if (preg_match('/\.(jpe?g|gif|png|svg)$/i',$name)
        && $width>0 && $height>0) {
        # fit the height first, then shrink further if the width still exceeds its limit
        $thumbheight= $maxheight;
        if ($thumbheight>$height) {
            $thumbheight= $height;
        }
        $thumbwidth= round($width * $thumbheight / $height);
        if ($thumbwidth > $maxwidth) {
            $thumbwidth= $maxwidth;
            #FIXME: ugly hack: thumb.php refuses to give full size images!
            if (!$wsgThumbnailRedirector && $width<=$thumbwidth) $thumbwidth= $width -1;
            $thumbheight= round($thumbwidth * $height / $width);
        }
        #FIXME: ugly hack: thumb.php refuses to give full size images!
        else if (!$wsgThumbnailRedirector && $width<=$thumbwidth) {
            $thumbwidth= $width -1;
            $thumbheight= ceil($thumbwidth * $height / $width);
        }
        if ($wsgThumbnailRedirector) {
            $u= "$wsgThumbnailRedirector?f=".urlencodeTitle($name)."&domain={$this->domain}&w=$maxwidth&h=$maxheight";
        }
        else {
            $u= "http://{$this->domain}{$this->info->script_path}thumb.php?f=".urlencodeTitle($name)."&w=$thumbwidth";
            # everything that is not jpg/gif/png (i.e. svg) gets the extra r=1 flag
            if ( !preg_match('/\.(jpe?g|gif|png)$/i',$name) ) {
                $u.= "&r=1";
            }
        }
        $info= array(
            'name' => $this->asTitle($name),
            'title' => $this->asTitle($title),
            'pageURL' => $lnk,
            'fileURL' => $flnk,
            'thumbURL' => $u,
            'thumbWidth' => $thumbwidth,
            'thumbHeight' => $thumbheight,
        );
    }
    else {
        $info= array(
            'name' => $this->asTitle($name),
            'title' => $this->asTitle($title),
            'pageURL' => $lnk,
            'fileURL' => $flnk,
            'thumbURL' => NULL,
        );
    }
    return $info;
}
# Renders the HTML for one image: a linked thumbnail when a thumbnail URL is known,
# otherwise a text link to the image page plus a "(file)" link.
#
# $entry     - a result of getThumbnailInfo() (an array holding 'fileURL'), or anything
#              getThumbnailInfo() accepts
# $maxwidth, $maxheight - passed on to getThumbnailInfo()
# $subtitle  - caption below the thumbnail; true means "use the image name"
#
# Returns the markup, or NULL when no image info is available.
function getThumbnailHTML($entry, $maxwidth=NULL, $maxheight=NULL, $subtitle=false) {
    $prepared= is_array($entry) && isset($entry['fileURL']);
    $info= $prepared ? $entry : $this->getThumbnailInfo($entry, $maxwidth, $maxheight);
    if (!$info) return NULL;

    if ($subtitle===true) $subtitle= $info['name'];
    $lnk= $info['pageURL'];

    # no thumbnail: plain links only
    if (!@$info['thumbURL']) {
        $html= "<a href='$lnk'>".escapeHtml($info['name'])."</a>";
        if (@$info['fileURL']) {
            $html.= " <small>(<a href='{$info["fileURL"]}'>file</a>)</small>";
        }
        return $html;
    }

    $u= $info['thumbURL'];
    $thumbwidth= $info['thumbWidth'];
    $thumbheight= $info['thumbHeight'];
    $dim = "width='$thumbwidth' height='$thumbheight'";

    $html= "<a href='$lnk'>"
         . "<img src='$u' border='0' alt='".escapeHtml($info['name'])."' $dim/>"
         . "</a>";
    if ($subtitle) {
        $html.= "<br/>"
              . "<small>"
              . "<a href='$lnk'>".escapeHtml($subtitle)."</a>"
              . "</small>";
    }
    return $html;
}
# Picks one non-redirect page at random: the first page (ordered by page_random)
# whose page_random lies above a freshly drawn number.
#
# $ns - optional namespace number to restrict the pick to
#
# Returns NULL when no row qualifies. Without $ns the result is the row
# (page_title, page_namespace) passed through wsfStripIntKeys(); with $ns it is the title only.
function getRandomPage( $ns = NULL ) {
    $anyNamespace= ($ns===NULL);

    $sql= "select page_title, page_namespace\n"
        . 'from ' . $this->tableName('page') . " use index(page_random)\n"
        . "where page_is_redirect = 0\n"
        . 'and page_random > ' . (rand()/getrandmax());
    if (!$anyNamespace) $sql.= ' and page_namespace = ' . (int)$ns;
    $sql.= ' order by page_random limit 1';

    $res= $this->wikiDB->query($sql, 'getRandomPage');
    $row= $this->wikiDB->fetchRow($res);
    if (!$row) $page= NULL;
    else if ($anyNamespace) $page= wsfStripIntKeys($row);
    else $page= $row['page_title'];
    $this->wikiDB->freeResult($res);
    return $page;
}
# Lists the categories a page belongs to, sorted by name.
#
# $title - page title; when $ns is not given and the title contains a ':', it is split
#          into namespace and bare title with makeLinkEntry()
# $ns    - optional namespace, as a number or as a name (resolved with getNsIndex())
#
# Returns an array of category names (cl_to values).
function getCategories( $title, $ns = NULL ) {
    if ($ns===NULL && strpos($title,':')) {
        # the title itself has to be handed over for parsing
        $e= $this->makeLinkEntry($title);
        $title= $e['link'];
        $ns= $e['namespacenum'];
    }
    if (!is_int($ns) && !is_numeric($ns)) $ns= $this->getNsIndex($ns);
    $title= $this->asDBKey($title);
    $sql= "SELECT cl_to as category\n"
        . "FROM " . $this->tableName('page') . "\n"
        . "JOIN " . $this->tableName('categorylinks') . " ON cl_from = page_id\n"
        . "WHERE page_title = " . $this->wikiDB->addQuotes($title) . "\n"
        . "AND page_namespace = ".(int)$ns; #TODO: handle redirects?!
    $sql.= " ORDER BY category";
    $res= $this->wikiDB->query($sql, 'getCategories');
    $found= array();
    while ($row= $this->wikiDB->fetchRow($res)) {
        $found[]= $row['category'];
    }
    $this->wikiDB->freeResult($res);
    return $found;
}
# Lists the pages contained in a category, ordered by namespace and title.
#
# $category - category name; normalized with asDBKey()
# $filterns - optional namespace (number, or name resolved with getNsIndex())
#
# Without a filter each entry is a row (page_title, page_namespace) passed through
# wsfStripIntKeys(); with a filter each entry is just the page title.
function getCategoryContents( $category, $filterns = NULL ) {
    $isName= $filterns && !is_int($filterns) && !is_numeric($filterns);
    if ($isName) $filterns= $this->getNsIndex($filterns);
    $category= $this->asDBKey($category);
    $filtered= ($filterns !== NULL);

    $sql= "SELECT page_title, page_namespace\n"
        . "FROM " . $this->tableName('page') . "\n"
        . "JOIN " . $this->tableName('categorylinks') . " ON cl_from = page_id\n"
        . "WHERE cl_to = " . $this->wikiDB->addQuotes($category);
    if ($filtered) $sql.= " AND page_namespace = ".(int)$filterns;
    $sql.= " ORDER BY page_namespace, page_title";
    #TODO: resolve redirects??

    $res= $this->wikiDB->query($sql, 'getCategoryContent');
    $found= array();
    while ($row= $this->wikiDB->fetchRow($res)) {
        $found[]= $filtered ? $row['page_title'] : wsfStripIntKeys($row);
    }
    $this->wikiDB->freeResult($res);
    return $found;
}
# Expands a set of categories level by level using collectSubcategories().
#
# $cats  - array of category names, or one string with names separated by ';', '/' or '|'
# $depth - number of levels to descend, capped at 100; for values <= 1 the input
#          categories are returned as they are
# $max   - stop descending once this many categories are collected, capped at 1024
#
# Returns the list of categories processed so far (the $done list).
function getSubcategories( $cats, $depth= 3, $max= 512 ) {
    #global $wsgCatScanText;
    $todo= is_array($cats) ? $cats : preg_split('! *[;/\|] *!',$cats);
    if ($depth<=1) return $todo;
    if ($depth>100) $depth= 100;
    if ($max>1024) $max= 1024;
    #wsfLog($wsgCatScanText->msg('recursing_categories'),LL_INFO);
    #FIXME: check if category exists!
    $key= implode('|',$todo);
    $done= array();
    # $level counts from 1; it is part of the cache key and description of each round
    for ($level= 1; $todo && ($level-1)<$depth && sizeof($done)<$max; $level+= 1) {
        $this->collectSubcategories( $todo, $done, "collectDeepCategories($key, $level)", "Subcategories of $key, level $level" );
        #FIXME: resolve real and pseudo-redirects!
    }
    #if ($todo && $level-1<$depth) wsfLog("WARNING: scan aborted after ".sizeof($done)." categories",LL_WARN);
    return $done;
}
# Performs one round of subcategory expansion.
#
# $todo       - (by reference) categories whose direct subcategories are wanted; on return
#               it holds the subcategories found that are not in $done yet
# $done       - (by reference) categories already handled; $todo is merged into it
# $cachekey   - optional key; when given, a QCache object is consulted before querying
# $cachedescr - optional description for the cache entry, defaults to $cachekey
#
# Returns the titles of all subcategories found in this round (empty array for an empty $todo).
function collectSubcategories( &$todo, &$done, $cachekey=NULL, $cachedescr=NULL ) {
if (!$todo) return array();
$data= false;
$cache= NULL;
if ($cachekey) {
if (!$cachedescr) $cachedescr= $cachekey;
require_once('QCache.php');
# NOTE(review): 60*60*24 is presumably the cache lifetime in seconds - confirm in QCache
$cache=& QCache::newWithoutQuery($this->wikiDB->getProperty('mDBname'), "collectDeepCategories" /* use legacy function name... */, $cachedescr, 60*60*24, $cachekey);
#$cache->debug= true;
$data= $cache->load( true );
}
# $data is still false when no cache was used or load() returned false: run the query
if ( $data===false ) {
# non-redirect pages of the category namespace that are members of any category in $todo
$sql= "SELECT page_id, page_title
FROM " . $this->tableName('page') . "
JOIN " . $this->tableName('categorylinks') . " ON cl_from = page_id
WHERE cl_to in " . $this->makeSQLSet($todo, true) . "
AND page_namespace = ".NS_CATEGORY."
AND page_is_redirect = 0"; #FIXME: handle redirects!
$res= $this->wikiDB->query($sql, 'collectSubcategories');
$found= array();
while ($row= $this->wikiDB->fetchRow($res)) {
# rows are only collected for storing when the cache object has a cacheDB
if ( $cache && $cache->cacheDB ) $data[]= $row;
$found[]= $row['page_title'];
}
$this->wikiDB->freeResult($res);
if ( $cache && $cache->cacheDB ) {
$cache->putData( $data );
}
}
else {
#FIXME: make sure this is noted!
#wsfLog("NOTE: using cached results for $cachedescr, {$cache->queryRecord->age} seconds old",LL_WARN);
# rebuild the title list from the cached rows
$found= array();
foreach ($data as $row) {
$found[]= $row['page_title'];
}
}
# everything just expanded counts as done; only unseen subcategories remain to do
$done= array_merge($done,$todo);
$todo= array_diff($found,$done);
return $found;
}
# Lists the pages that are members of any of the given categories.
#
# $cats  - array of category names, or one string with names separated by ';', '/' or '|'
# $ns    - optional namespace filter (number, or name resolved with getNsIndex())
# $limit - maximum number of rows; 0 or NULL for no limit
# $depth - when non-zero, the categories are first expanded with getSubcategories()
# $max   - passed to getSubcategories() as its category limit
#
# Returns an array of rows (page_id, page_title, page_namespace), each passed
# through wsfStripIntKeys().
function listCategories( $cats, $ns = NULL, $limit = 1024, $depth = 0, $max = 512 ) {
    if (!is_array($cats)) $cats= preg_split('! *[;/\|] *!',$cats);
    # expand the given categories themselves; depth and max are the 2nd and 3rd argument
    if ($depth) $cats= $this->getSubcategories($cats, $depth, $max);
    $where= '';
    if ($ns !== NULL) {
        if (!is_int($ns) && !is_numeric($ns)) $ns= $this->getNsIndex($ns);
        $where .= ' page_namespace = ' . (int)$ns;
    }
    if ($where) $where= ' AND '.$where;
    $sql= "SELECT page_id, page_title, page_namespace\n"
        . "FROM " . $this->tableName('page') . "\n"
        . "JOIN " . $this->tableName('categorylinks') . " ON cl_from = page_id\n"
        . "WHERE cl_to in " . $this->makeSQLSet($cats, true) . "\n"
        . $where . "\n"
        . "GROUP BY page_id";
    if ($limit) $sql.= " LIMIT ".(int)$limit;
    $res= $this->wikiDB->query($sql, 'listCategories');
    $found= array();
    while ($row= $this->wikiDB->fetchRow($res)) {
        $row= wsfStripIntKeys($row);
        $found[]= $row;
    }
    $this->wikiDB->freeResult($res);
    return $found;
}
# Renders a list of values as an SQL set literal, e.g. "( 1, 2, 'x' )".
#
# $items  - values; ints and floats are written as numbers, booleans as 1/0, NULL as
#           the keyword NULL, everything else is quoted with wikiDB->addQuotes()
# $titles - when true, quoted values are first normalized with asDBKey()
#
# Returns the set, including the surrounding parentheses.
function makeSQLSet( $items, $titles = false ) {
    # the pieces are collected and joined afterwards, so that a leading "0" cannot be
    # mistaken for "nothing written yet" when deciding about the separator
    $parts= array();
    foreach ($items as $t) {
        if (is_int($t)) $parts[]= (int)$t;
        else if (is_float($t)) $parts[]= (float)$t;
        else if (is_bool($t)) $parts[]= $t ? 1 : 0;
        else if ($t === NULL) $parts[]= 'NULL';
        else {
            if ($titles) $t= $this->asDBKey($t);
            $parts[]= $this->wikiDB->addQuotes($t);
        }
    }
    return '( ' . implode(', ', $parts) . ' )';
}
# Tag categories count as available unless tagCategories is exactly false.
function hasTagCategories( ) {
    return !($this->tagCategories === false);
}
# Tells whether a template ("tag") is listed for the given tag type.
#
# $tag  - template name; normalized with asDBKey()
# $type - tag type; normalized with asDBKey()
#
# Returns true/false, or NULL when tag categories are disabled or the tag list
# for $type cannot be obtained through listTags().
function isTagInCategory( $tag, $type ) {
    if ( $this->tagCategories === false ) return NULL;
    $tag= $this->asDBKey( $tag );
    $type= $this->asDBKey( $type );
    # fill the per-type list on first use
    if ( !$this->tagsByCategory || !isset($this->tagsByCategory[ $type ]) ) {
        $this->listTags( $type );
    }
    if ( !$this->tagsByCategory || !isset($this->tagsByCategory[ $type ]) ) {
        return NULL;
    }
    # strict comparison: names are compared as text, so numeric-looking names
    # such as "007" and "7" stay distinct
    return in_array( $tag, $this->tagsByCategory[ $type ], true );
}
# Tells whether $cat is the category configured in tagCategories for tag type $type.
# Both arguments are normalized with asDBKey() before the comparison.
# Returns NULL when tag categories are disabled, else true or false.
function isTagCategory( $cat, $type ) {
    if ( $this->tagCategories === false ) return NULL;
    $cat= $this->asDBKey( $cat );
    $type= $this->asDBKey( $type );
    foreach ( $this->tagCategories as $knownType => $localCat ) {
        if ( $knownType != $type ) continue;
        if ( $localCat == $cat ) return true;
    }
    return false;
}
# Lists the templates ("tags") that belong to the category of a tag type.
#
# $type - tag type; normalized with asDBKey(). The category is looked up in tagCategories;
#         when there is no entry for the type, the type itself is used as category name.
#
# The result is remembered in $this->tagsByCategory[$type] and served from there on
# later calls. Returns an array of template names, or NULL when tag categories are
# disabled or no database is connected.
function listTags( $type ) {
if ( $this->tagCategories === false || !$this->wikiDB ) return NULL;
$type= $this->asDBKey($type);
if ( $this->tagsByCategory && isset($this->tagsByCategory[ $type ]) ) {
return $this->tagsByCategory[ $type ];
}
#print "\n(( WikiAccess::listTags($type) ))\n\n";
#print_r($this->tagCategories);
#print "[ ".$this->tagCategories[ $type ]." ]\n";
if (isset( $this->tagCategories[ $type ] ) ) $cat= $this->tagCategories[ $type ];
else $cat= $type;
# first half: template pages that are members of the category;
# second half: redirect pages in the template namespace that link to such a template
$sql= "select T.page_title as name, NULL as tgt
from " . $this->tableName('page') . " as T
join " . $this->tableName('categorylinks') . " on cl_from = T.page_id
where T.page_namespace = ".NS_TEMPLATE." and cl_to = ".$this->wikiDB->addQuotes($cat)."
union
select R.page_title as name, T.page_title as tgt
from " . $this->tableName('page') . " as T
join " . $this->tableName('categorylinks') . " on cl_from = T.page_id
join (select * from " . $this->tableName('pagelinks') . "
join " . $this->tableName('page') . " on page_id = pl_from
where page_is_redirect = 1 and page_namespace = ".NS_TEMPLATE.") as R
on R.pl_namespace = ".NS_TEMPLATE." and R.pl_title = T.page_title
where T.page_namespace = ".NS_TEMPLATE." and cl_to = ".$this->wikiDB->addQuotes($cat)."
";
$tags= array();
#print "*** $sql ***\n";
require_once('QCache.php');
# NOTE(review): 60*60*24 is presumably the cache lifetime in seconds - confirm in QCache
$qc= new QCache( $this->wikiDB, 'WikiAccess::listTags', "list tags in category $cat", 60*60*24, $sql );
$qc->query();
# only the name column is kept; the redirect target (tgt) is not used here
while ( $row = $qc->fetchRow() ) {
$tags[]= $row['name'];
}
$qc->freeResult();
$this->tagsByCategory[ $type ]= $tags;
#print "TAGS( $type / $cat ):\n";
#print_r( $tags );
return $tags;
}
# Decides whether a word is a stopword for this wiki's language.
#
# On first use the settings in $this->stopwords are completed from $wsgStopwords[<language>]
# and from defaults (length 2, a pattern for numbers and words ending in '.', the
# database from $wsgStopwordDB). A word is a stopword when it is not longer than
# 'length', matches 'pattern', is listed in 'words', or is found in the stopword database.
#
# Returns true or false; always false when $this->stopwords is false.
function isStopword($w) {
    if ($this->stopwords===false) return false;
    # the class keeps its language code in $language; an explicitly set $lang still wins
    $lang= isset($this->lang) ? $this->lang : $this->language;
    #apply configuration and defaults
    if (!@$this->stopwords['_init']) {
        global $wsgStopwords, $wsgStopwordDB;
        $conf= @$wsgStopwords[$lang];
        if ($conf) {
            foreach ($conf as $k => $v) {
                $this->stopwords[$k]= $v; #TODO: handle "overwrite" vs. "fallback" behavior
            }
        }
        if (!isset($this->stopwords['length'])) $this->stopwords['length']= 2;
        if (!isset($this->stopwords['pattern'])) $this->stopwords['pattern']= '/^(\d+|.*\.)$/';
        if (!isset($this->stopwords['database'])) $this->stopwords['database']= $wsgStopwordDB;
        if (isset($this->stopwords['database']) && preg_match('/^\w+$/',$this->stopwords['database'])) $wsgStopwordDB.= '/'.$this->stopwords['database']; #FIXME: hack! is $wsgStopwordDB a full db path, or only a server-url?
        $this->stopwords['_init']= true;
    }
    if (isset($this->stopwords['length']) && strlen($w)<=$this->stopwords['length']) return true;
    if (isset($this->stopwords['pattern']) && preg_match($this->stopwords['pattern'],$w)) return true;
    if (isset($this->stopwords['words']) && in_array($this->lc($w),$this->stopwords['words'])) return true;
    # open the stopword database lazily, on the first word that gets this far
    if (!$this->stopwordDB && isset($this->stopwords['database'])) {
        if ($this->stopwords['database']) $this->stopwordDB=& openConnection($this->stopwords['database']);
    }
    if (!$this->stopwordDB) return false;
    #FIXME: bulk query!
    if (isset($this->stopwords['query_exists'])) $q= $this->stopwords['query_exists'];
    else {
        $table= isset($this->stopwords['table']) ? $this->stopwords['table'] : $lang.'_stopwords';
        $field= isset($this->stopwords['field']) ? $this->stopwords['field'] : 'word';
        $q= "SELECT EXISTS ( SELECT * FROM $table WHERE $field = %f)";
    }
    $q= str_replace('%f', $this->stopwordDB->addQuotes($w), $q );
    $res= $this->stopwordDB->query($q,'isStopword');
    $row= $this->stopwordDB->fetchRow($res);
    $hit= ($row && $row[0]) ? true : false;
    # release the result on every path, also when the word was found
    $this->stopwordDB->freeResult($res);
    return $hit;
}
# Removes stopwords and duplicates from a word list.
#
# $ww - (by reference) list of words; it is modified in place
#
# Returns (by reference) the same list, re-indexed from 0 without gaps.
function & stripStopwords(&$ww) {
    #FIXME: bulk query!
    # walk the keys that actually exist, so lists with gaps are handled as well
    foreach (array_keys($ww) as $k) {
        if ($this->isStopword($ww[$k])) unset($ww[$k]);
    }
    # de-duplicate first and re-index last: array_unique() keeps the keys of the
    # entries it retains and would otherwise leave holes in the numbering
    $ww= array_values(array_unique($ww));
    return $ww;
}
# Splits a phrase into words.
#
# Leading and trailing punctuation/whitespace is cut off, runs of separator characters
# (and runs of two or more dashes) become blanks, and the rest is split at whitespace.
#
# Returns the list of words, without empty entries.
function wordSplit($w) {
    $w= preg_replace('~^[\s.:,;!?*#+]+|[\s.:,;!?*#+]+$~','',$w);
    # NOTE(review): the '\s+-\s-' alternative looks like it was meant to be '\s+-\s+' - confirm intent
    $separators= '~[%$§&/()[\]{}@|.:,;!?*#+_…]+|\s+-\s-|--+~';
    # The class holds multibyte characters; in UTF-8 mode they are matched as whole
    # characters instead of as single bytes, which would tear other characters apart.
    $clean= preg_replace($separators.'u',' ',$w);
    # input that is not valid UTF-8 makes the UTF-8 mode fail: fall back to byte mode
    if ($clean===NULL) $clean= preg_replace($separators,' ',$w);
    $a= preg_split('/\s+/',$clean,-1,PREG_SPLIT_NO_EMPTY);
    return $a;
}
}
#if (sizeof($argv)>1) {
# $txt= file_get_contents($argv[1]);
# $txt= WikiAccess::stripBoxes($txt);
# print $txt;
#}
# Console self-test; runs only when WS_TEST_WIKIACCESS is defined.
# $args[0] is handed to WikiAccess::newInstance(); every further argument is treated as a
# namespace (number or name) and printed together with its index, its name and the
# talk page of a sample title ("<namespace>:XXX").
# NOTE(review): $args is not set in this part of the file; presumably it is prepared by the
# console bootstrap - confirm.
if (defined('WS_TEST_WIKIACCESS')) {
$w= WikiAccess::newInstance($args[0]);
/*
for($i=2; $i<sizeof($args); $i+= 1) {
$tags= $w->listTags( $args[$i] );
print_r( $tags );
}
*/
//print "DATE: ".$w->formatDate(time(),true)." | ".$w->formatDate(time(),false)."\n";
/*
$ok= $w->liveLogin($args[2], $args[3]);
if ($ok) {
$ok= $w->liveCheckLogin($args[2], $args[3]);
if (!$ok) print "FAILED TO DETECT LOGIN!\n";
}
if ($ok) {
$e= $w->livePrepareEdit($args[4]);
$e= $w->liveSubmitEdit($e, $e['text']."\n".$args[5],$args[6]);
}
*/
$w->initTranslations();
print "LANGUAGE: {$w->language}\n";
#print "FULL DATE: ".$w->formatDate(time(), true)."\n";
#print "SHORT DATE: ".$w->formatDate(time(), false)."\n";
for ($i=1; $i<sizeof($args); $i+= 1) {
/*$ns = $args[$i];
$nsnum = $w->getNsIndex($ns);
print '* ' . $ns . "; ";
print 'NS: ' . $w->getNsText( $nsnum ) . '; ';
print 'TALK: ' . $w->getTalkPage( $w->getNsText( $nsnum ) ) . '; ';
print 'TALK: ' . $w->getNsText( $nsnum + 1);
print "\n";
*/
# numeric arguments are taken as namespace numbers, anything else is resolved by name
$ns = $args[$i];
if (is_numeric($ns)) $ns = (int)$ns;
else $ns = $w->getNsIndex($ns);
$t = $w->getNsText($ns);
# the talk page is computed twice, from the raw argument and from the resolved name, for comparison
print '* ' . $args[$i] . ' => ' . $ns . ' => ' . $t . "; talk: " . $w->getTalkPage($args[$i] . ':XXX') . " == " . $w->getTalkPage($t . ':XXX') . " \n";
}
}
?>
WikiAccess.php
application/x-php, 82842 bytes (load raw)

