<?php
# Command line maintenance script that fills the commons_ticker table
# (or prints entries to stdout) from the recent-changes feed, the
# recentchanges table and the logging table.
#
# The full "<?php" open tag is used instead of the short "<?" form so the
# script does not depend on the short_open_tag ini setting.
define("WS_ADMIN",true);
require_once( "../common/WSInit.php" );
require_once( "WikiAccess.php" );

# Placeholders only; the real tag lists are assigned further down, once the
# wiki connection has been created.
$watchTags= '';
$ignoreTags= '';
# Reads an Atom recent-changes feed line by line and passes every relevant
# entry to putEntry().
#
# $h      either an open stream handle or a string (path/URL) that is opened
#         here and closed again before returning.
# $since  entries with a timestamp older than this are skipped.
#
# Returns the newest timestamp seen in the feed, or $since if nothing newer
# was found (also when the feed could not be opened).
#
# The parser is a line based state machine and relies on the feed having
# <entry>, <id>, <updated>, <summary> and <author> on separate lines, in
# that order.
function parseFeed( $h, $since = 0 ) {
    # NOTE(review): $cwiki is not assigned anywhere in this file; it is
    # presumably provided by an include - confirm, or use $wiki instead.
    global $wiki, $cwiki, $monitorTags, $pinboards;

    if (is_string($h)) {
        $url= $h;
        wsfLog("opening feed $url...",LL_VERBOSE);
        $h= fopen($url,'r');
        if (!$h) {
            # nothing to read; report "no progress" instead of reading from an invalid handle
            wsfLog("failed to open feed $url",LL_WARN);
            return $since;
        }
        $closeh= true;
    }
    else $closeh= false;

    $state= 0; #scan for <entry>
    $maxtime= $since;

    $title= NULL;
    $fulltitle= NULL;
    $time= NULL;
    $namespace= NULL;
    $summary= array();
    $user= NULL;

    wsfLog("reading feed...",LL_VERBOSE);

    while (($s= fgets($h)) !== false) {
        $s= trim($s);

        #recovery: a new <entry> always resets the per-entry data
        if ($s==='<entry>') {
            $title= NULL;
            $fulltitle= NULL;
            $namespace= NULL;
            $time= NULL;
            $user= NULL;
            $summary= array();
            $state= 0; #scan for <entry>
        }

        switch ($state) {
            case 0: #scan for <entry>
                if ($s==='<entry>') $state= 10; #scan for <id>
                break;

            case 10: #scan for <id>
                if (preg_match('!<id>.*?/wiki/([\w ]+:)?(.*?)</id>!',$s,$m)) {
                    # $m[1] is the namespace prefix including the colon, or '' for the main namespace
                    $title= html_entity_decode($m[2]);
                    $title= $wiki->asDBKey($title);
                    $fulltitle= $title;
                    if ($m[1]) $fulltitle= $wiki->asDBKey(html_entity_decode($m[1])).$fulltitle;

                    # pinboards are listed by full title, same as in getEditActionFromChange()
                    if ($pinboards && in_array($fulltitle, $pinboards)) {
                        $namespace= $m[1] ? $cwiki->getNsIndex(substr($m[1],0,-1)) : 0;
                        $state= 20; #scan for <updated>
                    }
                    else if ($m[1]==='Image:') {
                        $namespace= NS_IMAGE;
                        $state= 20; #scan for <updated>
                    }
                    else if ($monitorTags && $m[1]==='Template:') {
                        $namespace= NS_TEMPLATE;
                        $state= 20; #scan for <updated>
                    }
                    else {
                        $state= 0; #not interesting; scan for next <entry>
                    }
                }
                break;

            case 20: #scan for <updated>
                if (preg_match('!<updated>([- \dTZ:]+)</updated>!',$s,$m)) {
                    $time= wfTimestamp(TS_MW,$m[1]);
                    if ($time > $maxtime) $maxtime= $time;

                    if ($time<$since) $state= 0; #TODO: abort?!
                    else $state= 30; #scan for <summary>
                }
                break;

            case 30: #scan for <summary>
                if (preg_match('!^<summary type="html">(.*)$!',$s,$m)) {
                    $summary[]= html_entity_decode($m[1]);
                    $state= 40; #scan for </summary>
                }
                break;

            case 40: #scan for </summary>
                if (preg_match('!^(.*)</summary>$!',$s,$m)) {
                    $summary[]= html_entity_decode($m[1]);
                    $state= 50; #scan for <author>
                }
                else {
                    $summary[]= html_entity_decode($s);
                }
                break;

            case 50: #scan for <author>
                if (preg_match('!^<author><name>(.*?)</name></author>!',$s,$m)) {
                    $user= html_entity_decode($m[1]);
                    $state= 0; #entry complete; scan for next <entry>

                    $a= parseFeedSummary( $namespace, $title, $fulltitle, $summary );

                    # only entries with a recognized type and action are recorded
                    if ($a['type'] && $a['action']) {
                        $row= new stdclass();
                        $row->rc_timestamp= $time;
                        $row->rc_namespace= $namespace;
                        $row->rc_title= $title;
                        $row->rc_user_text= $user;
                        $row->rc_action= $a['action'];
                        $row->rc_action_type= $a['type'];
                        $row->rc_comment= $a['comment'];
                        $row->rc_last_oldid= @$a['oldid'];
                        $row->rc_this_oldid= @$a['newid'];

                        putEntry($row);
                    }
                }
                break;
        }
    }

    if ($closeh) fclose($h);
    return $maxtime;
}
# Extracts the edit comment, the revision ids and the removed/added text from
# the HTML summary of one feed entry, then classifies the change.
#
# $ns, $title, $fulltitle  are passed through to getEditActionFromChange().
# $summary                 list of summary lines (already entity-decoded by the caller).
#
# Returns an array with the keys 'oldid', 'newid', 'type', 'action' and
# 'comment'; any of them may be NULL.
#
# The summary is walked line by line with a small state machine. It expects
# every <tr> and <td> of the diff table to start on a line of its own.
function parseFeedSummary( $ns, $title, $fulltitle, $summary ) {
$state= 0; #scan for comment
$comment= NULL;
$oldid= NULL;
$newid= NULL;
$added= '';
$removed= '';
foreach ($summary as $s) {
$s= trim($s);
if ($s==='') continue;
# the end of the diff table ends the scan
if ($s==='</table>') break;
#FIXME: get the diff link a better way... this comment is not always there...
# the diff cache key comment carries both revision ids; nothing after it is read
if (preg_match('/<!-- diff cache key \w+:diff:oldid:(\d+):newid:(\d+) -->/',$s,$m)) {
$oldid= $m[1];
$newid= $m[2];
break;
}
#print "++ [$state] ++ $s\n";
switch ($state) {
case 0: #scan for comment
if (preg_match('!^<p>(.*)</p>$!',$s,$m)) {
$comment= $m[1];
$state= 10; //scan for first <tr>
}
#recovery: no comment paragraph was found before the table started
if ($s==='<tr>') $state= 20; #scan for diff <tr>
break;
case 10: #scan for first <tr>
if ($s==='<tr>') $state= 20; #scan for diff <tr>
break;
case 20: #scan for diff <tr>
if ($s==='<tr>') $state= 100; #scan for first <td> in diff
break;
case 100: #scan for first <td> in diff
# NOTE(review): the two alternatives in "( | )" look identical here; the
# first one was possibly "&nbsp;" or a non-breaking space - verify.
if (preg_match('!^<td>( | )</td>$!',$s,$m)) {
$state= 20; #scan for diff <tr>
}
else if (preg_match('!^<td>-</td>$!',$s,$m)) {
# "-" marker: the next cell holds removed text
$state= 120; #scan for second <td> in diff
}
else if (preg_match('!^<td colspan="2"[^>]*>( | )</td>$!',$s,$m)) {
# empty left side: skip straight to the marker of the right side
$state= 130; #scan for third <td> in diff
}
else if (preg_match('!^<td colspan="2"[^>]*>Line: \d+</td>$!',$s,$m)) {
# line number header row
$state= 20; #scan for diff <tr>
}
else {
#error
$state= 20; #scan for diff <tr>
}
break;
case 120: #scan for second <td> in diff
if (preg_match('!^<td (style|class)=".*?">(.*?)</td>$!',$s,$m)) {
# removed text that fits on a single line
$removed.= "\n".$m[2];
$state= 130; #scan for third <td> in diff
}
else if (preg_match('!^<td (style|class)=".*?">(.*?)$!',$s,$m)) {
# removed text continues on following lines
$removed.= "\n".$m[2];
$state= 124; #scan for second </td> in diff
}
else {
#error
$state= 20; #scan for diff <tr>
}
break;
case 124: #scan for second </td> in diff
if (preg_match('!^(.*?)</td>$!',$s,$m)) {
$removed.= "\n".$m[1];
$state= 130; #scan for third <td> in diff
}
else {
$removed.= "\n".$s;
}
break;
case 130: #scan for third <td> in diff
if (preg_match('!^<td>( | )</td>$!',$s,$m)) {
#error
$state= 20; #scan for diff <tr>
}
else if (preg_match('!^<td>\+</td>$!',$s,$m)) {
# "+" marker: the next cell holds added text
$state= 140; #scan for fourth <td> in diff
}
else if (preg_match('!^<td colspan="2"[^>]*>( | )</td>$!',$s,$m)) {
$state= 20; #scan for diff <tr>
}
else {
#error
$state= 20; #scan for diff <tr>
}
break;
case 140: #scan for fourth <td> in diff
if (preg_match('!^<td (style|class)=".*?">(.*?)</td>$!',$s,$m)) {
# added text that fits on a single line
$added.= "\n".$m[2];
$state= 20; #scan for diff <tr>
}
else if (preg_match('!^<td (style|class)=".*?">(.*?)$!',$s,$m)) {
# added text continues on following lines
$added.= "\n".$m[2];
$state= 144; #scan for fourth </td> in diff
}
else {
#error
$state= 20; #scan for diff <tr>
}
break;
case 144: #scan for fourth </td> in diff
if (preg_match('!^(.*?)</td>$!',$s,$m)) {
$added.= "\n".$m[1];
$state= 20; #scan for diff <tr>
}
else {
$added.= "\n".$s;
}
break;
}
}
# unwrap highlighting <span>s, drop HTML comments, then decode entities
$added= preg_replace('!<span (style|class)="[^">]*">(.*?)</span>!','\2',trim($added));
$removed= preg_replace('!<span (style|class)="[^">]*">(.*?)</span>!','\2',trim($removed));
$added= preg_replace('/<!--.*?-->/is','',$added);
$removed= preg_replace('/<!--.*?-->/is','',$removed);
$added= html_entity_decode($added);
$removed= html_entity_decode($removed);
#print " ==> \n\t\t+ $added\n\t\t- $removed";
# $a is NULL when the change is not of interest; the @ below turns that into NULL entries
$a= getEditActionFromChange($ns, $title, $fulltitle, $removed, $added);
return array(
'oldid' => $oldid,
'newid' => $newid,
'type' => @$a['type'],
'action' => @$a['action'],
'comment' => $comment,
);
}
# Looks up the newest timestamps already stored in commons_ticker.
#
# Returns an array with the keys 'rc' (newest "tagged" entry), 'log' (newest
# entry of any other type) and 'feed' (same value as 'rc'). If one of 'rc' or
# 'log' is empty it is filled from the other one.
function getLastTimestamp() {
    global $db;

    wsfLog("looking for last timestamp...",LL_VERBOSE);

    # result key => ( query, caller name passed to query() )
    $lookups= array(
        'rc' => array(
            'SELECT MAX(timestamp) FROM commons_ticker WHERE type = "tagged"',
            'getLastTimestamp#tagged',
        ),
        'log' => array(
            'SELECT MAX(timestamp) FROM commons_ticker WHERE type != "tagged"',
            'getLastTimestamp#not_tagged',
        ),
    );

    $stamps= array();
    foreach ($lookups as $key => $lookup) {
        $result= $db->query($lookup[0], $lookup[1]);
        $record= $db->fetchRow($result);
        $stamps[$key]= $record ? $record[0] : 0;
        $db->freeResult($result);
    }

    # let each source fall back to the other one
    if (!$stamps['log']) $stamps['log']= $stamps['rc'];
    if (!$stamps['rc']) $stamps['rc']= $stamps['log'];

    $stamps['feed']= $stamps['rc'];
    return $stamps;
}
# Creates the commons_ticker table if it does not exist yet.
#
# $drop  if true, an existing table is dropped first (all entries are lost).
#
# IF EXISTS is used for the drop so that "drop and create" also works on a
# database that does not have the table yet.
function createTickerTable($drop=false) {
    global $db;

    if ($drop) {
        $db->query('DROP TABLE IF EXISTS commons_ticker', 'createTickerTable#drop');
        wsfLog('deleted commons_ticker table',LL_WARN);
    }

    $sql= 'CREATE TABLE IF NOT EXISTS commons_ticker (
        id INT AUTO_INCREMENT NOT NULL,
        timestamp CHAR(14) NOT NULL,
        namespace INT(4) NOT NULL,
        title VARBINARY(255) NOT NULL,
        user VARBINARY(255),
        type ENUM ( "unknown", "deleted", "replaced", "tagged", "redir", "recat", "notify", "changed", "status", "restored" ) NOT NULL,
        action VARBINARY(255) NOT NULL,
        comment VARBINARY(255) NOT NULL,
        oldrev INT,
        newrev INT,
        PRIMARY KEY (id),
        UNIQUE KEY rev (newrev, oldrev),
        UNIQUE KEY page (namespace, title, timestamp, user),
        INDEX timestamp (timestamp, type),
        INDEX user (user, timestamp),
        INDEX type (type)
    )';

    $db->query($sql, 'createTickerTable');
    wsfLog('created commons_ticker table',LL_INFO);
}
# Stores one ticker entry in commons_ticker, or prints it via printEntry()
# if no database connection is configured.
#
# $row  object with the properties rc_timestamp, rc_namespace, rc_title,
#       rc_user_text, rc_action_type, rc_action, rc_comment, rc_last_oldid
#       and rc_this_oldid. Rows without a timestamp are dropped silently.
#
# Entries matching a record in the global $ignore list are skipped. Each
# record is an array with the optional keys 'user' (exact name), 'namespace'
# (number), 'title' and 'comment' (both regular expressions); a record
# matches if all of its conditions match.
function putEntry($row) {
    global $db, $ignore;

    if (!$row || !$row->rc_timestamp) return;

    if ($ignore) {
        foreach ($ignore as $e) {
            if (@$e['user'] && $e['user'] != $row->rc_user_text) continue;
            if (isset($e['namespace']) && $e['namespace'] != $row->rc_namespace) continue;
            if (@$e['title'] && !preg_match($e['title'], $row->rc_title)) continue;
            if (@$e['comment'] && ($row->rc_comment==NULL || !preg_match($e['comment'], $row->rc_comment))) continue;

            #matches ignore record
            return;
        }
    }

    if (!$db) {
        printEntry($row);
        return;
    }

    # collapse whitespace once; a missing comment is stored as empty string
    $comment= isset($row->rc_comment) ? preg_replace('/\s+/',' ',$row->rc_comment) : '';

    # revision ids are written into the statement unquoted, so force them to
    # integers; 0 / missing ids are stored as NULL
    $oldrev= (int)$row->rc_last_oldid;
    $newrev= (int)$row->rc_this_oldid;

    $sql= 'INSERT IGNORE INTO commons_ticker SET
        timestamp = '.$db->addQuotes($row->rc_timestamp).',
        namespace = '.$db->addQuotes($row->rc_namespace).',
        title = '.$db->addQuotes($row->rc_title).',
        user = '.$db->addQuotes($row->rc_user_text).',
        type = '.$db->addQuotes($row->rc_action_type).',
        action = '.$db->addQuotes($row->rc_action).',
        comment = '.$db->addQuotes($comment).',
        oldrev = '.($oldrev ? $oldrev : 'NULL').',
        newrev = '.($newrev ? $newrev : 'NULL').'
    ';

    $db->query($sql, 'putEntry');
    wsfLog("{$row->rc_timestamp}\t{$row->rc_title}:\t{$row->rc_user_text}\t{$row->rc_action}\t".$comment,LL_TRACE);
}
# Prints one ticker entry as a single tab separated line:
# timestamp, namespace, title, user, action type, action, quoted comment
# (whitespace collapsed), old revision id, new revision id.
# Revision ids that are empty or 0 are printed as empty fields.
# Rows without a timestamp produce no output.
function printEntry($row) {
    if (!$row || !$row->rc_timestamp) return;

    $fields= array(
        $row->rc_timestamp,
        $row->rc_namespace,
        $row->rc_title,
        $row->rc_user_text,
        $row->rc_action_type,
        $row->rc_action,
        '"'.preg_replace('/\s+/',' ',$row->rc_comment).'"',
        $row->rc_last_oldid ? $row->rc_last_oldid : '',
        $row->rc_this_oldid ? $row->rc_this_oldid : '',
    );

    print implode("\t", $fields)."\n";
}
# Classifies an edit from the full old and new page text.
#
# Currently this simply treats the old text as "removed" and the new text as
# "added" and delegates to getEditActionFromChange().
function getEditActionFromText( $ns, $title, $fulltitle, $old, $new ) {
    #FIXME: for pinboard handling, we need a real diff!!!
    $action= getEditActionFromChange( $ns, $title, $fulltitle, $old, $new );
    return $action;
}
# Classifies a change from the removed and added text.
#
# Returns NULL for pinboard pages and for changes without any relevant
# difference. Otherwise returns array( 'type' => ..., 'action' => ... ), where
# both entries are NULL if nothing applies to the given namespace.
#
# For images only tag changes are reported. For templates the first of tag
# changes, "bad" category changes, "good" category changes and redirect
# changes that is present in the diff wins.
function getEditActionFromChange( $ns, $title, $fulltitle, $removed, $added ) {
    global $pinboards, $monitorTags;

    if ($pinboards && in_array($fulltitle, $pinboards)) {
        #TODO: detect added
        return NULL;
    }

    $diff= extractMetaDiff( $ns, $title, $fulltitle, $removed, $added );
    if (!$diff) return NULL;

    # candidate rules in order of priority:
    # ( type, key of removed items, key of added items, removed prefix, added prefix )
    if ( $ns == NS_IMAGE ) {
        $rules= array(
            array('tagged', '-tags', '+tags', '-', '+'),
        );
    }
    else if ( $ns == NS_TEMPLATE ) {
        $rules= array(
            array('tagged', '-tags',  '+tags',  '-', '+'),
            array('recat',  '-cats',  '+cats',  '-', '+'),
            array('recat',  '!cats',  '*cats',  '!', '*'),
            array('redir',  '-redir', '+redir', '-', '+'),
        );
    }
    else {
        $rules= array();
    }

    $result= array(
        'type' => NULL,
        'action' => NULL,
    );

    foreach ($rules as $rule) {
        list($kind, $gone_key, $new_key, $gone_prefix, $new_prefix)= $rule;

        $gone= isset($diff[$gone_key]) ? $diff[$gone_key] : NULL;
        $fresh= isset($diff[$new_key]) ? $diff[$new_key] : NULL;

        if ($gone || $fresh) {
            $result['type']= $kind;
            $result['action']= makeEditAction($gone, $fresh, $gone_prefix, $new_prefix);
            break;
        }
    }

    return $result;
}
# Builds the textual description of a change, e.g. "+ NewTag - OldTag".
#
# $r     removed item(s): a string, an array of strings or empty.
# $a     added item(s): a string, an array of strings or empty.
# $rpre  prefix put in front of the removed items.
# $apre  prefix put in front of the added items.
#
# Added items are listed first, then removed items; arrays are joined
# with "|". Returns NULL if both $r and $a are empty.
function makeEditAction( $r, $a, $rpre = '-', $apre = '+' ) {
    if (!$r && !$a) return NULL;

    $parts= array();

    if ($a) {
        $parts[]= $apre . ' ' . (is_array($a) ? implode('|',$a) : $a);
    }

    if ($r) {
        # a scalar removed item must print $r itself (not $a)
        $parts[]= $rpre . ' ' . (is_array($r) ? implode('|',$r) : $r);
    }

    return implode(' ', $parts);
}
# Determines which tags, categories and redirects differ between the removed
# and the added text of a change.
#
# Returns an array that may contain the keys
#   '-tags'  / '+tags'   watched templates removed / added (images and license templates)
#   '!cats'  / '*cats'   categories matching $monitorTags removed / added (license templates)
#   '-cats'  / '+cats'   categories matching $watchTags removed / added (license templates)
#   '-redir' / '+redir'  redirect target removed / set (license templates)
# The array is empty if nothing relevant changed.
#
# A template counts as license template if its title matches $monitorTags or
# if the removed text contained a category matching $monitorTags.
function extractMetaDiff( $ns, $title, $fulltitle, $removed, $added ) {
    global $wiki;
    global $monitorTags, $watchTags, $ignoreTags;

    $diff= array();
    $is_license= false;

    if ($ns == NS_TEMPLATE) {
        #check if this *is* or *was* a license tag
        if (tagMatch($title, $monitorTags)) {
            $is_license= true;
            wsfLog("KNOWN license tag: $title",LL_TRACE);
        }
        else {
            $r= extractCategories($removed, $monitorTags);
            $is_license= $r ? true : false;

            if ($is_license) wsfLog("WAS a license tag: $title (".wsfToString($r).")",LL_TRACE);
            else wsfLog("NOT a license tag: $title",LL_TRACE);
        }
    }

    if ( ($ns == NS_TEMPLATE && $is_license) || $ns == NS_IMAGE ) {
        // -- tags --
        $r= extractTags($removed, $watchTags, $ignoreTags);
        $a= extractTags($added, $watchTags, $ignoreTags);

        if ($r && $a) {
            # items present on both sides did not really change
            $rn= array_diff($r, $a);
            $an= array_diff($a, $r);
            $r= $rn;
            $a= $an;
        }

        if ($r) $diff['-tags']= $r;
        if ($a) $diff['+tags']= $a;
    }

    if ($ns == NS_TEMPLATE && $is_license) {
        // -- good categories --
        $r= extractCategories($removed, $monitorTags);
        $a= extractCategories($added, $monitorTags);

        if ($r && $a) {
            $rn= array_diff($r, $a);
            $an= array_diff($a, $r);
            $r= $rn;
            $a= $an;
        }

        if ($r) wsfLog("removed good categories from $title: ".wsfToString($r),LL_TRACE);
        if ($a) wsfLog("added good categories to $title: ".wsfToString($a),LL_TRACE);

        if ($r) $diff['!cats']= $r;
        if ($a) $diff['*cats']= $a;

        // -- bad categories --
        $r= extractCategories($removed, $watchTags);
        $a= extractCategories($added, $watchTags);

        if ($r && $a) {
            $rn= array_diff($r, $a);
            $an= array_diff($a, $r);
            $r= $rn;
            $a= $an;
        }

        if ($r) wsfLog("removed bad categories from $title: ".wsfToString($r),LL_TRACE);
        if ($a) wsfLog("added bad categories to $title: ".wsfToString($a),LL_TRACE);

        if ($r) $diff['-cats']= $r;
        if ($a) $diff['+cats']= $a;

        // -- redirects --
        # start from scratch: without this the category lists from above would
        # be reported as redirects whenever no redirect is found in the text
        $r= NULL;
        $a= NULL;

        if (preg_match(REDIRECT_PATTERN, $removed, $m)) {
            $r= $wiki->makeLinkEntry($m[1]);

            # only redirects to local templates are of interest
            if ($r['lang']===NULL && $r['namespacenum']===NS_TEMPLATE) {
                $r= $r['link'];
            }
            else $r= NULL;

            #TODO: handle all redirects to non-license templates; handle good vs. bad
            if ($r && !$wiki->isTagCategory( $r, 'problem' ) && !$wiki->isTagCategory( $r, 'deprecated' )) $r= NULL;
        }

        if (preg_match(REDIRECT_PATTERN, $added, $m)) {
            $a= $wiki->makeLinkEntry($m[1]);

            if ($a['lang']===NULL && $a['namespacenum']===NS_TEMPLATE) {
                $a= $a['link'];
            }
            else $a= NULL;

            #TODO: handle all redirects to non-license templates; handle good vs. bad
            if ($a && !$wiki->isTagCategory( $a, 'problem' ) && !$wiki->isTagCategory( $a, 'deprecated' )) $a= NULL;
        }

        if ($r!=$a) {
            if ($r) wsfLog("removed redir from $title: ".wsfToString($r),LL_TRACE);
            if ($a) wsfLog("set redir on $title: ".wsfToString($a),LL_TRACE);

            if ($r) $diff['-redir']= $r;
            if ($a) $diff['+redir']= $a;
        }
    }

    return $diff;
}
# Returns the names of all templates ("{{Name}}" or "{{Name|...}}") used in
# the given wikitext, without duplicates.
#
# $text        wikitext; <nowiki> sections and HTML comments are ignored.
# $watchTags   if given, only names accepted by tagMatch() for it are returned.
# $ignoreTags  if given, names accepted by tagMatch() for it are dropped.
#
# Names are normalized with $wiki->asDBKey() if a wiki is available.
function extractTags( $text, $watchTags=NULL, $ignoreTags=NULL ) {
    global $wiki;

    $text= preg_replace('!<nowiki>.*?</nowiki>!is','',$text);
    $text= preg_replace('/<!--.*?-->/is','',$text);

    # The parameter part uses a lazy "[^|]*?" so a match ends at the first
    # "}}". A greedy match would run on to the last "}}" in the text and
    # swallow every template that follows a template with parameters.
    preg_match_all( '/\{\{\s*([^|\]\[\{\}]+?)(\s*\|[^|]*?)*?\s*\}\}/is', $text, $tags, PREG_PATTERN_ORDER );
    $tags= $tags[1];

    $r= array();
    foreach ($tags as $t) {
        if ($wiki) $t= $wiki->asDBKey($t);

        if ($watchTags && !tagMatch($t, $watchTags)) continue;
        if ($ignoreTags && tagMatch($t, $ignoreTags)) continue;

        $r[]= $t;
    }

    return array_unique($r);
}
# Returns the names of all categories ("[[Category:Name]]" or
# "[[Category:Name|sortkey]]") the given wikitext assigns, without duplicates.
#
# $text        wikitext; <nowiki> sections and HTML comments are ignored.
# $watchTags   if given, only names accepted by catMatch() for it are returned.
# $ignoreTags  if given, names accepted by catMatch() for it are dropped.
#
# Returns NULL if the text contains no category link at all. Requires the
# global $wiki to look up the name of the category namespace.
function extractCategories( $text, $watchTags=NULL, $ignoreTags=NULL ) {
    global $wiki;

    $text= preg_replace('!<nowiki>.*?</nowiki>!is','',$text);
    $text= preg_replace('/<!--.*?-->/is','',$text);

    # the namespace name is literal text; spaces may also be written as "_"
    $nspattern= preg_quote($wiki->getNsText(NS_CATEGORY), '/');
    $nspattern= str_replace(' ','[ _]',$nspattern);

    # The sort key part uses a lazy "[^|]*?" so a match ends at the first
    # "]]". A greedy match would run on to the last "]]" in the text and
    # swallow every category that follows a category with a sort key.
    $ptrn= '/\[\[\s*'.$nspattern.'\s*:\s*([^|\]]+?)(\s*\|[^|]*?)*?\s*\]\]/is';

    preg_match_all( $ptrn, $text, $cats, PREG_PATTERN_ORDER );
    $cats= $cats[1];
    if (!$cats) return NULL;

    $r= array();
    foreach ($cats as $t) {
        if ($wiki) $t= $wiki->asDBKey($t);

        if ($watchTags && !catMatch($t, $watchTags)) continue;
        if ($ignoreTags && catMatch($t, $ignoreTags)) continue;

        $r[]= $t;
    }

    return array_unique($r);
}
# Checks whether a template name matches any of the given patterns.
#
# $t     template name; normalized with $wiki->asDBKey() if a wiki is available.
# $tags  a single pattern or an array of patterns. A pattern starting with
#        "%" names a tag category and is checked with $wiki->isTagInCategory();
#        any other pattern is used as regular expression.
#
# Returns true on the first matching pattern, false if there is none or if
# $tags is empty.
function tagMatch($t, $tags) {
    global $wiki;

    if (!$tags) return false;
    if ($wiki) $t= $wiki->asDBKey($t);

    $patterns= is_array($tags) ? $tags : array( $tags );

    foreach ($patterns as $pattern) {
        if (preg_match('/^%(.*)$/',$pattern,$m)) {
            if ($wiki->isTagInCategory($t,$m[1])) return true;
        }
        else if (preg_match($pattern,$t)) {
            return true;
        }
    }

    return false;
}
# Checks whether a category name matches any of the given patterns.
#
# $t      category name; normalized with $wiki->asDBKey() if a wiki is available.
# $types  a single pattern or an array of patterns. A pattern starting with
#         "%" names a tag category type and is checked with
#         $wiki->isTagCategory(); any other pattern is used as regular
#         expression.
#
# Returns true on the first matching pattern, false if there is none or if
# $types is empty.
function catMatch($t, $types) {
    global $wiki;

    if (!$types) return false;
    if ($wiki) $t= $wiki->asDBKey($t);

    $patterns= is_array($types) ? $types : array( $types );

    foreach ($patterns as $pattern) {
        if (preg_match('/^%(.*)$/',$pattern,$m)) {
            if ($wiki->isTagCategory($t,$m[1])) return true;
        }
        else if (preg_match($pattern,$t)) {
            #HACK: this assumes that the category name == template name
            return true;
        }
    }

    return false;
}
# Converts one PCRE pattern, or a list of them, into a bare expression by
# stripping the delimiters and the trailing modifiers. The patterns of a list
# are joined with "|".
function makeSqlRegExp($regs) {
    # delimiter, body, same delimiter again, optional modifiers
    $strip= '!^([^\w\d])(.*)\1(\w*)$!';

    if (!is_array($regs)) {
        return preg_replace($strip,'\2',$regs);
    }

    $joined= '';
    foreach ($regs as $pattern) {
        $bare= preg_replace($strip,'\2',$pattern);
        # no separator as long as nothing has been collected yet
        $joined.= ($joined === '' ? '' : '|') . $bare;
    }

    return $joined;
}
# Reads upload and/or deletion log entries for images from the logging table
# and passes them to putEntry().
#
# $updates    if true, "upload" log entries are read.
# $deletions  if true, "delete" log entries are read.
# $since      if set, only entries with a newer log timestamp are read.
# $limit      if set, maximum number of log entries to read.
#
# Returns the newest log timestamp seen, or $since if there was none.
# Returns nothing if neither $updates nor $deletions is set.
#
# Uploads are only reported if an older version of the file exists in
# oldimage, i.e. if a file was replaced; fresh uploads are skipped.
function processLog($updates, $deletions, $since, $limit) {
    global $wiki, $wsgTableNames;

    $tuser= isset($wsgTableNames['user']) ? $wsgTableNames['user'] : 'user';

    if ($since) $wsince= " AND log_timestamp > ".$wiki->wikiDB->addQuotes($since);
    else $wsince= '';

    $types= array();
    if ($updates) $types[]= '"upload"';
    if ($deletions) $types[]= '"delete"';
    if (!$types) return;

    $sql= 'SELECT *
        FROM logging
        JOIN '.$tuser.' ON user_id = log_user
        WHERE log_type IN ( '.implode(', ', $types).' )
        AND log_namespace = '.NS_IMAGE.'
        '.$wsince.'
        ORDER BY log_timestamp ASC
    ';

    if ($limit) $sql.= ' LIMIT ' . (int)$limit;

    wsfLog("querying Logging table...",LL_VERBOSE);
    $res= $wiki->wikiDB->query($sql, 'CommonsTicker');
    wsfLog("query complete, analyzing.",LL_VERBOSE);

    $maxtime = $since;

    while ($row = $wiki->wikiDB->fetchObject($res)) {
        # map the log fields to the rc_* fields putEntry() expects
        $row->rc_namespace= $row->log_namespace;
        $row->rc_title= $row->log_title;
        $row->rc_user_text= $row->user_name;
        $row->rc_comment= trim($row->log_comment);
        $row->rc_timestamp= $row->log_timestamp;
        $row->rc_last_oldid= 0;
        $row->rc_this_oldid= 0;

        if ($row->rc_timestamp > $maxtime) $maxtime= $row->rc_timestamp;

        if ($row->log_type === 'delete') {
            if ( $row->log_action == 'restore' ) {
                $row->rc_type= 117;
                $row->rc_action= 'restored';
                $row->rc_action_type= 'restored';
            }
            else if ( preg_match('/\d{14}![^\s]+\.[^\s]+/', $row->rc_comment, $m) ) {
                # the comment contains "<timestamp>!<file name>", which is taken
                # as the deletion of a single old revision
                $row->rc_type= 111;
                $row->rc_action= 'deletedRev';
                $row->rc_action_type= 'deleted';
            }
            else {
                $row->rc_type= 101;
                $row->rc_action= 'deleted';
                $row->rc_action_type= 'deleted';
            }
        }
        else if ($row->log_type === 'upload') {
            #see if there's something in oldimage
            # both values are quoted the same way, instead of pasting the
            # timestamp into the statement as it is
            $sql= 'select * from oldimage where oi_name = '.$wiki->wikiDB->addQuotes($row->rc_title).
                ' and oi_timestamp < '.$wiki->wikiDB->addQuotes($row->log_timestamp).
                ' order by oi_timestamp desc limit 1';

            $r= $wiki->wikiDB->query($sql, 'CommonsTicker#updated');
            $last= $wiki->wikiDB->fetchRow($r);
            $wiki->wikiDB->freeResult($r);

            if (!$last) {
                wsfLog("Skipped fresh upload: {$row->log_title}, {$row->log_timestamp}",LL_DEBUG);
                continue;
            }

            if ($last['oi_user_text'] == $row->rc_user_text) {
                # the uploader replaced their own previous version
                $row->rc_type= 112;
                $row->rc_action= 'replacedOwn';
                $row->rc_action_type= 'replaced';
            }
            else {
                $row->rc_type= 102;
                $row->rc_action= 'replaced';
                $row->rc_action_type= 'replaced';
            }
        }
        else {
            wsfLog("Skipped log entry. Type: {$row->log_type}",LL_DEBUG);
            continue;
        }

        putEntry($row);
    }

    $wiki->wikiDB->freeResult($res);
    return $maxtime;
}
# Reads edits to image and template pages from the recentchanges table,
# classifies each one by comparing the old and the new revision text, and
# passes the relevant ones to putEntry().
#
# $since  if set, only changes with a newer timestamp are read.
# $limit  if set, maximum number of changes to read.
#
# Returns the newest change timestamp seen, or $since if there was none.
function processRC($since, $limit) {
    global $wiki;

    $wsince= $since ? " AND rc_timestamp > ".$wiki->wikiDB->addQuotes($since) : '';

    $sql= "SELECT rc_type, rc_timestamp, rc_user_text, rc_namespace, rc_title, rc_comment, rc_cur_id, rc_this_oldid, rc_last_oldid\n"
        . "FROM recentchanges\n"
        . 'WHERE (rc_namespace IN ( '.NS_IMAGE.', '.NS_TEMPLATE.' ) AND rc_type = 0)'."\n"
        . $wsince."\n"
        . "ORDER BY rc_timestamp ASC\n";

    if ($limit) {
        $sql.= ' LIMIT ' . (int)$limit;
    }

    wsfLog("querying RC table...",LL_VERBOSE);
    $res= $wiki->wikiDB->query($sql, 'CommonsTicker');
    wsfLog("query complete, analyzing.",LL_VERBOSE);

    $newest= $since;

    while ($row = $wiki->wikiDB->fetchObject($res)) {
        wsfLog("fetching records for {$row->rc_title} ({$row->rc_timestamp})",LL_VERBOSE);

        $before= $wiki->fetchPageRecord($row->rc_title, $row->rc_namespace, $row->rc_last_oldid);
        $after= $wiki->fetchPageRecord($row->rc_title, $row->rc_namespace, $row->rc_this_oldid);

        # the timestamp counts as seen even if the revisions can not be loaded
        if ($row->rc_timestamp > $newest) $newest= $row->rc_timestamp;

        if (!$before) {
            wsfLog("failed to load old revision #{$row->rc_last_oldid} for {$row->rc_title}",LL_WARN);
            continue;
        }

        if (!$after) {
            wsfLog("failed to load new revision #{$row->rc_this_oldid} for {$row->rc_title}",LL_WARN);
            continue;
        }

        # full title = namespace name + ":" + title, except for namespace 0
        $fulltitle= $row->rc_title;
        if ($row->rc_namespace) {
            $fulltitle= $wiki->asDBKey($wiki->getNsText($row->rc_namespace)).':'.$fulltitle;
        }

        $verdict= getEditActionFromText( $row->rc_namespace, $row->rc_title, $fulltitle, $before['text'], $after['text'] );
        if (!$verdict) continue;

        $row->rc_action_type= @$verdict['type'];
        $row->rc_action= @$verdict['action'];

        # only changes with both a type and an action are recorded
        if (!$row->rc_action_type || !$row->rc_action) continue;

        putEntry($row);
    }

    $wiki->wikiDB->freeResult($res);
    return $newest;
}
# --- evaluate command line options ---

$since= isset($options['since']) ? $options['since'] : NULL;
$limit= isset($options['limit']) ? $options['limit'] : NULL;
$stampfile= isset($options['timestamp-file']) ? $options['timestamp-file'] : NULL;

# a value longer than one character is taken as a timestamp
if ($since && strlen($since)>1) {
    $since= wfTimestamp(TS_MW, $since);
}

# what to process; --all selects everything except the feed
$updates= isset($options['all']) || isset($options['updates']);
$deletions= isset($options['all']) || isset($options['deletions']);
$tags= isset($options['all']) || isset($options['tags']);
$feed= isset($options['feed']);

# pinboard pages are given as a "|" separated list of full titles
$pinboards= isset($options['pinboard']) ? $options['pinboard'] : NULL;
if ($pinboards) {
    $pinboards= explode('|',$pinboards);
}

# NOTE(review): only the presence of --db is evaluated, a value given with
# the option is not used; the connection always goes to $wsgAuxWikiDB.
# Confirm that this is intended.
$db= isset($options['db']) ? $wsgAuxWikiDB : false;

if (isset($options['ll'])) {
    $wsgLogLevel= $options['ll'];
}

if (!($tags || $updates || $deletions || $feed)) {
    echo "specify at least one of --tags, --updates, --deletions, --feed, or --all\n";
    exit(1);
}

if ($db) {
    $db=& openConnection($db);
}

if ($db) {
    if (isset($options['drop-and-create'])) createTickerTable(true);
    else if (isset($options['create'])) createTickerTable(false);
}
# --- determine the timestamps to continue from ---
#
# With --update (or --since given without a value) the timestamps are taken
# from the timestamp file, else from the ticker table, else they default to
# one day ago. The timestamp file holds "key=value" lines ("=", ",", ":" or
# whitespace as separator) for the keys feed, log and rc; lines starting
# with "#" are ignored.
if (isset($options['update']) || @$options['since']===1 || @$options['since']===true) {
    $loaded= false;

    if ($stampfile && file_exists($stampfile)) {
        wsfLog("loading timestamps from $stampfile", LL_VERBOSE);
        $ts= file($stampfile);

        if (!$ts) wsfLog("failed to load timestamps from $stampfile", LL_WARN);
        else {
            $since= array();
            foreach ($ts as $s) {
                $s= trim($s);
                if ($s=='') continue;
                if ($s[0]=='#') continue;

                $s= preg_split('!(\s*[=,:]\s*|\t|\s+)!', $s, 2);
                if (sizeof($s)<2) continue;

                $since[$s[0]]= $s[1];
            }

            # missing entries are filled from the other ones
            if (!@$since['rc']) @$since['rc']= @$since['log'];
            if (!@$since['log']) @$since['log']= @$since['rc'];
            if (!@$since['feed']) @$since['feed']= @$since['rc'];

            $loaded= true;
        }
    }

    # Also used when the timestamp file exists but can not be read or is
    # empty; otherwise $since would keep the bare flag value of --since.
    if (!$loaded) {
        if ($db) $since= getLastTimestamp();
        else $since= wfTimestamp(TS_MW, time() - 60*60*24); #default to 1 day ago
    }
}

if ($since && is_string($since)) $since= wfTimestamp(TS_MW,$since);

# a single timestamp applies to all three sources
if ($since && !is_array($since)) {
    $ts= $since;
    $since= array();
    $since['feed']= $ts;
    $since['log']= $ts;
    $since['rc']= $ts;
}

if ($since) {
    wsfLog("new entries since: {$since['feed']} (feed), {$since['log']} (log), {$since['rc']} (rc)", LL_VERBOSE);
}
# --- wiki connection and tag configuration ---

$wiki= WikiAccess::newInstance('commons.wikimedia.org');

# templates and categories whose addition or removal is reported;
# entries starting with "%" name a tag category
$watchTags= array(
    #$wiki->templateClasses['delete'],
    #$wiki->templateClasses['problem'],
    #'/.*/', #DEBUG!!
    '%Problem',
    '%Delete',
);

# templates and categories that make a template count as license tag
$monitorTags= array(
    $wiki->templateClasses['license'],
    '%License',
    #'/.*/', #DEBUG!!
);

# templates that are never reported
#$ignoreTags= '/^(PD|ConvertToSVG|Convert_to_SVG)$/i'; #FIXME: use tag categories!
$ignoreTags= array();

# the ignore file is a PHP file that is included here; presumably it fills
# $ignore with the records evaluated by putEntry() - verify against an
# actual ignore file
$ignore= NULL;
if (!empty($options['ignore-file'])) {
    $ignore= array();
    include($options['ignore-file']);
}
# --- run the selected sources and record how far each one got ---

# $since is either an array with the keys feed, log and rc, or empty.
# Make sure all three keys exist in $newsince even for sources that are not
# processed in this run, so logging and writing the timestamp file below
# never read an undefined entry.
$newsince= is_array($since) ? $since : array();
$newsince+= array('feed' => NULL, 'log' => NULL, 'rc' => NULL);

if ($feed) {
    # --feed may carry the path or URL of the feed to read
    if (isset($options['feed']) && strlen($options['feed'])>1) $u= $options['feed'];
    else {
        $u= $wiki->baseURL.'?title=Special:Recentchanges&feed=atom';

        if ($since) {
            #TODO: add some grace (30 minutes?) to deal with feed cached
            # note: can we rely on the feed coming "newest first"? that would help...
            $u.= '&from='.$since['feed'];
        }

        if ($limit) $u.= '&limit='.$limit;
        else $u.= '&limit=2000'; #fixme: might miss some...
    }

    $newsince['feed']= parseFeed($u, isset($since['feed']) ? $since['feed'] : NULL);
    $newsince['rc']= $newsince['feed'];

    # the feed replaces reading the recentchanges table
    $tags= false;
}

if ($tags) {
    $newsince['rc']= processRC(isset($since['rc']) ? $since['rc'] : NULL, $limit);
    $newsince['feed']= $newsince['rc'];
}

if ($updates || $deletions) {
    $newsince['log']= processLog($updates, $deletions, isset($since['log']) ? $since['log'] : NULL, $limit);
}

wsfLog("new timestamps: feed={$newsince['feed']}; log={$newsince['log']}; rc={$newsince['rc']};", LL_VERBOSE);

if ($stampfile) {
    $ok= file_put_contents(
        $stampfile,
        "feed={$newsince['feed']}\nlog={$newsince['log']}\nrc={$newsince['rc']} " );

    # file_put_contents() returns false on failure, the number of bytes otherwise
    if ($ok === false) wsfLog("failed to write timestamps to $stampfile: feed={$newsince['feed']}; log={$newsince['log']}; rc={$newsince['rc']};", LL_WARN);
}

$wiki->close();
?>

