<?php
# The full open tag is used so the script does not depend on the
# short_open_tag ini setting.

# Defined before the framework files are loaded.
# NOTE(review): presumably WSInit.php checks this flag -- confirm.
define("WS_ADMIN",true);
require_once( "WSInit.php" );
require_once( "WikiAccess.php" );

# Filter applied by extractTags(): a single regular expression or an array of
# them. An empty value disables filtering. It is replaced further down, once
# the $wiki object exists.
$watchTags= '';

# Reads a recent-changes Atom feed line by line and prints one record (via
# printEntry) for every entry on an Image: page whose diff summary yields an
# edit action (see parseFeedSummary).
#
# The feed is not parsed as XML: every line is trimmed and compared with the
# tag expected in the current state, so <entry>, <id>, <updated>, the opening
# <summary type="html"> and <author> must each start on a line of their own.
#
# $h: either a string that fopen() can open for reading (the stream is then
#     closed again here), or an already opened stream, which is left open.
# Returns nothing. If the feed cannot be opened the function returns early.
function parseFeed( $h ) {
    global $wiki;

    if (is_string($h)) {
        $h= fopen($h,'r');
        if ($h===false) return; #fopen() already raised a warning; nothing to read
        $closeh= true;
    }
    else $closeh= false;

    $state= 0; #scan for <entry>

    $title= NULL;
    $time= NULL;
    $summary= array();
    $user= NULL;

    while (true) {
        $s= fgets($h);
        if ($s==='' || $s===false || $s===NULL) break;

        $s= trim($s);

        #recovery: a new <entry> always restarts the state machine, even if
        #the previous entry was never completed
        if ($s==='<entry>') {
            $title= NULL;
            $time= NULL;
            $user= NULL;
            $summary= array();
            $state= 0; #scan for <entry>
        }

        switch ($state) {
            case 0: #scan for <entry>
                if ($s==='<entry>') $state= 10; #scan for <id>
            break;

            case 10: #scan for <id>
                #$m[1] is the namespace prefix including the colon ('' if none),
                #$m[2] the rest of the page name
                if (preg_match('!<id>.*?/wiki/([\w ]+:)?(.*?)</id>!',$s,$m)) {
                    if ($m[1]==='Image:') {
                        $title= html_entity_decode($m[2]);
                        $state= 20; #scan for <updated>
                    }
                    else {
                        #not an image page: ignore the rest of this entry
                        $state= 0; #scan for <entry>
                    }
                }
            break;

            case 20: #scan for <updated>
                if (preg_match('!<updated>([- \dTZ:]+)</updated>!',$s,$m)) {
                    $time= wfTimestamp(TS_MW,$m[1]);
                    $state= 30; #scan for <summary>
                }
            break;

            case 30: #scan for <summary>
                #the summary is collected line by line, entity-decoded once
                if (preg_match('!^<summary type="html">(.*)$!',$s,$m)) {
                    $summary[]= html_entity_decode($m[1]);
                    $state= 40; #scan for </summary>
                }
            break;

            case 40: #scan for </summary>
                if (preg_match('!^(.*)</summary>$!',$s,$m)) {
                    $summary[]= html_entity_decode($m[1]);
                    $state= 50; #scan for <author>
                }
                else {
                    $summary[]= html_entity_decode($s);
                }
            break;

            case 50: #scan for <author>
                if (preg_match('!^<author><name>(.*?)</name></author>!',$s,$m)) {
                    $user= html_entity_decode($m[1]);
                    $state= 0; #scan for <entry>

                    $a= parseFeedSummary( $summary );

                    #no watched template was added or removed: skip the entry.
                    #"continue 2" targets the while loop; a bare "continue"
                    #inside a switch behaves like "break" and raises a warning
                    #since PHP 7.3.
                    if (!$a['action']) continue 2;

                    $row= new stdclass();
                    $row->rc_timestamp= $time;
                    $row->rc_title= $wiki->asDBKey($title);
                    $row->rc_user_text= $user;
                    $row->rc_action= $a['action'];
                    $row->rc_comment= $a['comment'];
                    $row->rc_last_oldid= $a['oldid'];
                    $row->rc_this_oldid= $a['newid'];

                    printEntry($row);
                }
            break;
        }
    }

    if ($closeh) fclose($h);
}

# Extracts the edit comment, the revision ids and the edit action from the
# HTML diff summary of one feed entry.
#
# $summary: list of summary lines, as collected by parseFeed().
#
# The lines are walked with a small state machine that follows the rows and
# cells of the diff table: text from the second cell of a row is collected in
# $removed, text from the fourth cell in $added. Scanning stops at the first
# '</table>' line or at the "diff cache key" comment, from which the old and
# new revision ids are taken.
#
# Returns an array with the keys 'oldid', 'newid', 'action' and 'comment'.
# 'action' is whatever getEditActionFromChange() returns for the collected
# text; the other values are NULL if they were not found.
function parseFeedSummary( $summary ) {
    $state= 0; #scan for comment
    
    $comment= NULL;
    $oldid= NULL;
    $newid= NULL;
    
    $added= '';
    $removed= '';

    foreach ($summary as $s) {
        if ($s==='') continue;
        if ($s==='</table>') break;
        
        #the cache key comment carries both revision ids; nothing after it is read
        if (preg_match('/<!-- diff cache key \w+:diff:oldid:(\d+):newid:(\d+) -->/',$s,$m)) {
            $oldid= $m[1];
            $newid= $m[2];
            break;
        }
        
        #if ($state>=140) print "++ [$state] ++ $s\n";
        
        switch ($state) {
            case 0: #scan for comment
                #the comment is the first line that is a complete <p>...</p>
                if (preg_match('!^<p>(.*)</p>$!',$s,$m)) {
                    $comment= $m[1];
                    $state= 10; #scan for first <tr>
                }
            break;
            
            case 10: #scan for first <tr>
                #the first <tr> is skipped; diff rows are only read from the second one on
                if ($s==='<tr>') $state= 20; #scan for diff <tr>
            break;
            
            case 20: #scan for diff <tr>
                if ($s==='<tr>') $state= 100; #scan for first <td> in diff
            break;
            
            case 100: #scan for first <td> in diff
                if (preg_match('!^<td>( |&nbsp;)</td>$!',$s,$m)) {
                    #blank marker cell: nothing is collected from this row
                    $state= 20; #scan for diff <tr>
                }
                else if (preg_match('!^<td>-</td>$!',$s,$m)) {
                    #'-' marker: the next cell holds removed text
                    $state= 120; #scan for second <td> in diff
                }
                else if (preg_match('!^<td colspan="2"[^>]*>( |&nbsp;)</td>$!',$s,$m)) {
                    #first two cells are one blank cell: go straight to the '+' marker
                    $state= 130; #scan for third <td> in diff
                }
                else if (preg_match('!^<td colspan="2"[^>]*>Line: \d+</td>$!',$s,$m)) {
                    #line number row: nothing to collect
                    $state= 20; #scan for diff <tr>
                }
                else {
                    #error
                    $state= 20; #scan for diff <tr>
                }
            break;
            
            case 120: #scan for second <td> in diff
                if (preg_match('!^<td style=".*?">(.*?)</td>$!',$s,$m)) {
                    #cell opens and closes on this line
                    $removed.= "\n".$m[1];
                    $state= 130; #scan for third <td> in diff
                }
                else if (preg_match('!^<td style=".*?">(.*?)$!',$s,$m)) {
                    #cell continues on the following lines
                    $removed.= "\n".$m[1];
                    $state= 124; #scan for second </td> in diff
                }
                else {
                    #error
                    $state= 20; #scan for diff <tr>
                }
            break;
            
            case 124: #scan for second </td> in diff
                if (preg_match('!^(.*?)</td>$!',$s,$m)) {
                    $removed.= "\n".$m[1];
                    $state= 130; #scan for third <td> in diff
                }
                else {
                    #still inside the cell: keep the whole line
                    $removed.= "\n".$s;
                }
            break;
            
            case 130: #scan for third <td> in diff
                if (preg_match('!^<td>( |&nbsp;)</td>$!',$s,$m)) {
                    #error
                    $state= 20; #scan for diff <tr>
                }
                else if (preg_match('!^<td>\+</td>$!',$s,$m)) {
                    #'+' marker: the next cell holds added text
                    $state= 140; #scan for fourth <td> in diff
                }
                else if (preg_match('!^<td colspan="2"[^>]*>( |&nbsp;)</td>$!',$s,$m)) {
                    #last two cells are one blank cell: nothing was added in this row
                    $state= 20; #scan for diff <tr>
                }
                else {
                    #error
                    $state= 20; #scan for diff <tr>
                }
            break;
            
            case 140: #scan for fourth <td> in diff
                if (preg_match('!^<td style=".*?">(.*?)</td>$!',$s,$m)) {
                    #cell opens and closes on this line
                    $added.= "\n".$m[1];
                    $state= 20; #scan for diff <tr>
                }
                else if (preg_match('!^<td style=".*?">(.*?)$!',$s,$m)) {
                    #cell continues on the following lines
                    $added.= "\n".$m[1];
                    $state= 144; #scan for fourth </td> in diff
                }
                else {
                    #error
                    $state= 20; #scan for diff <tr>
                }
            break;
            
            case 144: #scan for fourth </td> in diff
                if (preg_match('!^(.*?)</td>$!',$s,$m)) {
                    $added.= "\n".$m[1];
                    $state= 20; #scan for diff <tr>
                }
                else {
                    #still inside the cell: keep the whole line
                    $added.= "\n".$s;
                }
            break;
            
        }
    } 
    
    #drop every styled <span> together with its content before looking for tags
    $added= preg_replace('!<span style="[^">]*">.*?</span>!','',trim($added));
    $removed= preg_replace('!<span style="[^">]*">.*?</span>!','',trim($removed));
    
    $action= getEditActionFromChange($removed, $added);
    
    return array(
        'oldid' => $oldid,
        'newid' => $newid,
        'action' => $action,
        'comment' => $comment,
    );
}

# Writes one change record to standard output as a single line of seven
# tab-separated columns: timestamp, title, user, action, comment, new
# revision id, old revision id.
#
# $row: object carrying the rc_* properties read below.
function printEntry($row) {
    $columns= array(
        $row->rc_timestamp,
        $row->rc_title,
        $row->rc_user_text,
        $row->rc_action,
        $row->rc_comment,
        $row->rc_this_oldid, #the new revision comes before the old one
        $row->rc_last_oldid,
    );

    print implode("\t",$columns)."\n";
}

# Describes which tags differ between two complete page texts.
#
# $old, $new: page text before and after the edit.
# Returns the action string built by makeEditActionFromTage(), or NULL when
# both texts yield the same tags.
function getEditActionFromText( $old, $new ) {
    $tagsBefore= extractTags($old);
    $tagsAfter= extractTags($new);

    if ($tagsBefore != $tagsAfter) {
        $gone= array_diff($tagsBefore,$tagsAfter);
        $fresh= array_diff($tagsAfter,$tagsBefore);

        #the lists may differ in keys only; that is not a change
        if ($gone || $fresh) return makeEditActionFromTage($gone, $fresh);
    }

    return NULL;
}
    
# Describes which tags an edit removed and which it added, given the removed
# and the added text of a diff.
#
# $removed, $added: text taken from the "removed" and "added" side of a diff.
# Returns the action string built by makeEditActionFromTage(), or NULL when
# no tag was effectively added or removed.
function getEditActionFromChange( $removed, $added ) {
    $removedTags= extractTags($removed);
    $addedTags= extractTags($added);

    # A tag that shows up on both sides was only part of a modified line, so
    # it counts neither as removed nor as added. Both differences have to be
    # taken from the unmodified lists: reusing the already reduced "removed"
    # list for the second one would leave shared tags in the "added" list.
    $r= array_diff($removedTags, $addedTags);
    $a= array_diff($addedTags, $removedTags);

    if (!$r && !$a) return NULL;

    return makeEditActionFromTage($r, $a);
}
    
# Formats lists of removed and added tags as one action string.
#
# $r: removed tags, $a: added tags.
# Returns e.g. "+ x|y - z": added tags first, then removed tags, tags joined
# by "|". A side without tags is left out; NULL is returned if both are empty.
function makeEditActionFromTage( $r, $a ) {
    $parts= array();

    if ($a) $parts[]= '+ '.implode('|',$a);
    if ($r) $parts[]= '- '.implode('|',$r);

    if (!$parts) return NULL;

    return implode(' ',$parts);
}

# Returns the names of the templates ({{name}} or {{name|param|...}}) used in
# $text, normalised with $wiki->asDBKey() and without duplicates.
#
# If the global $watchTags is non-empty, only names matching it are kept. It
# is either one regular expression or an array of regular expressions, of
# which at least one has to match.
#
# $text: wiki text to scan.
# Returns an array of names; keys are not necessarily consecutive because
# array_unique() keeps the key of the first occurrence.
function extractTags( $text ) {
    global $wiki, $watchTags;

    # The parameter part uses a lazy [^|]*? so that a match ends at the first
    # closing }} it can reach. A greedy version runs on to the last }} before
    # the next | (or the end of the text) and thereby swallows templates that
    # follow a template with parameters, e.g. the second one in "{{a|b}} {{c}}".
    $found= preg_match_all( '/\{\{\s*([^|\]\[\{\}]+?)(\s*\|[^|]*?)*?\s*\}\}/is', $text, $tags, PREG_PATTERN_ORDER );
    if (!$found) return array(); #no template at all, or the regex engine gave up

    $r= array();
    foreach ($tags[1] as $t) {
        $t= $wiki->asDBKey($t);

        if ($watchTags) {
            if (is_array($watchTags)) {
                $ok= false;
                foreach($watchTags as $wt) {
                    if (preg_match($wt,$t)) {
                        $ok= true;
                        break;
                    }
                }

                if (!$ok) continue;
            }
            else {
                if (!preg_match($watchTags,$t)) continue;
            }
        }

        $r[]= $t;
    }

    return array_unique($r);
}

# Command line options. $options is not set anywhere in this file.
# NOTE(review): presumably it is filled in by WSInit.php -- confirm.
$since= isset($options['since']) ? $options['since'] : NULL;
$limit= isset($options['limit']) ? (int)$options['limit'] : 0; #only ever used as a number

if ($since) $since= wfTimestamp(TS_MW,$since);

$wiki= WikiAccess::newInstance('commons.wikimedia.org');

# only report templates of these classes (see extractTags)
$watchTags= array(
    $wiki->templateClasses['delete'],
    $wiki->templateClasses['problem'],
);

# build the URL of the recent changes feed
$u= $wiki->baseURL.'?title=Special:Recentchanges&feed=atom';
if ($since) $u.= '&from='.urlencode($since);
if ($limit) $u.= '&limit='.$limit;
else $u.= '&limit=2000'; #fixme: might miss some...

parseFeed($u);
exit;

# NOTE(review): everything from here to the end of the file is unreachable,
# because the script calls exit right after parseFeed() above. It looks like
# an alternative implementation that reads the recentchanges table directly
# instead of the Atom feed -- confirm before removing or re-enabling it.

# turn the timestamp into an SQL condition; the value is quoted by the DB layer
if ($since) $since= " AND rc_timestamp > ".$wiki->wikiDB->addQuotes($since);
else $since= '';

# selects rows of type 0 in namespace 6, plus "Log/delete" rows of type 3
# whose comment starts with 'deleted "[[Image:'
$sql= 'SELECT rc_type, rc_timestamp, rc_user_text, rc_namespace, rc_title, rc_comment, rc_cur_id, rc_this_oldid, rc_last_oldid 
       FROM recentchanges 
       WHERE ( (rc_namespace = 6 AND rc_type = 0) 
          OR (rc_type = 3 AND rc_title = "Log/delete" AND rc_comment like \'deleted \"[[Image:%\') )
          '.$since.'
       ORDER BY rc_timestamp ASC
';

if ($limit) $sql.= ' LIMIT ' . (int)$limit; 

#print "*** $sql ***";

$res= $wiki->wikiDB->query($sql, 'CommonsTicker');

while ($row = $wiki->wikiDB->fetchObject($res)) {
    if ($row->rc_type == 3) {
        # deletion log row: image name and reason are taken from the log comment
        if (!preg_match('/^deleted "\[\[Image:(.*?)\]\]":\s*(.*?)\s*$/', $row->rc_comment, $m)) {
            wsfLog("BAD log comment: {$row->rc_comment}",LL_WARNING);
            continue;
        }
        
        # rewrite the row so that it looks like a change to the image page itself
        $row->rc_namespace= 6; #image
        $row->rc_title= $wiki->asDBKey($m[1]);
        $row->rc_comment= trim($m[2]);
        $row->rc_type= 100;
        $row->rc_action= 'deleted';
    }
    else if ($row->rc_type == 0) {
        # type 0 row: load both revisions and compare the tags they contain
        wsfLog("fetching records for {$row->rc_title} ({$row->rc_timestamp})",LL_INFO);
        $old= $wiki->fetchPageRecord($row->rc_title, $row->rc_namespace, $row->rc_last_oldid);
        $new= $wiki->fetchPageRecord($row->rc_title, $row->rc_namespace, $row->rc_this_oldid);
        
        if (!$old) {
            wsfLog("failed to load old revision #{$row->rc_last_oldid} for {$row->rc_title}",LL_WARNING);
            continue;
        }
        
        if (!$new) {
            wsfLog("failed to load new revision #{$row->rc_this_oldid} for {$row->rc_title}",LL_WARNING);
            continue;
        }
        
        $row->rc_action= getEditActionFromText( $old['text'], $new['text'] );
        if (!$row->rc_action) continue; # no tag difference between the two revisions: nothing to report
    }
    else {
        $row->rc_action= 'other';
    }
    
    printEntry($row);
}

$wiki->wikiDB->freeResult($res);

?>