root > WikiSense-trunk > tools > StructureDump.php

StructureDump.php

application/x-php, 10981 bytes (load raw)
<?php
# StructureDump: command line tool that dumps the category structure of a
# wiki as XML, TSV or SQL (see the --format handling further down).
#
# The full "<?php" open tag is used because the short form "<?" is only
# recognized when the short_open_tag ini setting is enabled.

# NOTE(review): WS_ADMIN is defined before the bootstrap is loaded; it is
# presumably evaluated inside WSInit.php -- confirm there.
define("WS_ADMIN",true);
require_once( "../common/WSInit.php" );

require_once( "WikiAccess.php" );

# Prints one page record as a line of tab separated values.
#
# Columns, in order: page id, namespace number, redirect flag ('r' for a
# redirect, '-' otherwise), title, touched timestamp. Every column -- the
# last one included -- is followed by a tab; the line ends with a newline.
#
# $page: associative array with the keys page_id, page_namespace,
#        page_is_redirect, page_title and page_touched.
function printTsvRow( $page ) {
    $columns= array(
        (int)$page['page_id'],
        (int)$page['page_namespace'],
        $page['page_is_redirect'] ? 'r' : '-', #TODO: redir-target from redir-table!
        $page['page_title'],
        $page['page_touched'],
    );

    # the extra "\t" reproduces the trailing tab after the last column
    print implode("\t", $columns) . "\t" . "\n";
}

# Builds the XML attribute list that describes a page record.
#
# $page: either an associative array with the keys page_id, page_namespace,
#        page_title, page_touched and page_is_redirect, or a title that is
#        resolved via $wiki->fetchWikirecord() using NS_CATEGORY.
# $node: optional value for the node="..." attribute; omitted when empty.
#
# Returns the attributes as a string in which every attribute is preceded
# by a single space, or false if a title was given and no record was found.
function getNodeAttributes( $page, $node = NULL ) {
    global $wiki;

    if (!is_array($page)) {
        $page= $wiki->fetchWikirecord( $page, NS_CATEGORY );

        # Only give up when the lookup failed. (Previously this returned
        # false unconditionally, so titles could never be rendered.)
        if (!$page) return false;
    }

    $s= '';
    $s.= ' id="'.(int)$page['page_id'].'"';
    $s.= ' namespace="'.(int)$page['page_namespace'].'"';
    $s.= ' title="'.escapeHtml($page['page_title'], ENT_QUOTES).'"';
    $s.= ' touched="'.escapeHtml($page['page_touched'], ENT_QUOTES).'"';

    if ($page['page_is_redirect']) $s.= ' redirect="yes"'; #TODO: get redirect target?!
    if ($node) $s.= ' node="'.escapeHtml($node, ENT_QUOTES).'"';

    return $s;
}

# Builds the SQL statement that stores one page record in the dump table.
#
# $page:  either an associative array with the keys page_id, page_namespace,
#         page_title, page_touched and page_is_redirect, or a title that is
#         resolved via $wiki->fetchWikirecord() using NS_CATEGORY.
# $level: nesting level to record for the page.
#
# The table name is taken from the global $dbtable. Returns the statement
# as a string, or false if a title was given and no record was found.
function getInsertStatement( $page, $level = 1 ) {
    global $wiki, $dbtable;

    if (!is_array($page)) {
        $page= $wiki->fetchWikirecord( $page, NS_CATEGORY );

        # Only give up when the lookup failed. (Previously this returned
        # false unconditionally, so titles could never be inserted.)
        if (!$page) return false;
    }

    #TODO: collect lines, make bulk-inserts!

    # NOTE(review): string values are escaped with addslashes(); this is kept
    # because the statement may be printed without any database connection.
    $s= 'INSERT INTO '.$dbtable.' (id, namespace, title, touched, is_redirect, level) VALUES ( ';
    $s.= (int)$page['page_id'].', ';
    $s.= (int)$page['page_namespace'].', ';
    $s.= '"'.addslashes($page['page_title']).'", ';
    $s.= '"'.addslashes($page['page_touched']).'", ';
    $s.= (int)$page['page_is_redirect'].', ';
    $s.= (int)$level.' ';
    $s.= ' ) ';

    # an existing row (same primary or unique key) is refreshed, not duplicated
    $s.= 'ON DUPLICATE KEY UPDATE touched = VALUES(touched), is_redirect = VALUES(is_redirect), level = VALUES(level)';

    return $s;
}

# Emits one page and, if it is a category not seen before, recursively all
# members of that category.
#
# $page:     page record (associative array with the page_* keys) or a title.
#            A title containing ':' is looked up with namespace NULL, any
#            other title with NS_CATEGORY.
# $level:    nesting level of this node; used for XML indentation, for the
#            level column of SQL inserts and for the depth limit.
# $articles: if false, pages outside the category namespace are skipped.
# $format:   'xml', 'sql', or anything else for tab separated output.
#
# Globals: $stoplist (ids of categories already expanded), $depth (0/empty
# for unlimited), $redir (also list redirects linking to the members),
# $db (target connection for SQL inserts, may be NULL), $wiki / $wdb (wiki
# access object and its database handle).
#
# Returns false if a title could not be resolved, otherwise nothing.
function printNode( $page, $level = 1, $articles = true, $format = 'xml') {
    global $stoplist, $wiki, $db, $wdb, $depth, $redir;

    if ($depth && $level>($depth+1)) return;

    $dent= str_repeat('  ',$level);

    if (!is_array($page)) {
        if ( strpos($page, ':') !== false ) $ns= NULL;
        else $ns= NS_CATEGORY;

        $page= $wiki->fetchWikirecord( $page, $ns );
        if (!$page) return false;
    }

    if ($page['page_namespace'] != NS_CATEGORY) {
        # plain page: a leaf in the output
        if (!$articles) return;

        if ($format=='sql') {
            $sql= getInsertStatement($page, $level);
            if ($db) {
                $db->query($sql, 'printNode#insert');
                wsfLog('inserted: '.substr($sql,0,128).'...', LL_VERBOSE);
            }
            else print "$sql\n";
        }
        else if ($format=="xml") {
            print $dent.'<page '.getNodeAttributes($page).'/>'."\n";
        }
        else {
            if ( !isset($stoplist[$page['page_id']]) ) {
                printTsvRow($page);
            }
        }
    }
    else if ( isset($stoplist[$page['page_id']]) ) {
        # category that was expanded before: only XML gets a back reference
        if ($format=='xml') {
            print $dent.'<page '.getNodeAttributes($page, 'ref').'/>'."\n";
        }
    }
    else {
        # mark before descending, so cycles in the category graph terminate
        $stoplist[$page['page_id']]= true;

        if ($format=='sql') {
            $sql= getInsertStatement($page, $level);
            if ($db) {
                $db->query($sql, 'printNode#insert');
                wsfLog('inserted: '.substr($sql,0,128).'...', LL_VERBOSE);
            }
            else print "$sql\n";
        }
        else if ($format == "xml") {
            print $dent.'<page '.getNodeAttributes($page).'>'."\n";
        }
        else {
            printTsvRow($page);
        }

        $category= $wdb->addQuotes($page['page_title']);

        # The category-only restriction has to be part of every SELECT of the
        # union. Appending it once after the UNION filtered only the second
        # SELECT, and with an ambiguous column name (P and R both have
        # page_namespace).
        $memberFilter= $articles ? '' : " AND page_namespace = ".NS_CATEGORY;
        $redirFilter= $articles ? '' : " AND R.page_namespace = ".NS_CATEGORY;

        $sql= "SELECT page_id, page_title, page_namespace, page_touched, page_is_redirect
              FROM page
              JOIN categorylinks ON cl_from = page_id
              WHERE cl_to = " . $category . $memberFilter;

        if ($redir) {
            # redirect pages in the same namespace that link to a member
            $sql .= "
              UNION
              SELECT R.page_id, R.page_title, R.page_namespace, R.page_touched, R.page_is_redirect
              FROM page as P
              JOIN categorylinks ON cl_from = P.page_id
              JOIN pagelinks ON pl_namespace = P.page_namespace AND pl_title = P.page_title
              JOIN page as R ON R.page_id = pl_from
              WHERE cl_to = " . $category . "
              AND R.page_namespace = P.page_namespace
              AND R.page_is_redirect > 0" . $redirFilter;
        }

        $sql.= " ORDER BY page_namespace, page_title";

        $res= $wiki->wikiDB->query($sql, 'printNode');
        while ($row= $wiki->wikiDB->fetchRow($res)) {
            printNode( $row, $level+1, $articles, $format);
        }

        $wiki->wikiDB->freeResult($res);
        if ($format=='xml') print $dent.'</page>'."\n";
    }

}

#--------------------------------------------------------

# Command line handling.
# NOTE(review): $args and $options are not defined in this file; presumably
# they are filled in by the bootstrap loaded at the top -- confirm.

if (!isset($args[0])) {
    # categories are optional: the wiki's root category is used as fallback
    echo "USAGE: StructureDump.php <wiki> [<category> ...]\n";
    exit(1);
}

# first positional argument selects the wiki, the rest are categories
$lang= array_shift($args);
$cats= $args;

$articles= empty($options['catonly']);                              # false: categories only
$format= isset($options['format']) ? $options['format'] : NULL;     # 'xml', 'sql', else TSV
$depth= isset($options['depth']) ? $options['depth'] : NULL;        # empty: unlimited

$dbtable= isset($options['table']) ? $options['table'] : NULL;      # target table for SQL output
$xtable= isset($options['xtable']) ? $options['xtable'] : NULL;     # table whose ids get stripped
$dbname= isset($options['insert']) ? $options['insert'] : NULL;     # target database / connection
$dbtrunc= isset($options['truncate']) ? $options['truncate'] : NULL;
$dbcreate= isset($options['create']) ? $options['create'] : NULL;   # (re)create the target table
$redir= isset($options['redir']) ? $options['redir'] : NULL;        # include redirects

# asking for a table or a target database implies SQL output
if ($dbtable && !$format) $format= 'sql';
if ($dbname && !$format) $format= 'sql';

if (!$format) $format= 'xml';

if ($format=='sql' && !$dbtable) die("missing --table option!\n");

# connection that receives the dump; stays NULL when SQL is only printed
$db= NULL;

$wiki= WikiAccess::newInstance( $lang );
$wdb =& $wiki->wikiDB;

# Prepare the database that receives the dump (only with the insert option).
if ($dbname) {
    if (preg_match('!^%|//!', $dbname)) {
        # value starts with '%' or contains '//': passed to openConnection()
        $db=& openConnection($dbname);

        # check the connection that was just opened (this used to test $wdb,
        # the wiki's own handle, so a failed connect went unnoticed)
        if (!$db) die("failed to connect to database!\n");
        #else print "DB-CONNECTION OK\n";
    }
    else {
        # plain name: a database reachable through the wiki's own connection
        $dbtable = "$dbname.$dbtable";
        $db=& $wdb;
    }

    if ($dbcreate) {
        $db->query("DROP TABLE IF EXISTS $dbtable",'StructureDump#drop');

        $db->query("CREATE TABLE IF NOT EXISTS $dbtable (
            id INT(12) NOT NULL,
            namespace MEDIUMINT NOT NULL,
            title VARCHAR(255) binary NOT NULL,
            touched CHAR(12) NOT NULL,
            is_redirect TINYINT DEFAULT 0,
            level TINYINT DEFAULT NULL,
            PRIMARY KEY (id),
            UNIQUE KEY title (namespace, title),
            KEY redirect (is_redirect, namespace),
            KEY touched (touched, namespace),
            KEY level (level, namespace)
        )"
,'StructureDump#create');

        print "CREATED TABLE $dbtable\n";
    }
    else {
        # NOTE(review): the table is emptied whenever it is not recreated;
        # the truncate option read into $dbtrunc is never consulted. Confirm
        # whether this should be conditional on $dbtrunc.
        $db->query("TRUNCATE TABLE $dbtable",'StructureDump#truncate');
        print "TRUNCATED TABLE $dbtable\n";
    }

}

# Ids of the categories printNode() has expanded so far (used as a set).
$stoplist = array();

# No category on the command line: fall back to the root category known
# for this wiki, or stop with an error if there is none.
if (!$cats) {
    $cat = @$wiki->root_category;

    if ($cat) {
        $cats = array( $cat );
    }
    else {
        echo "No root category known for {$wiki->domain}. Please specify!";
        $wiki->close();
        exit(1);
    }
}

# Produce the dump. SQL output is built breadth first, one INSERT ... SELECT
# per level, directly inside the database; XML and TSV output walk the
# category tree recursively via printNode().
if ($format=='sql') {
    # The pass loop below stops when a pass inserted no rows or when the
    # requested depth is reached. When the statements are only printed there
    # is no row count, so without a depth the loop would never terminate.
    if (!$db && !$depth) {
        echo "missing --depth option! It is required when SQL is printed instead of executed.\n";
        $wiki->close();
        exit(1);
    }

    # quoted list of the start categories for the IN ( ... ) clause
    $quoted= array();
    foreach ($cats as $cat) {
        $quoted[]= $wdb->addQuotes( $wiki->asDBKey($cat) );
    }
    $catlist= implode(', ', $quoted);

    print "initializing table...\n";

    # level 0: the start categories themselves
    $sql= "INSERT INTO $dbtable (id, namespace, title, touched, is_redirect, level)
           SELECT page_id, page_namespace, page_title, page_touched, page_is_redirect, 0 as level
           FROM {$wiki->dbname}.page
           WHERE page_namespace = ".NS_CATEGORY." AND page_title IN ( ".$catlist." )";

    if ($db) $db->query($sql,'StructureDump#basecats');
    else print "\n$sql\n";

    # the namespace restriction is the same for every pass
    if (!$articles) $nswhere= ' AND page_namespace = ' . NS_CATEGORY;
    else $nswhere= '';

    $i=0;
    while (!$depth || $i<$depth) {
        $next= $i+1;

        print "pass ".$next."...\n";

        # members of all categories stored at level $i become level $i+1;
        # INSERT IGNORE leaves pages alone that are already in the table
        $sql= "INSERT IGNORE INTO $dbtable (id, namespace, title, touched, is_redirect, level)
               SELECT page_id as i, page_namespace as n, page_title as t, page_touched as o, page_is_redirect as r, level as l
                 FROM (
                    SELECT page_id, page_namespace, page_title, page_touched, page_is_redirect, ".$next." as level
                    FROM {$wiki->dbname}.page
                    JOIN {$wiki->dbname}.categorylinks ON cl_from = page_id
                    JOIN $dbtable ON title = cl_to AND namespace = ".NS_CATEGORY."
                    WHERE level = $i $nswhere ) as X";

        if ($db) {
            #print "*** $sql ***\n";
            $db->query($sql,'StructureDump#collect');

            $rows= $db->affectedRows();
            print "$rows rows inserted in pass ".$next." \n";

            # nothing new on this level: the whole structure has been collected
            if (!$rows) break;
        }
        else {
            print "\n$sql\n";
        }

        if ($redir) {
            # Redirect pages in the same namespace that link to a page stored
            # in this pass. The dump table needs the alias T used throughout
            # the query, and the page table's namespace column is
            # page_namespace.
            $sql= "INSERT IGNORE INTO $dbtable (id, namespace, title, touched, is_redirect, level)
                   SELECT page_id as i, page_namespace as n, page_title as t, page_touched as o, page_is_redirect as r, level as l
                     FROM (
                        SELECT R.page_id, R.page_namespace, R.page_title, R.page_touched, R.page_is_redirect, T.level
                        FROM $dbtable as T
                        JOIN {$wiki->dbname}.pagelinks ON pl_namespace = T.namespace AND pl_title = T.title
                        JOIN {$wiki->dbname}.page as R ON R.page_id = pl_from
                        WHERE T.level = ".$next."
                        AND T.namespace = R.page_namespace
                        AND R.page_is_redirect > 0 ) as X";

            if ($db) {
                #print "*** $sql ***\n";
                $db->query($sql,'StructureDump#collectRedir');

                $redirRows= $db->affectedRows();
                print "$redirRows redir rows inserted in pass ".$next." \n";

                # A level without redirects must not end the traversal: the
                # pages inserted above may still have members of their own.
            }
            else {
                print "\n$sql\n";
            }
        }

        $i= $next;
    }

    if ($db && $xtable && $xtable!=$dbtable) {
        print "stripping entries that are also contained in $xtable...\n";
        $sql= "DELETE FROM T USING $dbtable as T JOIN $xtable as X ON T.id = X.id";
        $db->query($sql,'StructureDump#strip');
    }
}
else {
    if ($format=='xml') {
        print "<?xml version='1.0' encoding='utf-8'?>\n";

        print "<wiki domain=\"".escapeHtml($wiki->domain, ENT_QUOTES)."\" time=\"".wfTimestamp(TS_MW)."\">\n";
    }

    foreach ($cats as $cat) {
        printNode( $cat, 1, $articles, $format);
    }

    if ($format=='xml') {
        print "</wiki>\n";
    }
}

# Shut down: close the wiki access object first, then the target database
# handle if one was set up.
# NOTE(review): $db can be a reference to the wiki's own handle; whether
# closing it after $wiki->close() is harmless depends on those classes.
$wiki->close();

if ($db) {
    $db->close();
}
?>