<?php
# Full open tag: the short form "<?" is only recognised when short_open_tag is
# enabled, otherwise the whole script would be sent to the output as text.

# NOTE(review): WS_ADMIN is presumably inspected by WSInit.php, which is why it
# is defined before the include - confirm against ../common/WSInit.php.
define("WS_ADMIN",true);
require_once( "../common/WSInit.php" );
require_once( "WikiAccess.php" );
# Writes one page record to standard output as a single tab-separated line.
#
# Columns, in order: page id, namespace number, redirect flag ('r' for a
# redirect, '-' otherwise), title, touched timestamp. Every column, including
# the last one, is followed by a tab; the line is then closed with a newline.
#
# $page - associative array with the keys page_id, page_namespace,
#         page_is_redirect, page_title and page_touched.
function printTsvRow( $page ) {
    $redirectFlag = '-';
    if ($page['page_is_redirect']) {
        $redirectFlag = 'r'; #TODO: redir-target from redir-table!
    }

    $columns = array(
        (int)$page['page_id'],
        (int)$page['page_namespace'],
        $redirectFlag,
        $page['page_title'],
        $page['page_touched'],
    );

    $line = '';
    foreach ($columns as $column) {
        $line .= $column . "\t";
    }

    print $line . "\n";
}
# Builds the XML attribute string describing a page.
#
# $page - either a page record (associative array with the keys page_id,
#         page_namespace, page_title, page_touched and page_is_redirect) or
#         a title, which is looked up in the category namespace through
#         $wiki->fetchWikirecord().
# $node - optional value for an additional node="..." attribute.
#
# Returns the attributes as a string in which every attribute is preceded by
# a single space, or false when $page was a title that could not be resolved.
function getNodeAttributes( $page, $node = NULL ) {
    global $wiki;

    if (!is_array($page)) {
        $page = $wiki->fetchWikirecord( $page, NS_CATEGORY );
        # Give up only when the lookup failed. Returning unconditionally here
        # made it impossible to ever pass a title to this function.
        if (!$page) return false;
    }

    $attributes = '';
    $attributes .= ' id="' . (int)$page['page_id'] . '"';
    $attributes .= ' namespace="' . (int)$page['page_namespace'] . '"';
    $attributes .= ' title="' . escapeHtml($page['page_title'], ENT_QUOTES) . '"';
    $attributes .= ' touched="' . escapeHtml($page['page_touched'], ENT_QUOTES) . '"';

    if ($page['page_is_redirect']) $attributes .= ' redirect="yes"'; #TODO: get redirect target?!
    if ($node) $attributes .= ' node="' . escapeHtml($node, ENT_QUOTES) . '"';

    return $attributes;
}
# Builds the SQL statement that stores one page in the table named by the
# global $dbtable, updating touched, is_redirect and level when a row with the
# same key already exists.
#
# $page  - either a page record (associative array with the keys page_id,
#          page_namespace, page_title, page_touched and page_is_redirect) or
#          a title, which is looked up in the category namespace through
#          $wiki->fetchWikirecord().
# $level - value stored in the level column.
#
# Returns the statement as a string, or false when $page was a title that
# could not be resolved.
#
# NOTE(review): string values are escaped with addslashes() instead of the
# quoting method of a database connection - confirm that this is sufficient
# for the character set of the target server.
function getInsertStatement( $page, $level = 1 ) {
    global $wiki, $dbtable;

    if (!is_array($page)) {
        $page = $wiki->fetchWikirecord( $page, NS_CATEGORY );
        # Give up only when the lookup failed. Returning unconditionally here
        # made it impossible to ever pass a title to this function.
        if (!$page) return false;
    }

    #TODO: collect lines, make bulk-inserts!

    $sql = 'INSERT INTO '.$dbtable.' (id, namespace, title, touched, is_redirect, level) VALUES ( ';
    $sql .= (int)$page['page_id'].', ';
    $sql .= (int)$page['page_namespace'].', ';
    $sql .= '"'.addslashes($page['page_title']).'", ';
    $sql .= '"'.addslashes($page['page_touched']).'", ';
    $sql .= (int)$page['page_is_redirect'].', ';
    $sql .= (int)$level.' ';
    $sql .= ' ) ';
    $sql .= 'ON DUPLICATE KEY UPDATE touched = VALUES(touched), is_redirect = VALUES(is_redirect), level = VALUES(level)';

    return $sql;
}
# Helper for printNode(): builds the INSERT statement for one page and either
# runs it on the global connection $db or, when there is none, prints it.
function _printNodeEmitInsert( $page, $level ) {
    global $db;

    $sql = getInsertStatement($page, $level);
    if ($db) {
        $db->query($sql, 'printNode#insert');
        wsfLog('inserted: '.substr($sql,0,128).'...', LL_VERBOSE);
    }
    else print "$sql\n";
}

# Emits one page and, if it is a category that has not been expanded before,
# recursively everything contained in it.
#
# $page     - page record (associative array) or a title. A title containing
#             a colon is looked up without a namespace, any other title is
#             looked up in the category namespace.
# $level    - nesting level of this page; the root is level 1.
# $articles - when false, only category pages are emitted and followed.
# $format   - 'xml', 'sql', or anything else for tab-separated rows.
#
# Uses the globals $stoplist (ids of categories already expanded), $depth
# (maximum depth, 0/unset for unlimited) and $redir (also include redirects
# pointing to the members of a category).
#
# Returns false when a title could not be resolved, nothing otherwise.
function printNode( $page, $level = 1, $articles = true, $format = 'xml') {
    global $stoplist, $wiki, $wdb, $depth, $redir;

    if ($depth && $level > ($depth + 1)) return;

    $dent = str_repeat(' ', $level);

    if (!is_array($page)) {
        if ( strpos($page, ':') !== false ) $ns = NULL;
        else $ns = NS_CATEGORY;

        $page = $wiki->fetchWikirecord( $page, $ns );
        if (!$page) return false;
    }

    $seen = isset($stoplist[$page['page_id']]);

    # Anything that is not a category is a leaf.
    if ($page['page_namespace'] != NS_CATEGORY) {
        if (!$articles) return;

        if ($format == 'sql') {
            _printNodeEmitInsert($page, $level);
        }
        else if ($format == "xml") {
            print $dent.'<page '.getNodeAttributes($page).'/>'."\n";
        }
        else if (!$seen) {
            # NOTE(review): only categories are ever recorded in $stoplist, so
            # this check does not suppress articles listed in more than one
            # category - confirm whether repeated rows are intended.
            printTsvRow($page);
        }
        return;
    }

    # A category that was expanded before is only referenced, never expanded
    # again; this also stops the recursion on category cycles.
    if ($seen) {
        if ($format == 'xml') {
            print $dent.'<page '.getNodeAttributes($page, 'ref').'/>'."\n";
        }
        return;
    }

    $stoplist[$page['page_id']] = true;

    if ($format == 'sql') {
        _printNodeEmitInsert($page, $level);
    }
    else if ($format == "xml") {
        print $dent.'<page '.getNodeAttributes($page).'>'."\n";
    }
    else {
        printTsvRow($page);
    }

    $quotedTitle = $wdb->addQuotes($page['page_title']);

    # The category-only filter has to be part of every SELECT of the union.
    # Appending it once after the UNION constrained only the second SELECT,
    # and did so with a column name that is ambiguous between P and R.
    $sql = "SELECT page_id, page_title, page_namespace, page_touched, page_is_redirect
FROM page
JOIN categorylinks ON cl_from = page_id
WHERE cl_to = " . $quotedTitle;
    if (!$articles) $sql .= " AND page_namespace = ".NS_CATEGORY;

    if ($redir) {
        # Pages R that are redirects, link to a member P of the category and
        # live in the same namespace as P.
        $sql .= "
UNION
SELECT R.page_id, R.page_title, R.page_namespace, R.page_touched, R.page_is_redirect
FROM page as P
JOIN categorylinks ON cl_from = P.page_id
JOIN pagelinks ON pl_namespace = P.page_namespace AND pl_title = P.page_title
JOIN page as R ON R.page_id = pl_from
WHERE cl_to = " . $quotedTitle . "
AND R.page_namespace = P.page_namespace
AND R.page_is_redirect > 0";
        if (!$articles) $sql .= " AND R.page_namespace = ".NS_CATEGORY;
    }

    $sql .= " ORDER BY page_namespace, page_title";

    $res = $wiki->wikiDB->query($sql, 'printNode');
    while ($row = $wiki->wikiDB->fetchRow($res)) {
        printNode( $row, $level+1, $articles, $format);
    }
    $wiki->wikiDB->freeResult($res);

    if ($format == 'xml') print $dent.'</page>'."\n";
}
#--------------------------------------------------------
# Command line entry point: dumps the category structure below one or more
# root categories of a wiki as XML, as tab-separated rows, or into a database
# table.
#
# $args and $options are not set in this file; presumably the included
# ../common/WSInit.php fills them from the command line - TODO confirm.
if (!isset($args[0])) {
    echo "USAGE: StructureDump.php <wiki> <category>";
    exit(1);
}

$lang= array_shift($args);
$cats= $args;

# isset()/empty() yield the same values as the former "@$options[...]" reads
# (NULL, respectively true, for a missing key) without suppressing errors.
$articles= empty($options['catonly']);
$format= isset($options['format']) ? $options['format'] : NULL;
$depth= isset($options['depth']) ? $options['depth'] : NULL;
$dbtable= isset($options['table']) ? $options['table'] : NULL;
$xtable= isset($options['xtable']) ? $options['xtable'] : NULL;
$dbname= isset($options['insert']) ? $options['insert'] : NULL;
$dbtrunc= isset($options['truncate']) ? $options['truncate'] : NULL;
$dbcreate= isset($options['create']) ? $options['create'] : NULL;
$redir= isset($options['redir']) ? $options['redir'] : NULL;

# Naming a table or a target database implies SQL output; XML is the default.
if ($dbtable && !$format) $format= 'sql';
if ($dbname && !$format) $format= 'sql';
if (!$format) $format= 'xml';

if ($format=='sql' && !$dbtable) die("missing --table option!\n");

$db= NULL;
$wiki= WikiAccess::newInstance( $lang );
$wdb =& $wiki->wikiDB;

if ($dbname) {
    # A value starting with "%" or containing "//" is handed to
    # openConnection(); anything else is used as a database name on the
    # wiki's own connection.
    if (preg_match('!^%|//!', $dbname)) {
        $db=& openConnection($dbname);
        # Check the connection that was just opened; testing $wdb here let a
        # failed connect slip through.
        if (!$db) die("failed to connect to database!\n");
    }
    else {
        $dbtable = "$dbname.$dbtable";
        $db=& $wdb;
    }

    if ($dbcreate) {
        $db->query("DROP TABLE IF EXISTS $dbtable",'StructureDump#drop');
        $db->query("CREATE TABLE IF NOT EXISTS $dbtable (
id INT(12) NOT NULL,
namespace MEDIUMINT NOT NULL,
title VARCHAR(255) binary NOT NULL,
touched CHAR(12) NOT NULL,
is_redirect TINYINT DEFAULT 0,
level TINYINT DEFAULT NULL,
PRIMARY KEY (id),
UNIQUE KEY title (namespace, title),
KEY redirect (is_redirect, namespace),
KEY touched (touched, namespace),
KEY level (level, namespace)
)",'StructureDump#create');
        print "CREATED TABLE $dbtable\n";
    }
    else {
        # NOTE(review): the table is truncated whenever it is not re-created;
        # $dbtrunc (the 'truncate' option) is read above but never consulted.
        # Confirm whether truncation was meant to depend on that option.
        $db->query("TRUNCATE TABLE $dbtable",'StructureDump#truncate');
        print "TRUNCATED TABLE $dbtable\n";
    }
}

$stoplist= array();

if (!$cats) {
    $cat= isset($wiki->root_category) ? $wiki->root_category : NULL;
    if (!$cat) {
        echo "No root category known for {$wiki->domain}. Please specify!";
        $wiki->close();
        exit(1);
    }
    $cats= array( $cat );
}

if ($format=='sql') {
    # SQL mode collects the structure level by level with INSERT ... SELECT
    # statements instead of walking the tree page by page.
    $quotedCats= array();
    foreach ($cats as $cat) {
        $quotedCats[]= $wdb->addQuotes($wiki->asDBKey($cat));
    }
    $catlist= implode(', ', $quotedCats);

    print "initializing table...\n";
    $sql= "INSERT INTO $dbtable (id, namespace, title, touched, is_redirect, level)
SELECT page_id, page_namespace, page_title, page_touched, page_is_redirect, 0 as level
FROM {$wiki->dbname}.page
WHERE page_namespace = ".NS_CATEGORY." AND page_title IN ( ".$catlist." )";
    if ($db) $db->query($sql,'StructureDump#basecats');
    else print "\n$sql\n";

    # The namespace restriction does not change between passes.
    if (!$articles) $nswhere= ' AND page_namespace = ' . NS_CATEGORY;
    else $nswhere= '';

    $i=0;
    while (!$depth || $i<$depth) {
        $next= $i+1;

        # Add the members of all categories stored at level $i as level $next.
        print "pass ".$next."...\n";
        $sql= "INSERT IGNORE INTO $dbtable (id, namespace, title, touched, is_redirect, level)
SELECT page_id as i, page_namespace as n, page_title as t, page_touched as o, page_is_redirect as r, level as l
FROM (
SELECT page_id, page_namespace, page_title, page_touched, page_is_redirect, ".$next." as level
FROM {$wiki->dbname}.page
JOIN {$wiki->dbname}.categorylinks ON cl_from = page_id
JOIN $dbtable ON title = cl_to AND namespace = ".NS_CATEGORY."
WHERE level = $i $nswhere ) as X";
        if ($db) {
            $db->query($sql,'StructureDump#collect');
            $inserted= $db->affectedRows();
            print "$inserted rows inserted in pass ".$next." \n";
            # Nothing new on this level: the structure is complete.
            if (!$inserted) break;
        }
        else {
            print "\n$sql\n";
        }

        if ($redir) {
            # Add redirects that link to the pages just stored at level $next.
            # The collected table needs the alias T used throughout the query,
            # and the namespace column of the page table is page_namespace.
            $sql= "INSERT IGNORE INTO $dbtable (id, namespace, title, touched, is_redirect, level)
SELECT page_id as i, page_namespace as n, page_title as t, page_touched as o, page_is_redirect as r, level as l
FROM (
SELECT R.page_id, R.page_namespace, R.page_title, R.page_touched, R.page_is_redirect, T.level
FROM $dbtable as T
JOIN {$wiki->dbname}.pagelinks ON pl_namespace = T.namespace AND pl_title = T.title
JOIN {$wiki->dbname}.page as R ON R.page_id = pl_from
WHERE T.level = ".$next."
AND T.namespace = R.page_namespace
AND R.page_is_redirect > 0 ) as X";
            if ($db) {
                $db->query($sql,'StructureDump#collectRedir');
                $redirects= $db->affectedRows();
                print "$redirects redir rows inserted in pass ".$next." \n";
                # Finding no redirects must not end the traversal: the pages
                # inserted in this pass still have to be expanded.
            }
            else {
                print "\n$sql\n";
            }
        }

        $i+= 1;

        # Without a connection no row count is available to end the loop, so
        # an unlimited depth would print statements forever.
        if (!$db && !$depth) {
            print "no database connection and no --depth option, stopping after pass ".$i."\n";
            break;
        }
    }

    if ($db && $xtable && $xtable!=$dbtable) {
        print "stripping entries that are also contained in $xtable...\n";
        $sql= "DELETE FROM T USING $dbtable as T JOIN $xtable as X ON T.id = X.id";
        $db->query($sql,'StructureDump#strip');
    }
}
else {
    if ($format=='xml') {
        print "<?xml version='1.0' encoding='utf-8'?>\n";
        print "<wiki domain=\"".escapeHtml($wiki->domain, ENT_QUOTES)."\" time=\"".wfTimestamp(TS_MW)."\">\n";
    }

    foreach ($cats as $cat) {
        printNode( $cat, 1, $articles, $format);
    }

    if ($format=='xml') {
        print "</wiki>\n";
    }
}

$wiki->close();
# NOTE(review): $db may be a reference to the wiki's own connection; whether
# $wiki->close() already closed it cannot be told from this file.
if ($db) $db->close();
?>

