#!/usr/bin/env python # -*- coding: utf-8 -*- import wikipedia import oursql import cgitb; cgitb.enable(logdir='tracebacks') connections = {} def getConn(wiki, host=""): if not wiki.endswith('_p'): wiki+='_p' if (host,wiki) not in connections: connections[host,wiki] = oursql.connect( db=wiki, host=host or wiki.replace('_', '-')+'.rrdb.toolserver.org', read_default_file="/home/dispenser/.my.cnf", charset=None, use_unicode=False ) return connections[host,wiki] def reconnect(wiki, host=""): if not wiki.endswith('_p'): wiki+='_p' del connections[host,wiki] return getConn(wiki, host) def main(): banners = [] min_results = 500 project_results = 0 def createLink(title, label): return '%s'%tuple(wikipedia.escape(s) for s in ("en.wikipedia.org", wikipedia.urllib.quote(title.replace(' ', '_')), title.replace('_', ' '), label or title.replace('_', ' '), )) for arg in wikipedia.handleArgs(): if arg.startswith('-banner:'): try: banners = arg[8:].decode('utf-8').encode('utf-8').split('|') except: banners = arg[8:].decode('latin-1').encode('utf-8').split('|') conn=getConn("enwiki_p", "sql-s1-user") cursor = conn.cursor() wikipedia.logtime('Database connection') print """
Topic points
""" try: cursor.execute("""/* topic_points LIMIT:5 NM */ CREATE TEMPORARY TABLE u_dispenser_p.dab_remain ( dr_page INT(11) UNSIGNED NOT NULL PRIMARY KEY, dr_count SMALLINT UNSIGNED NOT NULL ) ENGINE=MyISAM AS SELECT DISTINCT adl.article_id AS dr_page, COUNT(*) AS dr_count FROM p_dpl_p.all_dab_links_basic AS adl JOIN p_dpl_p.contest_dabs ON c_id=dab_id /* The above table isn't updated frequently */ LEFT JOIN p_dpl_p.ch_fixed_links AS cfl ON cfl.article_id=adl.article_id AND cfl.dab_id=adl.dab_id WHERE cfl.article_id IS NULL GROUP BY adl.article_id; """) wikipedia.logtime('Built dab_remain table') # If a LEFT JOIN (to include 0 pointers), query time increases from 2 sec to 20 sec cursor.execute("""/* topic_points LIMIT:5 NM */ SELECT pb_title, COUNT(*) AS pages, SUM(dr_count) AS points FROM u_dispenser_p.projectbanner JOIN u_dispenser_p.dab_remain ON dr_page=pb_page GROUP BY pb_title ORDER BY FLOOR(LOG10(SUM(dr_count))) DESC, -- Do this transform twice since CPU is cheaper than memory TRIM("_" FROM REPLACE(pb_title, "WikiProject", "")) ASC """) except oursql.ProgrammingError: # 1290 --read-only option # 1226 max_user_connections exceeded # 1317 Query killed banners_joined = '|'.join(banners).replace('_', ' ') print '' % (wikipedia.escape(banners_joined),) cursor = reconnect("enwiki_p", "sql-s1-user").cursor() else: print '' wikipedia.logtime('Generated topic list') print """
""" try: cursor.execute("""/* topic_points LIMIT:5 */ SELECT TIMESTAMPDIFF(MINUTE, CREATE_TIME, NOW()) FROM information_schema.tables WHERE TABLE_SCHEMA=? AND TABLE_NAME=? """, tuple('p_dpl_p.ch_fixed_links'.split('.'))) print 'The list is updated hourly; the last update completed %s minutes ago.'%cursor.fetchall().pop() except: cursor = reconnect("enwiki_p", "sql-s1-user").cursor() print '
' wikipedia.logtime('Calculated update lag') for banner in banners: cursor.execute("""/* topic_points LIMIT:180 */ SELECT adl.article_title AS "Article", GROUP_CONCAT(adl.dab_title SEPARATOR "|") AS "Dab links", COUNT(c_id) AS "Points", IF(tl_from IS NULL, "", "Yes") AS "{{dn}}"/*, -- EXISTS (SELECT 1 FROM pagelinks WHERE pl_from=adl.article_id AND pl_namespace=0 AND pl_title=adl.dab_title) AS "Still linked?", (SELECT COUNT(*) FROM pagelinks JOIN page ON page_namespace=pl_namespace AND page_title=pl_title JOIN u_dispenser_p.projectbanner AS plb ON plb.pb_page=page_id AND plb.pb_title=? WHERE pl_from=adl.dab_id ) AS "ProjChoice" /*-*/ FROM u_dispenser_p.projectbanner JOIN p_dpl_p.all_dab_links_basic AS adl ON article_id=pb_page %s JOIN p_dpl_p.contest_dabs ON c_id=dab_id LEFT JOIN templatelinks ON tl_from=pb_page AND tl_namespace=10 AND tl_title="Disambiguation_needed" LEFT JOIN p_dpl_p.ch_fixed_links AS cfl ON cfl.article_id=adl.article_id AND cfl.dab_id=adl.dab_id WHERE pb_title=? AND cfl.article_id IS NULL GROUP BY pb_page ORDER BY FLOOR(LOG(5, COUNT(c_id))) DESC, Article ASC; """ % ("" if project_results > min_results else "LEFT",), (banner,)) wikipedia.logtime('Queried dablinks pages') print '' print '' print '' for tup in cursor.description: print ''%tup[0] print '' count = 0 for tup in cursor: if tup[2] == 0 and count >= min_results: cursor.nextset() break count += 1 print '''''' % ( createLink(tup[0], tup[0].replace('_', ' ')), ', '.join(createLink(s, s.replace('_', ' ')) for s in tup[1].split('|')), tup[2], wikipedia.escape(tup[3]), wikipedia.urllib.quote(tup[0]), wikipedia.urllib.quote(banner), '&commonfixes=yes' if wikipedia.SysArgs.get('commonfixes', 'false')=='true' else '', ) print '
' #print createLink("Template:%s"%banner, banner.replace('_', ' ')) print createLink("Wikipedia:%s"%banner, banner.replace('_', ' ')) print '
%s
%s %s %d %s FIX
' wikipedia.logtime('Generated HTML results table') print 'Displaying %(count)d pages'%locals() if __name__ == "__main__" and wikipedia.handleUrlAndHeader(): try: wikipedia.startContent(form=False) main() except oursql.Error as (errno, strerror, extra): if errno in (1040, 1226, 2013): # Too many connection / Max connections / Lost connection print '

Database operational error, retrying a few minutes.

%s
'%(wikipedia.escape(strerror),) print '' else: raise finally: wikipedia.endContent() wikipedia.stopme()