#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""CGI script that looks up red links whose target title starts with a prefix.

A "red link" here is a row in ``pagelinks`` whose target has no matching row
in ``page`` (see the LEFT JOIN ... ``pl.page_id IS NULL`` in the query).

Request parameters read from the form:

* ``prefix`` (falls back to ``page``, then ``'Main_Page'``) -- title prefix.
* ``namespace`` -- default namespace number handed to ``wikipedia.Page``.
* ``limit`` -- maximum number of rows to fetch (default 100).
* ``offset`` -- number of rows to skip (default 0).
"""
import cgi
import oursql
import urllib

import wikipedia

import cgitb
cgitb.enable(logdir='tracebacks')

# Open connections keyed by (suffixed) database name, so that a database is
# connected to at most once for the lifetime of the process.
connections = {}


def getConn(dbname):
    """Return a cached oursql connection for database *dbname*.

    ``'_p'`` is appended to *dbname* when it is missing.  The host name is
    derived from the database name by replacing ``'_'`` with ``'-'`` and
    appending ``'.rrdb.toolserver.org'``.
    """
    if not dbname.endswith('_p'):
        dbname += '_p'
    if dbname not in connections:
        connections[dbname] = oursql.connect(
            db=dbname,
            host=dbname.replace('_', '-') + '.rrdb.toolserver.org',
            read_default_file='/home/dispenser/.my.cnf',
            charset=None,
            use_unicode=False,
        )
    return connections[dbname]


def likeescape(s, escape='\\'):
    """Escape the SQL ``LIKE`` wildcards ``_`` and ``%`` in *s*.

    The escape character itself is doubled first so that it cannot combine
    with a following character.  With the default backslash the result is
    identical to the previous hard-coded behaviour.  A different *escape*
    only works if the query carries a matching ``ESCAPE`` clause.
    """
    return (
        s.replace(escape, escape + escape)
         .replace('_', escape + '_')
         .replace('%', escape + '%')
    )


def main():
    """Read the request, run the red-link query and print the report header."""
    form = cgi.FieldStorage()

    def get_form_int(name, defaultValue):
        """Return form field *name* as an int, or *defaultValue* if invalid."""
        try:
            return int(form.getfirst(name, defaultValue))
        except ValueError:
            return defaultValue

    # Negative values would be rejected by MySQL in "LIMIT ?, ?", so clamp
    # them here instead of letting the query fail.
    limit = max(1, get_form_int('limit', 100))
    offset = max(0, get_form_int('offset', 0))

    page = wikipedia.Page(
        wikipedia.getSite(),
        form.getfirst('prefix', form.getfirst('page', 'Main_Page')),
        defaultNamespace=get_form_int('namespace', 0)
    )
    namespace = page.namespace()
    prefix = page.titleWithoutNamespace(underscore=True).encode('utf-8')

    def phrase_link(title, label="", className=None):
        """Return an HTML link to *title* on the wiki that hosts ``page``.

        NOTE(review): the format string previously contained a single
        ``%s`` for five arguments, which raises TypeError on every call.
        The anchor markup below is reconstructed from the five arguments
        (host, quoted title, class, title attribute, label) -- confirm the
        URL layout before relying on it.
        """
        return '<a href="//%s/wiki/%s" class="%s" title="%s">%s</a>' % tuple(
            wikipedia.escape(s) for s in (
                page.hostname(),
                urllib.quote(title.replace(' ', '_'), safe=";@$!*(),/:-_."),
                className or '',
                title.replace('_', ' '),
                label or title.replace('_', ' '),
            )
        )

    # The title originates from the request, so escape it before echoing it
    # back into the page.
    print('\nDisplay redlinks with prefix: %s\n' % (
        cgi.escape(page.title().encode('utf-8')),
    ))

    cursor = getConn(page.site().dbName()).cursor()
    try:
        # One row more than `limit` is requested from the server while only
        # `limit` rows are fetched -- presumably to detect a following page;
        # TODO confirm, the extra row is not used in this file.
        cursor.execute("""
/* redlinks.py LIMIT:180 */
SELECT
  pl_namespace,
  pl_title,
  ns_name,
  COUNT(*) AS link_count,
  SUM(ref.page_namespace = pl_namespace) AS ns_count,
  (SELECT GROUP_CONCAT(DISTINCT DATE_FORMAT(log_timestamp, "%b %Y") SEPARATOR ", ")
   FROM logging_ts_alternative
   WHERE log_namespace = pl_namespace
   AND log_title = pl_title
   AND log_action = "delete"
  ) AS log_deletes,
  ref.page_namespace,
  ref.page_title
FROM page AS ref
JOIN pagelinks ON pl_from = ref.page_id
JOIN toolserver.namespace ON dbname = (SELECT DATABASE()) AND ns_id = pl_namespace
LEFT JOIN page AS pl ON pl.page_namespace=pl_namespace AND pl.page_title=pl_title
WHERE pl.page_id IS NULL
AND pl_namespace = ?
AND pl_title LIKE ?
GROUP BY pl_namespace, pl_title
LIMIT ?, ?
""", (namespace, likeescape(prefix) + '%', offset, limit + 1))
        results = cursor.fetchmany(limit)
    finally:
        cursor.close()

    # NOTE(review): the fetched rows are only counted below; nothing in this
    # file renders them, and phrase_link() is not called either.
    print('')
    if results:
        # Report the range actually returned, not the range requested.
        print('Showing results %d-%d, results were limited to %d\n' % (
            offset + 1, offset + len(results), limit,
        ))
    else:
        print('')
        print("There were no results")


if __name__ == "__main__" and wikipedia.handleUrlAndHeader():
    try:
        wikipedia.startContent(form=True)
        main()
    finally:
        wikipedia.endContent()
        wikipedia.stopme()