#! /usr/bin/python import MySQLdb, sys import ghel import time; starttime=time.time() def main(): dbName = '' template = 'Coord_missing' for arg in sys.argv[1:]: if arg.startswith('-dbname:'): dbName = arg[8:] elif arg.startswith('-template:'): template = arg[10:] if not dbName: dbName = raw_input('Database [%s]:'%'enwiki') or 'enwiki' template = raw_input('Template [%s]:' % template) or template wdb = MySQLdb.connect(db=dbName+'_p', host=dbName.replace('_', '-')+'-p.db.toolserver.org', read_default_file="/home/dispenser/.my.cnf") wiki = wdb.cursor() wiki.execute("SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED") # TODO GROUP_CONCAT(ll_lang, ll_title) wiki.execute(""" /* SLOW_OK */ SELECT page_title, ll_lang, ll_title FROM page, langlinks, templatelinks WHERE ll_from = tl_from AND tl_from = page_id AND tl_namespace=10 AND tl_title = %s AND page_namespace=0 /* AND ll_lang in ('de', 'fr') */ ORDER BY page_title ASC /* LIMIT 1000, 45000 */ """, (template,)) print '# title \tinterwiki \tprimary\tparams' for (title, lang, ll_title) in wiki.fetchall(): iwfindcoords(title.replace('_', ' '), lang, ll_title) print "Completed in %#4.2f minutes with python using %s seconds of CPU" %((time.time()-starttime)/60, time.clock()) def iwfindcoords(title, lang, ll_title): try: # Workaround for multiple language aliases not supported directly in the TS' DB if lang == 'zh-classic': lang = 'zh-classical' elif lang == 'nan': lang = 'zh-min-nan' elif lang == 'nb': lang = 'no' db = MySQLdb.connect(db=lang.replace('-', '_')+'wiki_p', host=lang.replace('_', '-')+'wiki-p.db.toolserver.org', read_default_file="/home/dispenser/.my.cnf") except MySQLdb.OperationalError, e: if e and e[0] == 2005: print "DEPRECATED: %s alias not in listed for [[%s:%s]] on [[%s]]" % (lang, lang, ll_title, title) else: raise c = db.cursor() c.execute(""" SELECT el_to FROM page JOIN externallinks ON page_id = el_from WHERE page_namespace=%s AND page_title = %s AND ( el_to LIKE "http://stable.toolserver.org/geohack/geohack.php?_%%" OR el_to LIKE "http://www.nsesoftware.nl/wiki/maps.asp?_%%" )""", (0, ll_title.replace(' ', '_'), )) for (link, ) in c.fetchall(): begin = link.find('params=')+7 end = link.find('&', begin) if begin > 8: # -1 + 7 params = link[begin: end > begin and end or None] print '[[%s]]\t[[%s:%s]]\t%s\t%s' % (title, lang, ll_title, 'title=' in link and 'no' or 'yes', params) c.close() if __name__ == "__main__": main()