#! /usr/bin/python
"""CGI search over GeoHack external links recorded in a wiki's database.

Reads the search parameters from the CGI form, queries the
``externallinks`` table for GeoHack URLs matching a LIKE or REGEXP
pattern, and prints the matches as an HTML table with paging links.

NOTE(review): this file had passed through an HTML-to-text conversion that
removed every tag from its string literals.  The markup below was
reconstructed from the surviving text and from the number of arguments
each format string receives; confirm it against the deployed page.
"""
import cgi
import MySQLdb
import wikipedia
import cgitb
cgitb.enable()

# Module-level connection; set by main() and read by replag().
db = None

# Path used for the form action and for the paging links.
SCRIPT_PATH = "/~dispenser/cgi-bin/geosearch.py"

# Every link of interest starts with this prefix; the rest is its query string.
GEOHACK_PREFIX = "http://stable.toolserver.org/geohack/geohack.php?"

# Page sizes offered in the navigation bar.
PAGE_SIZES = (20, 50, 100, 250, 500)


def _esc(value):
    """Escape *value* for use in HTML text or a double-quoted attribute."""
    return cgi.escape(value, True)


def replag():
    """Lazy man's replag: seconds between now and the newest revision.

    Uses the module-level ``db`` connection.  Returns 0 when the revision
    table yields no row.
    """
    c = db.cursor()
    c.execute("SELECT time_to_sec(timediff(now(),rev_timestamp)) as replag FROM revision ORDER BY rev_timestamp DESC LIMIT 1")
    row = c.fetchone()
    if not row:
        return 0
    return row[0]


def printLinks(offset, limit, queryString):
    """Print the previous / next / page-size navigation paragraph.

    offset      -- zero-based index of the first row on the current page
    limit       -- number of rows per page
    queryString -- already URL-quoted search parameters to carry along
    """
    basehref = _esc(SCRIPT_PATH + "?" + queryString)
    anchor = '<a href="%s&amp;offset=%d&amp;limit=%d">%s</a>'

    if offset > 0:
        # Never link to a negative offset when the page size was changed.
        previous = anchor % (basehref, max(offset - limit, 0), limit,
                             'previous %d' % limit)
    else:
        previous = 'previous %d' % limit
    print('<p>View (%s) ' % previous)

    # The total number of matches is not known here, so "next" is always
    # offered as a link.
    print('(%s)' % (anchor % (basehref, offset + limit, limit,
                              'next %d' % limit)))

    sizes = ' | '.join([anchor % (basehref, offset, size, size)
                        for size in PAGE_SIZES])
    print('(%s)</p>' % sizes)


def _parseGeohackLink(el_to):
    """Return (pagename, title, params) taken from a GeoHack URL.

    ``pagename`` stays URL-encoded, ``title`` is its unquoted form.  A
    ``title=`` field is used only while no page name has been seen yet.
    """
    pagename = ''
    title = ''
    params = ''
    for field in el_to[len(GEOHACK_PREFIX):].split('&'):
        if field.startswith('pagename='):
            pagename = field[len('pagename='):]
            title = wikipedia.urllib.unquote(pagename)
        elif field.startswith('params='):
            params = field[len('params='):]
        elif field.startswith('title=') and pagename == '':
            pagename = field[len('title='):]
            title = wikipedia.urllib.unquote(pagename)
    return pagename, title, params


def _splitCoordinates(params):
    """Split *params* just after the first ``_W`` / ``_E`` marker.

    Returns (coordinates, remainder).  When neither ``_W_`` nor ``_E_``
    occurs, everything is returned as the coordinates.
    """
    found = [pos for pos in (params.find('_W_'), params.find('_E_'))
             if pos >= 0]
    if found:
        cut = min(found) + 2    # keep the "_W" / "_E" with the coordinates
    else:
        cut = len(params)
    return params[:cut], params[cut:]


def main():
    """Render the search form, run the query and print the result table."""
    form = cgi.FieldStorage()
    # TODO: support path-style queries, i.e. /query -> ?regexp=query
    # The 'namespace' parameter is not read: the page join it would need is
    # disabled in the query below.
    regexp = form.getfirst('regexp', form.getfirst('page', ""))
    site = wikipedia.Site(form.getfirst('site', 'en'))
    isRegex = form.getfirst('type', 'like') == 'regex'
    invert = bool(form.getfirst('invert', False))
    # Negative values would produce an invalid LIMIT clause.
    limit = max(int(form.getfirst('limit', 100)), 0)
    offset = max(int(form.getfirst('offset', 0)), 0)

    # NOTE(review): the control labels and the submit button are
    # reconstructed; only the visible text of the first two paragraphs
    # survived in the source.
    print(r'''
<form action="%s" method="get">
<fieldset>
<legend>GeoHack External links</legend>
<p>Examples, Cheat sheet: \w &rarr; [[:alnum:]], \d &rarr; [[:digit:]], \s &rarr; [[:space:]], \b &rarr; [[:&lt;:]] or [[:&gt;:]], search of parameter values: params=[^&amp;=]+</p>
<p><label for="regexp">Search</label>: <input type="text" id="regexp" name="regexp" size="50" value="%s" />
''' % (SCRIPT_PATH, wikipedia.escape(regexp)))
    print('<input type="checkbox" id="type" name="type" value="regex" %s/> <label for="type">Regular expression</label>'
          % ('checked="checked" ' if isRegex else ''))
    print('<input type="checkbox" id="invert" name="invert" value="1" %s/> <label for="invert">Invert match</label>'
          % ('checked="checked" ' if invert else ''))
    if site.language() != 'en':
        print('<input type="hidden" name="site" value="%s" />' % _esc(site.language()))
    print('<input type="submit" value="Search" /></p>\n</fieldset>\n</form>')

    # Carry the full search state in the paging links so that following
    # them does not silently fall back to a plain, non-inverted LIKE search.
    queryString = 'regexp=' + wikipedia.urllib.quote(regexp)
    if site.language() != 'en':
        queryString += '&site=' + site.language()
    if isRegex:
        queryString += '&type=regex'
    if invert:
        queryString += '&invert=1'

    global db
    db = MySQLdb.connect(db=site.dbName() + '_p',
                         host=site.dbName().replace('_', '-') + '-p.db.toolserver.org',
                         read_default_file="/home/dispenser/.my.cnf")
    c = db.cursor()
    c.execute("SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED")

    if isRegex:
        operator = 'REGEXP'
        pattern = regexp
    else:
        operator = 'LIKE'
        pattern = "%" + regexp + "%"
    if invert:
        operator = 'NOT ' + operator

    # Only the operator keyword, chosen from the fixed strings above, is
    # concatenated into the statement; every user-supplied value is passed
    # as a bound parameter.
    c.execute("""
SELECT el_to
FROM externallinks
WHERE el_to LIKE %s
AND el_to """ + operator + """ %s
LIMIT %s,%s;""", (GEOHACK_PREFIX + "%", pattern, offset, limit))

    printLinks(offset, limit, queryString)
    print('<table class="wikitable">')
    print('<tr>')
    print('<th>#</th><th>Title</th><th>Coordinate</th><th>Parameters</th>')
    print('</tr>')
    idx = offset
    for (el_to,) in c.fetchmany(limit):
        pagename, title, params = _parseGeohackLink(el_to)
        coordinates, remainder = _splitCoordinates(params)
        idx += 1
        print('<tr>')
        # Each value is escaped on its own: titles are percent-decoded and
        # may contain markup characters, not just '&'.
        # NOTE(review): the article link target (http://host/wiki/pagename)
        # is reconstructed from the hostname and pagename arguments.
        print('<td>%d</td><td><a href="http://%s/wiki/%s">%s</a></td><td><a href="%s">%s</a></td><td>%s</td>' % (
            idx,
            _esc(site.hostname()),
            _esc(pagename),
            _esc(title.replace('_', ' ')),
            _esc(el_to),
            _esc(coordinates),
            _esc(remainder.replace('_', ' ')),
        ))
        print('</tr>')
    print('</table>')
    printLinks(offset, limit, queryString)


if __name__ == "__main__" and wikipedia.handleUrlAndHeader():
    import time
    startTime = time.time()
    try:
        wikipedia.startContent(form=False)
        main()
    finally:
        try:
            # db is still None when main() failed before connecting.
            wikipedia.endContent(replag=(db and replag() or 0))
        finally:
            wikipedia.stopme()