#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
JSON version of webchecklinks.py

webchecklinks.py
A web wrapper around the checklinks library
(c) Dispenser, 2007-2008

This is a Python 2 CGI script: it relies on the ``unicode`` builtin and on
the Python 2 ``wikipedia`` module.  ``print(...)`` is always called with a
single argument so it behaves the same as the Python 2 print statement.
'''
import cgi
import checklink, wikipedia
import parser
import cgitb
cgitb.enable(logdir='./logs/')

# Defaults for the settings main() reads from the query string, so that
# addEntry() does not raise NameError when it runs before main() has.
threshold = 0
callback = ''

# (raw, escaped) pairs applied in order by quote().  The backslash must come
# first, otherwise the backslashes added by the later pairs would be doubled.
_JSON_ESCAPES = (
    (u'\\', u'\\\\'),
    (u'"', u'\\"'),
    (u'\n', u'\\n'),
    (u'\r', u'\\r'),
    (u'\t', u'\\t'),
)

# One element of the "links" array, with a trailing comma.  The four-tab
# indent of the nested objects matches the separator used by dict2json().
_ENTRY_TEMPLATE = (
    u'\t\t{\n'
    u'\t\t\t"url": "%s",\n'
    u'\t\t\t"title": "%s",\n'
    u'\t\t\t"metadata": {\n'
    u'\t\t\t\t%s\n'
    u'\t\t\t},\n'
    u'\t\t\t"status": %d,\n'
    u'\t\t\t"reason": "%s",\n'
    u'\t\t\t"headers": {\n'
    u'\t\t\t\t%s\n'
    u'\t\t\t},\n'
    u'\t\t\t"redirect": "%s",\n'
    u'\t\t\t"rank": %d,\n'
    u'\t\t\t"comment": "%s"\n'
    u'\t\t},'
)


def printu(ustr):
    """Print the unicode string *ustr* to stdout encoded as UTF-8."""
    print(ustr.encode('utf-8'))


def quote(s):
    """Return *s* as unicode text escaped for use inside a JSON string.

    Backslash, double quote, newline, carriage return and tab are escaped.
    The surrounding double quotes are NOT added; callers supply them.
    """
    text = unicode(s)
    for raw, escaped in _JSON_ESCAPES:
        text = text.replace(raw, escaped)
    return text


# NOTE: the parameter name shadows the builtin; it is kept so that existing
# keyword callers keep working.
def list2json(list):
    """Return the items of *list* as comma-separated JSON strings.

    The enclosing ``[`` and ``]`` are not included.  An empty or None
    argument yields ``''``.
    """
    if not list:
        return ''
    return ','.join([u'"%s"' % quote(value) for value in list])


# NOTE: the parameter name shadows the builtin; it is kept so that existing
# keyword callers keep working.
def dict2json(dict):
    """Return the items of *dict* as ``"key": "value"`` JSON members.

    Keys and values are both escaped with quote(); every value is emitted
    as a JSON string.  The enclosing ``{`` and ``}`` are not included.  An
    empty or None argument yields ``''``.
    """
    if not dict:
        return ''
    members = [u'"%s": "%s"' % (quote(key), quote(value))
               for (key, value) in dict.items()]
    return ',\n\t\t\t\t'.join(members)


def addEntry(page, url, title, status, reason, rank, comment, headers,
             redirect, refId, metadata=None, statsData=''):
    """Print one link result as a JSON object followed by a comma.

    Passed by main() to checklink.checkMediaWikiPage() as its callback.
    Entries whose *rank* is below the module-level ``threshold`` are
    skipped.  *refId* and *statsData* are accepted but not used.
    """
    #wikipedia.output(page.aslink())
    #return
    # Do nothing with statsData
    # Reasons of 37+ characters are cut to 25 characters plus an ellipsis.
    if len(reason) >= 37:
        reason = reason[0:25] + '...'
    if rank < threshold:
        return
    # To turn the emitted fields into tab-separated columns:
    # "([^\n]*)", "([^\n]*)", "([^\n]*)", "([^\n]*)", "([^\n]*)", "([^\n]*)", "([^\n]*)", "([^\n]*)"
    # \1\t\5\t\3\t\4\t\6\t\7\t\2\t\8
    hostname = page.hostname()
    printu(_ENTRY_TEMPLATE % (
        quote(url),
        quote(parser.parser(title or "", hostname)),
        dict2json(metadata),
        status,
        quote(reason),
        dict2json(headers),
        quote(redirect or ''),
        rank,
        quote(parser.parser(comment, hostname)),
    ))


def _print_close():
    """Print the closing brace, plus ``)`` when a JSONP callback is in use."""
    if callback:
        print('})')
    else:
        print('}')


def main():
    """Handle one CGI request and print the JSON (or JSONP) response.

    Query parameters read: callback, threads, timeout, httpDebug,
    alwaysUseGet, nothread, threshold, hostname, page, debug.
    """
    global threshold
    global callback
    form = cgi.FieldStorage()
    checklink.DEBUG = False
    # NOTE(review): callback comes straight from the query string and is
    # echoed unvalidated into a text/javascript response below; consider
    # restricting it to identifier characters.
    callback = form.getfirst('callback', '')
    checklink.config.max_external_links = int(form.getfirst('threads', 30))
    checklink.config.defaulttimeout = int(form.getfirst('timeout', 30))
    checklink.config.httpDebug = int(form.getfirst('httpDebug', 0))
    # bool() of a query value: any non-empty string, including "0", is True.
    alwaysUseGet = bool(form.getfirst('alwaysUseGet', False))
    nothread = bool(form.getfirst('nothread', False))
    threshold = int(form.getfirst('threshold', 0))

    if form.getfirst('hostname'):
        # The first two dot-separated labels are passed to wikipedia.Site().
        host = form.getfirst('hostname').split('.')
        site = wikipedia.Site(host[0], host[1])
    else:
        site = wikipedia.getSite()
    page = wikipedia.Page(site, form.getfirst('page', ''))

    if bool(form.getfirst('debug', False)):
        # Re-enable cgitb without a logdir and dump the raw page text first.
        cgitb.enable()
        print('Content-type: text/plain')
        print('')
        print(page.get().encode('utf-8'))

    # Check to make sure we can get the page
    try:
        page.get()
    except Exception:
        # Deliberately ignored here; failures are reported further down.
        pass

    print('Content-Type: text/javascript; charset=utf-8')
    print('')
    if callback:
        print(callback + '({')
    else:
        print('{')

    # Kill switch: the tool is currently disabled.  The response is closed
    # with an error object, a comment is opened, and the bare raise aborts
    # the request.  Everything after the raise is unreachable until these
    # three statements are removed.
    print('"error":{"info":"This tool has been temporaly disabled"}})')
    print('/*')
    raise

    try:
        page.get()
    except Exception as e:
        # quote() returns unicode, so encode explicitly via printu().
        printu(u'"error": {"info":"%s"}' % quote(e))
        _print_close()
        return

    try:
        printu(u' "title":"%s",' % quote(page.title()))
        printu(u' "hostname":"%s",' % quote(page.hostname()))
        print(' "links": [')
        checklink.checkMediaWikiPage(page, addEntry, alwaysUseGet, not nothread)
    finally:
        # addEntry() ends every entry with a comma; the null keeps the
        # array well-formed even if the check fails part-way through.
        print(' null/* hack for last , */')
        print(' ]')
        _print_close()
#    print 'if(checklinksError)checklinksError("Please update to the new API")'
#    print '/* Callback function */'
#    print 'if(checklinksDone)checklinksDone();'


if __name__ == "__main__":
    main()