#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
webchecklinks.py
A web wrapper around the checklinks library
(c) Dispenser, 2007-2008
'''
import cgi, sys
import checklink, wikipedia
# Don't log, since errors generally happen in a thread and don't go through the normal handler
import cgitb; cgitb.enable()
def printu(ustr):
    '''
    Print ustr, falling back to its UTF-8 encoding when stdout cannot
    encode it.

    ustr -- the text to print (normally a unicode object).

    Only encoding failures trigger the fallback; any other error raised
    while printing (an I/O error, KeyboardInterrupt, ...) propagates to the
    caller instead of being silently retried.
    '''
    # A single-argument print(...) is equivalent to the print statement
    # under Python 2, so the output is unchanged.
    try:
        print(ustr)
    except UnicodeError:
        print(ustr.encode('utf-8'))
def printFile(name, arg1="$1"):
    '''
    Print the template ./text/<name>.html (relative to the current working
    directory) with every '$1' replaced by arg1.

    name -- base name of the template, without directory or '.html'.
    arg1 -- replacement text; it is UTF-8 encoded before substitution.
            The default "$1" leaves the template text unchanged.

    Raises IOError if the template cannot be opened or read.
    '''
    template = open('./text/%s.html' % name)
    try:
        # try/finally so the handle is released even when reading fails
        contents = template.read()
    finally:
        template.close()
    print(contents.replace('$1', arg1.encode('utf-8')))
# Print one checked link's result and flush it to the client immediately.
# main() passes this function as the callback to checklink.checkMWhtml /
# checklink.checkMediaWikiPage.
#
# Parameters, as used below:
#   self, page -- accepted but not referenced in this function.
#   url        -- link address; its UTF-8 encoding is looked up in the
#                 webcite_requests.txt list.
#   refId      -- printed as-is, or '' when falsy.
#   context    -- printed after a .replace() call (see the note below).
#   status, reason -- printed as-is.
#   redirect   -- when truthy, ' redirect' is appended to the class list.
#   rank       -- forms the 'dead-<rank>' class; when falsy the last value
#                 printed is ''.
#   comment    -- used for the last value when truthy, otherwise reason is.
def printEntry(self, page, url, refId, context, status, reason, redirect, rank, comment):
#printu(u'%s %s - %s' % (status, reason, wikilink))
classes = 'dead-%s'%rank
# Iterating a file object yields lines including their trailing '\n', hence
# the newline appended to the encoded URL before the membership test.
# NOTE(review): the file is re-opened and scanned on every call and is never
# explicitly closed.
if url.encode('utf-8')+'\n' in open('/home/dispenser/webcite_requests.txt','r'):
classes += ' webcite'
if redirect:
classes += ' redirect'
# NOTE(review): the literal below has four %s placeholders but six values are
# supplied, it is split across lines, and context.replace('[[', '[[') is a
# no-op. The markup inside these strings appears to have been lost from this
# copy of the file; restore it from the original before relying on this code.
printu('
| %s | %s | %s | %s |
' % (classes, refId or '', context.replace('[[', '[['), reason, status, rank and (comment or reason) or ''))
# flush to user
sys.stdout.flush()
# Print a form field built from name, value and a label (presumably an HTML
# text input -- the markup itself is not visible in this copy).
#
#   name   -- used three times in the output and as the basis of the default label.
#   value  -- passed straight into the output.
#   label  -- when None, defaults to the capitalised name followed by ': '.
#   attrib -- extra text passed as the last value (presumably additional attributes).
#
# NOTE(review): the format literal on the print line is empty while six values
# are supplied, so as written the % operation raises TypeError. The markup
# appears to have been lost; restore it from the original file.
def textbox(name, value, label = None, attrib=''):
if label is None:
label = '%s: ' % name.capitalize()
print '' % (name, label, name, value, name, attrib)
# Print a form field built from name, a checked flag and a label (presumably
# an HTML checkbox -- the markup itself is not visible in this copy).
#
#   name    -- used three times in the output and as the basis of the default label.
#   checked -- when truthy, ' checked="checked"' is appended to attr.
#   label   -- when falsy, the capitalised name is used instead.
#   attr    -- extra text placed in the output (presumably additional attributes).
#
# NOTE(review): the format literal on the print line is empty while five values
# are supplied, so as written the % operation raises TypeError. The markup
# appears to have been lost; restore it from the original file.
def checkbox(name, checked, label = None, attr=''):
if checked:
attr += ' checked="checked"'
print '' % (name, name, attr, name, label or name.capitalize())
# CGI entry point: read the request parameters, configure checklink, load the
# requested wiki page and print the link-check report for it.
#
# NOTE(review): several string literals in this function are split across
# lines or have fewer %s placeholders than values supplied (the head= argument
# to startContent is empty as well). The markup inside them appears to have
# been lost from this copy; restore it from the original file before use.
def main():
#global hostname
form = cgi.FieldStorage()
# Copy request parameters into checklink's module-level settings.
# bool() of a submitted string is True for any non-empty value, so e.g.
# 'debug=0' still enables the flag.
# NOTE(review): int() raises ValueError when threads/timeout/httpDebug are not numeric.
htmlmode = bool(form.getfirst('html', False))
checklink.DEBUG = bool(form.getfirst('debug', False))
checklink.SOURCE = bool(form.getfirst('source', False))
checklink.config.max_external_links = int(form.getfirst('threads', 30))
checklink.config.defaulttimeout = int(form.getfirst('timeout', 30))
checklink.config.httpDebug = int(form.getfirst('httpDebug', 0))
checklink.config.useGET = bool(form.getfirst('alwaysUseGet', False))
checklink.config.threaded = not bool(form.getfirst('nothread', False))
# An explicit 'hostname' is split on '.' and its first two parts select the
# site (presumably language code and family -- verify against wikipedia.Site);
# otherwise the default site is used.
# NOTE(review): a hostname without a '.' raises IndexError at host[1].
if form.getfirst('hostname'):
host = form.getfirst('hostname').split('.')
site = wikipedia.Site(host[0], host[1])
else:
site = wikipedia.getSite()
page = wikipedia.Page(site, form.getfirst('page', ''))
#hostname = page.hostname()
wikipedia.startContent(u'Checklinks: %s' % page.title(), form=False, head='''''')
print ''
# Load the page. A missing page prints an error and stops; a redirect page
# prints output built from the host name and the redirect target, then stops.
try:
page.get()
except wikipedia.NoPage, e:
printu('NoPage error encountered (%s)' % e)
return
except wikipedia.IsRedirectPage:
link = wikipedia.Page(page.site(), page._redirarg).aslink()[2:-2]
printu('
%s' % (page.hostname(), link, link))
return
printFile('checklinks-header')
# When the reported cache age is finite, print the time the page was cached:
# the current time minus the cache age, formatted via gmtime().
if checklink.cacheAge(page) < float('inf'):
print checklink.time.strftime('Previously cached on %d %B %Y at %H:%M
', checklink.time.gmtime(checklink.time.time() - checklink.cacheAge(page)))
else:
print ""
print ''
# NOTE(review): the line after the commented-out printu below is the tail of
# that same commented-out statement; it lost its comment marker when the
# literal was split across lines.
#import parser
#printu('| %s |
' % (parser.escapeId(page.title().encode('utf-8')), page.hostname(), page.urlname(), page.aslink()[2:-2], page.title()))
printu('| %s |
' % ('', page.hostname(), page.urlname(), page.aslink()[2:-2], page.title()))
printFile('checklinks-tableHead')
# Run the checker selected by the 'html' parameter, with printEntry as the
# per-link callback; the finally clause prints the closing literal even if
# the checker raises.
try:
if htmlmode:
checklink.checkMWhtml(page, printEntry)
else:
checklink.checkMediaWikiPage(page, printEntry)
finally:
print '
'
# Script entry point: run only when executed directly, and only when
# wikipedia.handleUrlAndHeader() returns a truthy value.
if __name__ == "__main__":
    if wikipedia.handleUrlAndHeader():
        try:
            main()
        finally:
            # Always close the page output, even if main() raised.
            wikipedia.endContent()