#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
webchecklinks.py
A web wrapper around the checklinks library
(c) Dispenser, 2007-2008
'''
import cgi, re
import checklink, wikipedia, parser
import cgitb; cgitb.enable(logdir='./logs/', context=10)
def printu(ustr):
    '''
    Print ustr to standard output, falling back to UTF-8 bytes.

    ustr is printed unchanged first.  If that raises a Unicode error (for
    example because standard output cannot encode the text), ustr is encoded
    to UTF-8 and the encoded form is printed instead.
    '''
    try:
        # A single parenthesised argument prints exactly like the bare
        # print statement used elsewhere in this file.
        print(ustr)
    except UnicodeError:
        # Only encoding problems are retried.  Anything else (I/O errors,
        # KeyboardInterrupt, SystemExit) propagates instead of triggering a
        # second print attempt.
        print(ustr.encode('utf-8'))
def printFile(name, arg1="$1"):
    '''
    Print the template ./text/<name>.html with "$1" replaced by arg1.

    name -- base name of the template; it is read from the "text" directory
            relative to the current working directory.
    arg1 -- replacement for every "$1" in the template.  With the default
            value the placeholder is replaced by itself, i.e. left as-is.

    Raises IOError if the template cannot be opened or read.
    '''
    # Encode only values that are not already of type str.  Calling
    # .encode() on a Python 2 byte string first decodes it as ASCII, which
    # fails for any non-ASCII byte, so byte strings are passed through.
    if not isinstance(arg1, str):
        arg1 = arg1.encode('utf-8')
    f = open('./text/%s.html' % name)
    try:
        template = f.read()
    finally:
        # Release the handle even when read() raises.
        f.close()
    print(template.replace('$1', arg1))
def addEntry(self, page, url, refId, context, status, reason, redirect, rank, comment):
    '''
    Print one result row for a checked link.

    main() passes this function as the second argument to
    checklink.checkMWhtml() and checklink.checkMediaWikiPage().  Nothing is
    printed unless rank is at least the module-global threshold that main()
    sets.

    The class string for the row starts as "dead-<rank>", gains " webcite"
    when the UTF-8 encoded url is a line of
    /home/dispenser/webcite_requests.txt, and gains " redirect" when redirect
    is true.  The self and page parameters are not used in the body.
    '''
    #printu(u'%s %s - %s' % (status, reason, wikilink))
    if not rank < threshold:
        classes = 'dead-%s'%rank
        # The "in" test iterates over the file line by line, which is why
        # "\n" is appended to the url.  The file object is never closed
        # explicitly.
        if url.encode('utf-8')+'\n' in open('/home/dispenser/webcite_requests.txt','r'):
            classes += ' webcite'
        if redirect:
            classes += ' redirect'
        # NOTE(review): the literal below spans three lines, which a
        # single-quoted string cannot do, and it holds four %s placeholders
        # for six supplied values.  The row markup looks like it was lost --
        # TODO restore it from the upstream copy.  As written,
        # context.replace('[[', '[[') also replaces the text with itself.
        # The last value is (comment or reason) when rank is true, else ''.
        printu('
| %s | %s | %s | %s |
' % (classes, refId or '', context.replace('[[', '[['), reason, status, rank and (comment or reason) or ''))
def textbox(name, value, label = None, attrib=''):
    '''
    Print a text-box form control built from name, value, label and attrib.

    label defaults to the capitalised name followed by ": ".
    '''
    if label is None:
        label = '%s: ' % name.capitalize()
    # NOTE(review): the format string is empty here while six values are
    # supplied, so the % operation raises TypeError as written.  Presumably
    # the markup template was lost -- TODO restore it from the upstream copy.
    print '' % (name, label, name, value, name, attrib)
def checkbox(name, checked, label = None, attr=''):
    '''
    Print a check-box form control built from name, attr and a label.

    When checked is true, ' checked="checked"' is appended to attr.  The
    label falls back to the capitalised name when label is empty or None.
    '''
    if checked:
        attr += ' checked="checked"'
    # NOTE(review): the format string is empty here while five values are
    # supplied, so the % operation raises TypeError as written.  Presumably
    # the markup template was lost -- TODO restore it from the upstream copy.
    print '' % (name, name, attr, name, label or name.capitalize())
def main():
    '''
    Handle one CGI request: read the form, check the page, print the report.

    Form fields read: debug, source, threads, timeout, httpDebug,
    alwaysUseGet, html, nothread, threshold, hostname and page.  The values
    configure the checklink module and the module-global threshold used by
    addEntry(), select the wiki site and page, and choose between
    checklink.checkMWhtml() and checklink.checkMediaWikiPage().

    Returns early, after printing a message, when fetching the page raises
    wikipedia.NoPage or wikipedia.IsRedirectPage.
    '''
    global threshold
    #global hostname
    form = cgi.FieldStorage()
    # Flags go through bool(), so any non-empty submitted value counts as
    # true -- including strings such as "0".
    checklink.DEBUG = bool(form.getfirst('debug', False))
    checklink.SOURCE = bool(form.getfirst('source', False))
    # Numeric settings go through int(), which raises ValueError for
    # non-numeric input.
    checklink.config.max_external_links = int(form.getfirst('threads', 30))
    checklink.config.defaulttimeout = int(form.getfirst('timeout', 30))
    checklink.config.httpDebug = int(form.getfirst('httpDebug', 0))
    alwaysUseGet = bool(form.getfirst('alwaysUseGet', False))
    htmlmode = bool(form.getfirst('html', False))
    nothread = bool(form.getfirst('nothread', False))
    threshold = int( form.getfirst('threshold', 0))
    if form.getfirst('hostname'):
        # The first two dot-separated labels of hostname are handed to
        # wikipedia.Site.  A value without a dot raises IndexError at host[1].
        host = form.getfirst('hostname').split('.')
        site = wikipedia.Site(host[0], host[1])
    else:
        site = wikipedia.getSite()
    page = wikipedia.Page(site, form.getfirst('page', ''))
    #hostname = page.hostname()
    wikipedia.startContent(u'Checklinks: %s' % page.title(), form=False, notice='')
    print ''
    try:
        # The return value is discarded; the two exceptions below are the
        # handled outcomes of this call.
        page.get()
    except wikipedia.NoPage, e:
        printu('NoPage error encountered (%s)' % e)
        return
    except wikipedia.IsRedirectPage:
        # [2:-2] drops two characters from each end of aslink() --
        # presumably the surrounding "[[" and "]]"; TODO confirm.
        link = wikipedia.Page(page.site(), page._redirarg).aslink()[2:-2]
        # NOTE(review): this literal spans two lines and has one %s for
        # three values; its markup looks like it was lost -- TODO restore.
        printu('
%s' % (page.hostname(), link, link))
        return
    try:
        printFile('ChecklinksHeader')
        print ''
        # NOTE(review): this literal spans two lines and has one %s for
        # five values; its markup looks like it was lost -- TODO restore.
        printu('| %s |
' % (parser.escapeId(page.title().encode('utf-8')), page.hostname(), page.urlname(), page.aslink()[2:-2], page.title()))
        printFile('tableHeader')
        # addEntry is the callback that prints each row; threading is enabled
        # unless the nothread field was submitted.
        if htmlmode:
            checklink.checkMWhtml(page, addEntry, alwaysUseGet, not nothread)
        else:
            checklink.checkMediaWikiPage(page, addEntry, alwaysUseGet, not nothread)
    finally:
        # Printed whether or not the check raised.
        # NOTE(review): the literal spans two lines; presumably closing
        # markup was lost -- TODO restore.
        print '
'
# Script entry point.  main() runs only when this file is executed directly
# and wikipedia.handleUrlAndHeader() returns a true value; that call is not
# made at all when the module is merely imported.
if __name__ == "__main__":
    if wikipedia.handleUrlAndHeader():
        try:
            main()
        finally:
            # Runs even when main() raises.
            wikipedia.endContent()