"""
# printWikiFile('siteNotice')
print """
""" '''
# Print the closing part of the page content. main() calls this after the
# action-specific output and before the 'nav' and 'footer' files are printed.
# NOTE(review): in this copy the literal holds only a newline; the original
# closing markup appears to have been lost -- confirm against the real file.
def endContent():
print """
"""
def wikiParser(text):
    """Pass *text* to ``parser.parser`` and return whatever it produces.

    Thin convenience wrapper so callers in this file can render wiki text
    without referring to the ``parser`` module directly.
    """
    rendered = parser.parser(text)
    return rendered
# Print a fixed block of text; main() calls this right after startContent()
# for the 'links', 'summary'/'report' and 'oldsummary' actions.
# NOTE(review): in this copy the literal holds only a newline; the help-link
# markup itself appears to have been lost -- confirm against the real file.
def printHelpLinks():
print '''
'''
# Print the summary page for one report: a table of pages that have
# suspicious/dead links, followed by overall link statistics.
#
# filename -- report name; every '/' is replaced by '|' to build the path
#             under ./reports/.
#
# Side effects: prints to stdout and rebinds the module-level `notes` tuple
# to a single "last modified" line. If the report file's mtime cannot be read
# (OSError) the 'doesnotexist' wiki file is printed and the function returns.
#
# NOTE(review): several string literals below are split across lines and some
# format strings have fewer placeholders than arguments; the markup they
# contained appears to have been lost in this copy -- confirm against the
# real file before editing them.
def printReportSummary(filename):
try:
modifiedtime = os.path.getmtime('./reports/'+filename.replace('/', '|'))
global notes
#notes = ('Updated %s ago on %s (UTC)' % (timeago(time.time() - modifiedtime), time.ctime(modifiedtime), ),) #time.strftime('%A, %Y %d, %B at %H:%M (UTC)', time.gmtime())
notes = ('This page was last modified on %s' % time.ctime(modifiedtime),)
except OSError:
printWikiFile('doesnotexist')
return
# pagecount: lines read from the report file.
# rankstats: link count per rank over all pages, keyed by the rank as a
#            string ('0'..'6').
# barelinks: page name -> number of cache entries containing
#            '"external autonumber"'.
# counter:   pages that actually got a table row.
pagecount = 0
rankstats = {'0':0,'1':0,'2':0,'3':0,'4':0,'5':0,'6':0}
barelinks = {}
counter = 0
print '
'
print '| Article | Tools | Suspicious | Dead |
'
# Each report line is tab-separated; the second field is the page name.
for page in open('./reports/%s' % filename.replace('/', '|')):
pagecount += 1
pagename = page.strip('\n').split('\t')[1]
# Per-page counts, same keys as rankstats.
laststats = {'0':0,'1':0,'2':0,'3':0,'4':0,'5':0,'6':0}
try:
# The cache file name is the whole report line with tabs -> ':',
# spaces -> '_', '/' -> '|' and the trailing newline removed.
for linkentry in open('./cache/'+page.replace('\t', ':').replace(' ', '_').replace('/', '|').strip('\n')):
items = linkentry.split('\t')
# print '', items, ''
# Field 8 of a cache entry is used as the rank key.
rankstats[items[8]] = rankstats.get(items[8], 0) + 1
laststats[items[8]] = laststats.get(items[8], 0) + 1
if '"external autonumber"' in linkentry:
barelinks[pagename] = barelinks.get(pagename, 0) + 1
except IOError:
# Missing cache file: report it and move on. The page still counts
# towards pagecount but contributes nothing to rankstats.
print '| WARNING: %s is not cached |
'%pagename
continue
# Only pages with at least one rank-4 or rank-5 link get a row; the two
# counts are printed under the "Suspicious" and "Dead" headers.
if laststats['4'] or laststats['5']:
counter+= 1
print ''
print wikiParser('| [[%s]] | ' % (pagename.replace('|', '/'), ))
print 'view, refresh | '%(pagename,'/~dispenser/cgi-bin/webchecklinks.py/', pagename)
for num in (laststats['4'], laststats['5']):
# and/or idiom: formatted count when non-zero, otherwise an empty cell.
print num and '% 3d | '%num or ' | '
print '
'
print '| Showing %d out of %d pages |
' % (counter, pagecount)
print '
'
# Blank file, divide by zero errors
if pagecount == 0: return
linkcount = sum(rankstats.values())
# NOTE(review): linkcount can still be 0 here while pagecount > 0 (for
# example when every page took the IOError branch above); the divisions by
# linkcount in the argument tuple below would then raise ZeroDivisionError.
# NOTE(review): rankstats[c]*100/linkcount is integer division under
# Python 2, so the "+0.5" does not round before "%d" truncates -- confirm
# whether rounding was intended.
# The per-rank rows are passed in the order 0, 1, 2, 4, 5, 3, 6, which lines
# up with the labels Good, Status, Warn, Suspicious, Error, Conection,
# Badlinks. The first two tuple items have no visible placeholder in this
# copy of the format string.
print """
Overview
| Links | Avg per page | Percent total |  |
| Good: | %6d | %5.3g | %5.1f%% |
| Status: | %6d | %5.3g | %5.1f%% |
| Warn: | %6d | %5.3g | %5.1f%% |
| Suspicious: | %6d | %5.3g | %5.1f%% |
| Error: | %6d | %5.3g | %5.1f%% |
| Conection: | %6d | %5.3g | %5.1f%% |
| Badlinks: | %6d | - |
| Total: | %6d | %#.3g | %2.2f%% |
""" % (
','.join(["%d"%(rankstats[c]*100/linkcount+0.5) for c in '012345']),
3.14159-(3.14159*rankstats['0']/linkcount),
rankstats['0'], (rankstats['0']/float(pagecount)), (rankstats['0']*100/float(linkcount)),
rankstats['1'], (rankstats['1']/float(pagecount)), (rankstats['1']*100/float(linkcount)),
rankstats['2'], (rankstats['2']/float(pagecount)), (rankstats['2']*100/float(linkcount)),
rankstats['4'], (rankstats['4']/float(pagecount)), (rankstats['4']*100/float(linkcount)),
rankstats['5'], (rankstats['5']/float(pagecount)), (rankstats['5']*100/float(linkcount)),
rankstats['3'], (rankstats['3']/float(pagecount)), (rankstats['3']*100/float(linkcount)),
rankstats['6'],
linkcount, (linkcount/float(pagecount)), 100,
)
# List the pages with bare links as wiki links, sorted by the rendered
# "[[name]] (count)" string.
if barelinks:
print parser.parser("
These pages appear to have bare links: "+', '.join(sorted(["[[%s]] (%d)"%(k,v) for k,v in barelinks.items()]))+"
")
# Print the older-style summary for a job: one row per page that has
# suspicious or dead links, then overall statistics per link rank. main()
# uses this for the 'oldsummary' action.
#
# filename -- job name; every '/' is replaced by '|' and '.tsv' appended to
#             build the path under ./jobs/.
#
# If the .tsv file does not exist the 'doesnotexist' wiki file is printed and
# the function returns. Everything else is written to stdout.
#
# NOTE(review): several string literals below are split across lines and some
# format strings have fewer placeholders than arguments; the markup they
# contained appears to have been lost in this copy -- confirm against the
# real file before editing them.
def printLinksSummary(filename):
if not os.path.exists('./jobs/%s.tsv' % filename.replace('/', '|')):
printWikiFile('doesnotexist')
return
# broken: page key -> [count of rank-4 links, count of rank-5 links].
broken = {}
# NOTE(review): `onthefly` is not referenced anywhere in the visible code.
onthefly = '/~dispenser/cgi-bin/webchecklinks.py?page='
# NOTE(review): index 9 of the os.stat() result is st_ctime; st_mtime is
# index 8. Confirm which timestamp "Modify_time" is meant to hold.
Modify_time = os.stat('./jobs/%s.tsv' % filename.replace('/', '|'))[9]
print '
Updated %s ago on %s (UTC)
' % (timeago(time.time() - Modify_time), time.ctime(Modify_time), ) #time.strftime('%A, %Y %d, %B at %H:%M (UTC)', time.gmtime())
f = open('./jobs/%s.tsv' % filename.replace('/', '|'))
lastpage = ''
pagecount = 0
# rankstats[r] counts the links whose rank column equals r (0..6).
rankstats = [0, 0, 0, 0, 0, 0, 0]
# codestats = {}
barelinks = {}
for line in f:
# NOTE(review): the tab-count test runs first, so a '#' comment line only
# reaches the branch below if it contains at least 7 tabs -- confirm this
# is intended.
if not line or line.count('\t') < 7:# for whatever reason we always \n as the last item 6:
continue
elif line.startswith('#'):
# Print file comments
# (only those seen before the first data row, i.e. while lastpage is empty)
if not lastpage:
print wikiParser(line[1:])
continue
# Old format: (pagename, wikilink, status, reason, content_length, content_type, rank, comments)
# New format: (pagename, wikilink, status, reason, metadata, Ref No., rank, comment
v = line.split('\t')
# Ranks 4 and 5 are tallied per page in slots 0 and 1 of broken[page].
if 6 > int(v[6]) >= 4:
if not broken.has_key(v[0]):
broken[v[0]] = [0, 0]
broken[v[0]][int(v[6])-4] += 1
# A page is counted each time the first column changes from the previous
# row, so the count is only accurate if rows for one page are contiguous.
if v[0] != lastpage:
lastpage = v[0]
pagecount += 1
# Gather statistics
# (int(v[6]) must be within 0..6, otherwise this raises IndexError)
rankstats[int(v[6])] += 1
# if v[2] not in codestats:codestats[v[2]] = 0
# codestats[v[2]] += 1
# Bare-link heuristic: the metadata column is '{}' and either the link
# starts with 'http://', or it has no space and column 5 is non-empty.
if v[4] == '{}' and (v[1].startswith('http://') or ' ' not in v[1] and v[5]):
if v[0] in barelinks:
barelinks[v[0]] += 1
else:
barelinks[v[0]] = 1
f.close()
print '
'
print '| Article | Links tab | Suspicious | Dead | Tools |
'
counter = 0
for key in broken.iterkeys():
# Drop the first two characters of the key and everything from the first
# ']' onwards (presumably the key looks like '[[Page name]]' -- confirm).
pagename = key[2:key.find(']')]
anchorname = parser.escapeId(pagename)
print wikiParser('| %s | View | ' % (key, filename, anchorname, ))
for num in broken[key]:
# and/or idiom: formatted count when non-zero, otherwise an empty cell.
print num and '% 3d | '%num or ' | '
print 'Checklinks (cache), Dablinks |
' % (pagename, pagename, pagename)
counter += 1
print '| Show %d out of %d pages |
' % (counter, pagecount)
print '
'
# print '
'
# colormap = {'-':'ccccff','1':'ABCEDF', '2':'CCFCCC', '3':'CCE5F2', '4':'FC9999', '5':'E5CCB3'}
# if '200' in codestats:
# total = float(sum(codestats.values()))
# print '
The right graph excludes "200 OK" responses, thus representing only %d%% of all responses.
' % (100.5-100*codestats['200']/total)
# print '
)
' % (3.14159-codestats['200']*3.14159/total, codestats['200']*100/total, 100-(codestats['200']*100/total), codestats['200']*100/total+0.5,)
# del codestats['200']
#
# # group values below 1% into "Other"
# total = sum(codestats.values())
# for (key, value) in codestats.items():
# if value < total / 100 and not key[0]=='4':
# codestats['Other'] = value + codestats.get('Other', 0)
# del codestats[key]
# print """

"""%(
# codestats,
# '|'.join([colormap.get(s[0], 'aaaaaa') for s in sorted(codestats.keys())]),
# ','.join(["%s"%(codestats[s]*100/total) for s in sorted(codestats.keys())]),
# '|'.join(sorted(codestats.keys())),
# )
# print '
'
# The statistics table is printed only when at least one link was counted,
# which also means pagecount is at least 1 for the divisions below. Rows are
# passed in the order 0, 1, 2, 4, 5, 3, 6 to line up with the labels Good,
# Status, Warn, Suspicious, Error, Conection, Badlinks. The first two tuple
# items have no visible placeholder in this copy of the format string.
if sum(rankstats) > 0:print """
| Links | Avg per page | Percent total |  |
| Good: | %6d | %5.3g | %5.1f%% |
| Status: | %6d | %5.3g | %5.1f%% |
| Warn: | %6d | %5.3g | %5.1f%% |
| Suspicious: | %6d | %5.3g | %5.1f%% |
| Error: | %6d | %5.3g | %5.1f%% |
| Conection: | %6d | %5.3g | %5.1f%% |
| Badlinks: | %6d | - |
| Total: | %6d | %#.3g | %2.2f%% |
""" % (
','.join(["%d"%(i*100/sum(rankstats)+0.5) for i in rankstats]),
3.14159-(3.14159*rankstats[0]/sum(rankstats)),
rankstats[0], (rankstats[0]/float(pagecount)), (rankstats[0]*100/float(sum(rankstats))),
rankstats[1], (rankstats[1]/float(pagecount)), (rankstats[1]*100/float(sum(rankstats))),
rankstats[2], (rankstats[2]/float(pagecount)), (rankstats[2]*100/float(sum(rankstats))),
rankstats[4], (rankstats[4]/float(pagecount)), (rankstats[4]*100/float(sum(rankstats))),
rankstats[5], (rankstats[5]/float(pagecount)), (rankstats[5]*100/float(sum(rankstats))),
rankstats[3], (rankstats[3]/float(pagecount)), (rankstats[3]*100/float(sum(rankstats))),
rankstats[6],
sum(rankstats), (sum(rankstats)/float(pagecount)), 100,
)
# List pages with bare links, sorted by the rendered "key (count)" string.
if barelinks:
print parser.parser("These pages appear to have bare links: "+', '.join(sorted(["%s (%d)"%(k,v) for k,v in barelinks.items()])))
# Print the detailed link table for a job, one row per link, grouped under a
# heading each time the page column changes.
#
# filename  -- job name; every '/' is replaced by '|' and '.tsv' appended to
#              build the path under ./jobs/.
# threshold -- rows whose rank column (v[6]) compares lower than this are
#              skipped. Both sides are strings, so the comparison is
#              lexicographic (equivalent to numeric only for single digits).
# code      -- when given, only rows whose status column (v[2]) equals it
#              are shown.
#
# If the .tsv file does not exist the 'doesnotexist' wiki file is printed and
# the function returns. The file is closed in the `finally` clause.
#
# NOTE(review): several string literals below are split across lines and some
# format strings have fewer placeholders than arguments; the markup they
# contained appears to have been lost in this copy -- confirm against the
# real file before editing them.
def printLinksPage(filename, threshold='1', code=None):
if not os.path.exists('./jobs/%s.tsv' % filename.replace('/', '|')):
printWikiFile('doesnotexist')
return
printFile('tableHeader')
lastpage = ''
f = open('./jobs/%s.tsv' % filename.replace('/', '|'))
try:
for line in f:
# Lines starting with '#' are file comments, not data rows.
if line.startswith('#'):
continue
#(pagename, wikilink, status, reason, content_length, content_type, rank, comments) = line.split('\t')
v = line.split('\t')
# Need at least 7 columns to read the rank in v[6].
# NOTE(review): v[7] is read further down, but only more than 6 columns are
# guaranteed here; a row with exactly 7 columns would raise IndexError.
if not len(v) > 6 or v[6] < threshold:
continue
if code and v[2] != code:
continue
# New page: print its heading, with an anchor id derived from the text
# between the first two characters and the first ']' of the page column.
if v[0] != lastpage:
lastpage = v[0]
anchor = parser.escapeId(lastpage[2:lastpage.find(']')])
print wikiParser('
| %s |
|---|
' % (anchor, lastpage))
# truncate response reason if too long
reason = v[3]
if len(reason) >= 37: # "Temporary failure in name resolution" is the longest at 36 chars
reason = reason[:25] + '...'
try:
# Not very safe!
# NOTE(review): eval() runs the metadata column as Python code; if the .tsv
# content can be influenced from outside this is a code-execution risk.
# The bare except below also hides every failure, not only malformed data.
# The loop variable `tuple` shadows the builtin inside the comprehension.
meta = ('
'.join(['='.join(tuple) for tuple in eval(v[4]).items()])).encode('utf-8')
except:
meta = ''
# NOTE(review): as written, meta.replace('[[', '[[') replaces a string with
# itself; the replacement text was presumably an escaped form -- confirm.
print parser.parser('
\t| – | %s | %s | %s | %s | %s |
' % (v[6], '', meta.replace('[[', '[['), v[1], reason, v[2], v[7] or reason))
#print ('
\t| %s | %s | %s | %s |
' % (v[6], v[2], reason, v[1], v[7]))
finally:
# Runs even if a row raised: print the trailing literal and close the file.
print ''
f.close()
# CGI entry point: read the request parameters, print the page for the
# requested action, then print the navigation and footer.
#
# Form fields read:
#   action    -- which page to render (default 'view')
#   threshold -- passed to printLinksPage (default '1')
#   title     -- page/job name (default 'Main_Page')
#   code      -- optional status filter, 'links'/'table' action only
#
# Module-level names rebound here: urlname, viewtabs, notes.
#
# NOTE(review): `title` comes straight from the request and is used to build
# paths under ./text/, ./cache/ and ./jobs/; only '/' is replaced (and not at
# all for the 'list' action) -- confirm this cannot reach unintended files.
# NOTE(review): several string literals below are split across lines and some
# format strings have fewer placeholders than arguments; the markup they
# contained appears to have been lost in this copy -- confirm against the
# real file before editing them.
def main():
form = cgi.FieldStorage()
action = form.getfirst('action', 'view')
threshold = form.getfirst('threshold', '1')
global urlname # used to set base
urlname = form.getfirst('title', 'Main_Page')
title = urlname.replace('_', ' ')
filename = urlname.replace('/', '|') #+ '.tsv'
# Previously we used '-' to represent '/', now we use '|'
#filename = filename.replace('/', '-')
# viewtabs entries are (url, label) or (url, label, True) tuples; the third
# item is set on one tab per action (presumably the selected tab -- confirm).
global viewtabs
viewtabs = [("#", "Special page", True)]
# notes: tuple of strings that is joined into the footer at the end.
global notes
notes = ()
# Wikitext files
if action == 'view':
viewtabs = [
("/~dispenser/view/%s" % urlname, "Page", True),
("/~dispenser/source/%s"%urlname, "View source"),
]
startContent(title)
printWikiFile(urlname)
# Add a "last modified" note when ./text/<filename>.html exists; a missing
# file is silently ignored.
try:
modifiedtime = os.path.getmtime('./text/%s.html'%filename)
notes = ('This page was last modified on %s' % time.ctime(modifiedtime),)
except OSError:
pass
elif action == 'source' or action == 'edit':
viewtabs = [
("/~dispenser/view/%s" % urlname, "Page"),
("/~dispenser/source/%s"%urlname, "View source", True),
]
startContent('View source: %s' % title)
try:
# NOTE(review): `f` is opened here but never read or closed in the visible
# code; the statement that printed its contents was presumably lost.
# NOTE(review): index 9 of the os.stat() result is st_ctime, not st_mtime.
f = open('./text/%s.html'%filename)
modifiedtime = os.stat('./text/%s.html' % filename)[9]
notes = ('Updated %s ago on %s (UTC)' % (timeago(time.time() - modifiedtime), time.ctime(modifiedtime), ),) #time.strftime('%A, %Y %d, %B at %H:%M (UTC)', time.gmtime())
print '
You can view and copy the source of this page:
'
print '
'
except IOError:
print '
%s.html does not exist.
' % urlname
# Link File Tables
elif action == 'links' or action == 'table':
viewtabs = [
("/~dispenser/summary/%s" % urlname, "Summary"),
("/~dispenser/links/%s" % urlname, "Links", True),
# ("/~dispenser/job/%stsv" % filename, "Download (.tsv)"),
]
startContent("Link results from %s" % title)
printHelpLinks()
printLinksPage(urlname, threshold, form.getfirst('code'))
elif action == 'cache':
startContent("Cache of %s" % title)
# filename already has '/' replaced, so this replace changes nothing.
requestname = filename.replace('/', '|')
# Names without any ':' get the "wikipedia:en:" prefix.
if not ':' in requestname:
requestname="wikipedia:en:"+requestname
try:
# NOTE(review): `f` is never closed in the visible code.
f = open('./cache/%s' % requestname)
print """
"""
printFile('checklinks-cache-warning')
print '
refresh' % (requestname.replace('wikipedia:', 'w:'))
print '
'
print '| %s |
' % (requestname.replace('wikipedia:', 'w:'), requestname.replace('wikipedia:en:', ''), requestname)
#print '| %s |
' % (parser.escapeId(page.title().encode('utf-8')), page.hostname(), page.urlname(), page.aslink()[2:-2], page.title())
printFile('tableHeader')
# One table row per cache line; columns are tab-separated and indices
# 1, 2, 3, 4, 8 and 9 are used below.
for line in f:
cells = line.split('\t')
print ''%cells[8]
print "| %s | "%cells[1]
print "%s | "%cells[2]
print '%s | '%(cells[4],cells[3])
print "%s | "%cells[9]
print '
'
print '
'
except IOError:
print 'No cached result exist of
%s' % (requestname, requestname)
#raise
viewtabs = [
("/~dispenser/cache/%s" % requestname, "Cache", True),
]
elif action == 'report' or action=='summary':
viewtabs = [
("/~dispenser/summary/%s" % urlname, "Summary", True),
]
startContent("Summary of %s" % title)
printHelpLinks()
printReportSummary(urlname)
elif action == 'oldsummary':
startContent("Summary of %s" % title)
printHelpLinks()
printLinksSummary(urlname)
viewtabs = [
("/~dispenser/summary/%s" % urlname, "Summary", True),
("/~dispenser/links/%s" % urlname, "Links"),
]
# Queue list
elif action == 'list':
viewtabs = [
("/~dispenser/list/%s" % urlname, "Jobs", True),
]
startContent('%s' % title)
# NOTE(review): file() is the Python 2 builtin; the handle is never closed,
# and a missing job list raises IOError, which is not handled here.
jobs = file('./jobs/%s'% urlname.lower())
if ' ' in title:
print wikiParser("''See also: [[Category:%s]]''"%title)
print '
'
for s in jobs:
if not s.strip(): continue
cmd = s.strip('\n\r').split('\t')
# Drop '-list:jobs/...' arguments. Iterating over the cmd[1:] copy makes it
# safe to remove items from cmd inside the loop.
for item in cmd[1:]:
if item.startswith('-list:jobs/'):
cmd.remove(item)
# Pad to at least 4 fields so the indexing below cannot fail.
while len(cmd) <= 3:cmd.append('')
# Append the report's modification time (UTC) when the report file exists.
try:cmd.append(time.strftime("%d %B %Y at %H:%M",
time.gmtime(os.path.getmtime("./reports/%s"%cmd[0]))))
except OSError:pass
print '| %s | %s |
' % (cmd[0].replace('|', '/'), cmd[1], ''.join(cmd[2:]))
print ' |
'
else:
startContent()
print 'No action by that name specified'
# Common tail for every action: close the content, then print the 'nav' and
# 'footer' files. The footer receives the notes plus a rendering-time line.
# NOTE(review): time.clock() exists in Python 2 but was removed in Python 3.8.
endContent()
printFile('nav')
printFile('footer', '
'.join(notes + ("Page rendering in %#3.2f seconds" % time.clock(),)))
print ''
# Normalise the request URI and print the HTTP response headers.
#
# The REQUEST_URI environment variable has '%20' and '+' replaced by '_' and
# trailing '_' stripped. If that changes the URI, a 301 redirect to the
# normalised URI is printed and False is returned, so the caller skips page
# rendering. Otherwise only the Content-Type header and the blank line that
# ends the headers are printed, and True is returned.
#
# NOTE(review): the redirect body literal has no '%s' placeholder in this
# copy although it is formatted with `redirect`; the link markup appears to
# have been lost -- confirm against the real file.
def handleUrlAndHeader():
redirect = os.getenv("REQUEST_URI", '')
redirect = redirect.replace('%20', '_').replace('+', '_')
redirect = redirect.rstrip('_')
if redirect != os.getenv("REQUEST_URI", ''):
# http://turbo-technical-report.blogspot.com/2006/11/server-side-301-302-http-response.html
# Saved me quite a bit of trouble. Thanks!
print "Status: 301"
print 'Location: ' + redirect
print 'Content-Type: text/html; charset=utf-8'
# Blank line terminates the CGI header section.
print
print """
301 Moved Permanently
Moved Permanently
The document has moved here.
""" % redirect
return False
else:
#print 'Content-Type: application/xhtml+xml; charset=utf-8'
print 'Content-Type: text/html; charset=utf-8'
# Blank line terminates the CGI header section.
print
return True
# Script entry point: render the page only when run as a script and
# handleUrlAndHeader() returned True (no redirect was sent).
# The `finally` clause prints the trailing literal even if main() raises.
# NOTE(review): the three printed literals are empty in this copy; they
# presumably held the document's opening and closing markup -- confirm
# against the real file.
if __name__ == "__main__" and handleUrlAndHeader():
try:
print ''
print ''
main()
finally:
print ''