#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Readability.py v1.4
"""
# (c) 2008 - Dispenser
#
# TODO:
# add per paragraph readability scores, width = readability,
# height=amount of text parsed, separated by sections
import re
import math
import wikipedia, pagegenerators
import cgitb; cgitb.enable()
try:
    import parser
except ImportError:
    # Optional dependency: callers must check `parser is None` before use.
    parser = None

# User-facing advisory messages.
ArticleWordStub = "This article seems to be a stub. Readability scores will not be accurate."
ArticleWordLarge = "This article seems to be too long and probably needs to be split."
ArticleLowIndex = "Longer words and more compound or complex sentences may make this article more interesting, more precise and less wordy."
ArticleHighIndex = "This article seems to have too many long words and sentences for even most university graduates to easily read and understand."
ArticleSizeLarge = "This article almost certainly should be divided up."
ArticleSizeBig = "Probably should be divided (although the scope of a topic can sometimes justify the added reading time)"

# How AWB does it
# Remove tables and templates
# Count instances using r"[a-zA-Z]+"
flags = re.UNICODE
# A word is a run of Unicode "word" characters that are not digits.
WordCount = re.compile(r'[^\W\d]+', flags) #should include \-
# A sentence end is a word of 2+ characters, optional closing quotes/brackets,
# terminal punctuation, then whitespace or a quote/bracket.
SentenceCount = re.compile(r'\w*\w\w[ \'"\])]*[\.\!\?]+[\s\'"\]()]', flags)


def average(list):
    """Return the arithmetic mean of a sequence of numbers as a float.

    An empty sequence yields 0 instead of raising ZeroDivisionError.
    (The parameter name shadows the builtin; it is kept so that existing
    keyword callers continue to work.)
    """
    # Truthiness rather than `== []`, so empty tuples and other empty
    # sequences are also treated as "no data".
    if not list:
        return 0
    return float(sum(list)) / float(len(list))


def grade2ages(grade):
    """Format a U.S. grade level as a "low/high" reader age range.

    The bounds are grade + 4.8 and grade + 5.8; `%d` truncates each bound
    to an integer.
    """
    # FIXME document where the 5.3 comes from
    return "%d/%d" % (grade+4.8, grade+5.8) # 5.3 +/- .5


def wikiLink(text, linkClass='extiw'):
    """Replace every [[target|label]] / [[target]] wiki link in `text` with HTML.

    The link target is the part before the pipe when present, otherwise the
    label itself; spaces become underscores and leading/trailing underscores
    are stripped. Returns the converted text.
    """
    # NOTE(review): `hostname` is not defined in the visible part of this
    # file - presumably a module-level global set elsewhere; confirm.
    for m in re.finditer(r'\[\[(?:([^][|]*)\||)(.*?)\]\]', text):
        link = (m.group(1) or m.group(2)).replace(' ', '_').strip('_')
        # NOTE(review): the format string here had been reduced to a bare
        # '%s', which raises TypeError with four arguments. The anchor below
        # is a reconstruction that matches the argument list (host, target,
        # CSS class, label) - verify the exact markup against the deployed
        # version.
        anchor = r'<a href="http://%s/wiki/%s" class="%s">%s</a>' % (
            hostname, link, linkClass, m.group(2),)
        text = text.replace(m.group(), anchor)
    return text


# NOTE(review): the definition of rowPrint is truncated in this copy of the
# file (its body breaks off inside a string literal) and cannot be
# reconstructed from what remains. The surviving fragment is preserved here:
# def rowPrint(*cells): print '
%s' % (page.hostname(), page.site().language(), page._redirarg, page._redirarg)
return
if not wikitext.strip():
wikipedia.output('Page does not exist')
return
elif '{{disambig' in wikitext:
printWiki(wikitext, docroot='readability.py?page=%s:' % page.site().language(), linkClass = '')
return
print '| Text | Proses | |
|---|---|---|
| Counts | ||
| Averages | ||
| ' print 'Wikitext: %3.3g KB' % (len(wikitext)/1024.) print '' % (len(wikitext)/350) print 'Text: %3.3g KB' % (textstats.characters/1024.) print '' % (textstats.characters/350) print 'Proses: %3.3g KB' % (prosestats.characters/1024.) print '' % (prosestats.characters/350) print ' |
| Test | U.S. grade level | Reader age | Scale |
|---|---|---|---|
| Do not rely on information presented here. This tool produces incorrect output due to limitations in its wikitext to HTML parsing and a spurious syllable counter; not to mention the uncertainty of the readability algorithms. |