#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Provides a count of the number of backlinks to a page.

Parameters:
    db          - The name of the database (alias: dbname).
                  Examples: frwiki for the French Wikipedia,
                  jawiktionary for Japanese Wiktionary
                  Default: enwiki
    callback    - If specified, wraps the output into a given function
                  call and changes Content-Type to text/javascript for
                  script use.
    filterredir - How to filter for redirects
                  One value: all, redirects, nonredirects
                  Default: all
    ns          - The assumed namespace of the page
                  Default: 0
    title       - Title to search.

Examples:
    http://toolserver.org/~dispenser/cgi-bin/backlinkscount.py?title=User:Jimbo%20Wales
    http://toolserver.org/~dispenser/cgi-bin/backlinkscount.py?title=Jimbo_Wales&ns=2&callback=bragging_rights&filterredir=nonredirects&db=enwiki

Notes:
  * The "What links here" page combines the results of both backlinks
    and embeddedin (transclusions).
  * For transclusions count use embeddedincount.py tool.
  * This tool does not support namespace localization, use English
    namespaces or the ns parameter instead.
  * Source code is at http://toolserver.org/~dispenser/sources/backlinkscount.py
"""
# I, Dispenser, hereby release this program into the public domain
# November 2009
#
# NOTE: every print below is written as print(<single expression>) so that it
# produces the same output whether print is a statement (Python 2) or a
# function (Python 3).
import cgi
import cgitb
import os

import oursql

cgitb.enable(logdir='tracebacks')

# English namespace prefix -> namespace number.  Keys are spelled the way
# str.capitalize() renders them, because that is how a title prefix is
# normalised before the lookup (hence "Mediawiki" rather than "MediaWiki").
namespaces = {
    'Media': -2,
    'Special': -1,
    '': 0,
    'Talk': 1,
    'User': 2,
    'User_talk': 3,
    'Wikipedia': 4,
    'Wikipedia_talk': 5,
    'Image': 6,
    'Image_talk': 7,
    'Mediawiki': 8,  # name fudged
    'Mediawiki_talk': 9,  # name fudged
    'Template': 10,
    'Template_talk': 11,
    'Help': 12,
    'Help_talk': 13,
    'Category': 14,
    'Category_talk': 15,
}


def _parse_namespace_number(raw):
    """Return *raw* as an int, or None when it is not an integer.

    A blank value falls back to the main namespace (0).
    """
    raw = (raw or '').strip() or '0'
    try:
        return int(raw)
    except (TypeError, ValueError):
        return None


def _split_namespace(title, ns):
    """Split a recognised namespace prefix off *title*.

    Returns an ``(ns, title)`` pair.  When the text before the first colon
    is a key of ``namespaces`` the pair is that namespace's number and the
    remainder of the title (an empty prefix, as in ":Foo", selects the main
    namespace).  Otherwise the colon is part of the page name, e.g.
    "Star_Wars:_Episode_I", and both *ns* and *title* are returned
    untouched.
    """
    if ':' not in title:
        return ns, title
    prefix, rest = title.split(':', 1)
    prefix_ns = namespaces.get(prefix.capitalize().strip('_:'))
    if prefix_ns is None:
        return ns, title
    return prefix_ns, rest


def _count_backlinks(cursor, ns, title, filterredir):
    """Run the counting queries on *cursor* and return the summed count.

    Links pointing straight at (ns, title) are counted unless *filterredir*
    is 'nonredirects'; links pointing at pages that redirect to (ns, title)
    are counted unless *filterredir* is 'redirects'.
    """
    # NOTE(review): the mapping of 'redirects'/'nonredirects' onto the two
    # queries is kept exactly as found -- confirm it matches the intended
    # meaning of the filterredir parameter.
    # The redirect pages themselves aren't included if you combine these
    # queries.
    count = 0
    # FIXME reports links from non-existent pages
    if filterredir != 'nonredirects':
        cursor.execute(
            "/* backlinkscount LIMIT:60 NM */"
            "SELECT COUNT(*) FROM pagelinks "
            "WHERE pl_namespace=? AND pl_title=?",
            (ns, title))
        count += cursor.fetchall()[0][0]
    if filterredir != 'redirects':
        cursor.execute("""
/* backlinkscount LIMIT:60 NM */
SELECT COUNT(*)
FROM redirect
JOIN page      ON rd_from=page_id
JOIN pagelinks ON pl_namespace=page_namespace AND pl_title=page_title
WHERE rd_namespace=? AND rd_title=?
""", (ns, title))
        count += cursor.fetchall()[0][0]
    return count


def _print_plain_response(status, lines):
    """Print a text/plain CGI response; *status* of None omits the header."""
    if status is not None:
        print('Status: %s' % status)
    print('Content-Type: text/plain; charset=utf-8')
    print('')
    for line in lines:
        print(line)


def main():
    """Handle one CGI request and print the response to stdout.

    Reads db/dbname, ns, title, callback and filterredir from the query.
    Without a title the module docstring is returned as help text; without
    a User-Agent header the request is refused.  Otherwise the backlink
    count is printed either bare (text/plain) or wrapped in the sanitised
    callback (text/javascript).

    Raises:
        oursql.Error: propagated from connecting or querying; the
            ``__main__`` guard turns it into a 500 response.
    """
    form = cgi.FieldStorage(keep_blank_values=1)
    db_name = form.getfirst('db', form.getfirst('dbname', 'enwiki'))
    title = form.getfirst('title', '').replace(' ', '_')
    callback = form.getfirst('callback')
    filterredir = form.getfirst('filterredir')

    if title == '':
        print('Content-Type: text/plain')
        # Explicit blank line: the end of the CGI headers must not depend on
        # how the docstring happens to begin.
        print('')
        print(__doc__)
        return

    if not os.getenv("HTTP_USER_AGENT"):
        _print_plain_response('419 No user agent', [
            'No user agent',
            'https://meta.wikimedia.org/wiki/User-Agent_policy',
        ])
        return

    ns = _parse_namespace_number(form.getfirst('ns', '0'))
    if ns is None:
        _print_plain_response('400 Bad Request', [
            'ns must be an integer namespace number',
        ])
        return
    ns, title = _split_namespace(title, ns)

    # Toolserver views hack
    if not db_name.endswith('_p'):
        db_name += '_p'
    conn = oursql.connect(
        db=db_name,
        host=db_name.replace('_', '-') + '.rrdb.toolserver.org',
        read_default_file="/home/dispenser/.my.cnf",
        charset=None,
        use_unicode=False)
    try:
        count = _count_backlinks(conn.cursor(), ns, title, filterredir)
    finally:
        # Release the connection even when a query raises.
        conn.close()

    if callback is not None:
        # Keep only characters that are safe in a JavaScript identifier path.
        callback = ''.join(c for c in callback if c.isalnum() or c in '._')

    # keep_blank_values=1 turns "&callback=&" into '' (not None), and
    # sanitising can empty the name too; "(N)" is not a useful script, so
    # fall back to the plain response in both cases.
    if callback:
        print('Content-Type: text/javascript; charset=utf-8')
        print('')
        print('%s(%d)' % (callback, count))
    else:
        _print_plain_response(None, [count])


if __name__ == "__main__":
    try:
        main()
    except oursql.Error as err:
        # FIXME this should be machine readable
        print('Status: 500 SQL error')
        print('Content-Type: text/plain; charset=utf-8')
        print('')
        # (errno, strerror) -- the first two fields of the driver error.
        print(tuple(err.args[:2]))