#!/usr/bin/python import MySQLdb import time class GlobalUsageStats(object): def connect(self, host, db): self.output('Connecting to %s', host) self.conn = MySQLdb.connect(host = host, read_default_file = '~/.my.cnf', db = db) self.cur = self.conn.cursor() self.now = time.strftime('%Y%m%d%H%M%S') def load_all_categories(self): self.cur.execute('SELECT cat_id, cat_name FROM categories') self.categories = dict(self.cur.fetchall()) self.output('Loaded %i categories', len(self.categories)) def update_categorylinks(self, cat): self.output('Updating images in %s', self.categories[cat]) self.cur.execute(""" INSERT INTO categorylinks (cl_category, cl_image, cl_timestamp) SELECT %s, page_title, %s FROM commonswiki_p.categorylinks JOIN commonswiki_p.page ON page_id = cl_from WHERE page_namespace = 6 AND cl_to = %s """, (cat, self.now, self.categories[cat]) ) self.conn.commit() self.output('Inserted %i images', self.cur.rowcount) def update_statistics(self, cat): self.output('Updating statistics in %s', self.categories[cat]) self.cur.execute(""" SELECT cl_image FROM categorylinks WHERE cl_category = %s AND cl_timestamp = %s """, (cat, self.now)) images = [i[0] for i in self.cur.fetchall()] for i in images: self.cur.execute(""" INSERT IGNORE INTO stats (st_image, st_wiki, st_timestamp, st_count) SELECT gil_to, gil_wiki, %s, COUNT(*) AS c FROM commonswiki_p.globalimagelinks WHERE gil_to = %s GROUP BY gil_wiki """, (self.now, i)) self.output('%s used on %i wikis', i, self.cur.rowcount) self.conn.commit() def output(self, msg, *args): print time.strftime('[%Y-%m-%d %H:%M:%S]'), msg % args def run(self): self.load_all_categories() for cat in self.categories: self.update_categorylinks(cat) self.update_statistics(cat) if __name__ == '__main__': import sys stats = GlobalUsageStats() stats.connect(sys.argv[1], sys.argv[2]) stats.run()