#!/usr/bin/python import urllib import sys import os import cPickle import re import psyco psyco.full() from sets import Set from fcntl import * if __name__ != '__main__': from mod_python import apache class MyURLopener(urllib.URLopener): version = 'http://nikita.ca/lj/clique.py (ljclique@nikita.ca)' global myurlopen myurlopen = MyURLopener() def parsefoaf(name,req): cached = False filename = 'fdata/' + name url = 'http://www.livejournal.com/misc/fdata.bml?user=%s' % name if os.path.exists(filename): f = open(filename) fcntl(f.fileno(), LOCK_SH) friends = cPickle.load(f) cached = True f.close() else: print >>req, 'Retrieving', url, '
' attemptsleft = 2 while True: try: fdata = myurlopen.open(url) friends = [] lines = fdata.readlines() friendsof = [l[2:-1] for l in lines if l.startswith('< ')] friends = [l[2:-1] for l in lines if l.startswith('> ')] friends = [f for f in friends if f in friendsof] break except: attemptsleft -= 1 if attemptsleft == 0: req.write('

Unable to retrieve '+url+' after 2 tries, giving up.

') print_form(req) f = open(filename, 'w') flock(f, LOCK_EX) cPickle.dump(friends, f) f.close() return (friends,cached) def print_form(req): print >>req, form(os.path.basename(req.uri)) print >>req, """

This may take a little while, especially if it has to retrieve a lot of friends data. If your browser times out, try reloading.

""" print >>req, '

Created by Nikita Borisov / %s, with new algorithm by %s. This entry describes how it works.

' % (lj_user('hukuma'), lj_user('ciphergoth')) print >>req, '' if __name__ == '__main__': sys.exit(0) else: raise apache.SERVER_RETURN, apache.OK def form(uri): f = '
\n' % uri f += """

Find the largest clique containing:
(Enter your livejournal username here).

""" return f def lj_user(user, lj=False): if lj: return '' % user else: return "[info]%s" % (user,user,user) def getfriends(root,req): friends = {} (friends[root],cached) = parsefoaf(root,req) friends[root] = Set(friends[root]) - Set([root]) for name in friends[root]: (fr,c) = parsefoaf(name,req) cached = cached or (c and name != root) friends[name] = Set(fr) & (friends[root] | Set([root])) return (friends,cached) def largest_cliques_among(candidates, mutual_friends, min_size, gregariosity=None): lc = len(candidates) if lc < min_size: return [] if lc == 0: return [Set([])] if gregariosity is None: gregariosity = {} for m in candidates: gregariosity[m] = len(mutual_friends[m] & candidates) (best_cand_score, best_cand) = min([(s, m) for (m, s) in gregariosity.iteritems()]) if best_cand_score == lc -1: return [candidates] best_cand_set = Set([best_cand]) candidates -= best_cand_set best_cand_friends = mutual_friends[best_cand] & candidates del gregariosity[best_cand] for m in best_cand_friends: gregariosity[m] -= 1 without = largest_cliques_among(candidates, mutual_friends, min_size, gregariosity) if len(without) > 0: min_size = len(without[0]) with = largest_cliques_among(best_cand_friends, mutual_friends, min_size -1) for x in with: x |= best_cand_set if len(with) > 0 and len(with[0]) > min_size: return with return with + without def output(biggest,lj=False): s = '' if len(biggest) > 1: s = 's' out = "

I am a member of %d clique%s of size %d

" % (len(biggest), s, len(biggest[0])) if len(biggest) > 1: out += '\n' else: out += '

' + ', '.join([lj_user(f,lj) for f in biggest[0]]) + '

\n' return out def unlink(f): if os.path.exists(f): try: os.unlink(f) except: pass class nowrite: def write(self,data): pass def sanitycheck(friends): friendsof = {} for u in friends.keys(): friendsof[u] = Set([f for f in friends.keys() if u in friends[f]]) for u in friends.keys(): friends[u] = (friends[u] & friendsof[u]) - Set([u]) def index(req,root=None,recompute=None): if __name__ != '__main__': os.chdir('/home/nikitab/public_html/lj') req.content_type = "text/html" header = """ Livejournal Clique Finder """ if root == None or root == "": print >>req, header print_form(req) root = root.lower() if re.search('[^A-Za-z0-9_]', root): print >>req, header print >>req, '

Invalid username

' print >>req, '

Please enter your livejournal username into the search box

' print_form(req) if recompute: (friends,x) = parsefoaf(root,nowrite()) for f in ['fdata/'+f for f in friends]: unlink(f) unlink('fdata/' + root) unlink('fdata/%s.result' % root) url = '%s?root=%s' % (os.path.basename(req.uri), root) return """ Cache cleared...

Cache cleared. Restart the search.

""" % (url,url) print >>req, header print >>req, "

Finding largest clique containing %s

" % root usernames = [root] try: filename = 'fdata/%s.result' % root f = file(filename) flock(f.fileno(), LOCK_SH) biggest = cPickle.load(f) f.close() print >>req, '

Retrieved cached result

' print >>sys.stderr, '[clique] %s satisfied from cache' % root cached = True sys.stderr.flush() except: req.write('

') req.write('Retrieving %s\'s friends
' % root) (friends,cached) = getfriends(root,req) req.write('

') req.write('Finding cliques
') # remove inconsistencies # FIXME: should really give a warning message / reload cached data sanitycheck(friends) if len(friends[root]) <= 1: print >>req, """

%s does not have any mutual friends, please verify spelling.

""" % root print_form(req) biggest = largest_cliques_among(friends[root], friends, 0) biggest = [r | Set([root]) for r in biggest] print >>sys.stderr, "Found clique", len(biggest[0]), biggest sys.stderr.flush() try: f = open(filename, 'w') flock(f, LOCK_EX) cPickle.dump(biggest, f) f.close() except: if os.path.exists(filename): os.unlink(filename) if __name__ != '__main__': print >>req, output(biggest) if cached: print >>req, """

Some friends data was cached. If you think it was out of date, you can recompute from scratch. Note that this will take quite some time.

""" \ % (os.path.basename(req.uri), root) print >>req, '

Paste this into your journal:

' print >>req, '' print_form(req) def source(req): req.content_type = "text/plain" return file(__file__).read() if __name__ == '__main__': class Req: def __init__(self): self.uri = '/~nikitab/lj/clique2.py' self.hostname = 'n5.ca' def write(self, data): sys.stdout.write(data) if len(sys.argv) > 2: index(Req(), sys.argv[1], "yes") else: index(Req(), sys.argv[1])