# proof of concept for searching reddit and sorting results according to karma

# by meowmix4jo

#

# Tested using Python 2.6.5

#

# This script searches reddit via the google AJAX api and then grabs the karma from reddit.

# These results are then stored in a list, which gets sorted and printed.

# It shouldn't be too hard to get a web version going if there is interest.

#

# Output is:

# x - y

# z

# x is karma, y is title, z is url

#

# Google and reddit both return results in JSON, simplejson was used to parse this data.

# If you don't have this it can be downloaded at

# http://pypi.python.org/pypi/simplejson

#

# I've heard a lot of people ask for search where you can sort by karma,

# but I've never seen someone actually attempt it.

###regex samples###

#sub-reddit

#reddit\.com/r/[^/]*/\Z

#post

#reddit\.com/r/[^/]*/comments/[^/]*/[^/]*/\Z

#reddit\.com/r/[^/]*/comments/[^/]*/\Z

#comment

#reddit\.com/r/[^/]*/comments/[^/]*/[^/]*/[^/]*/\Z

import urllib , httplib , simplejson , re , urlparse

def custom_itemgetter(*items):
    """Return a key function like operator.itemgetter, with a 'null' hack.

    Entries for subreddits, user pages, etc. store the string 'null' instead
    of a karma score (they have none).  This getter maps that sentinel value
    to 9999999 so those entries sort ahead of every real submission when the
    list is sorted by score with reverse=True.

    Bug fixed: the original compared the requested *index* (items[0]) against
    'null' rather than the looked-up value, so the hack never triggered — and
    had it triggered, obj[9999999] would have raised IndexError.
    """
    NULL_SCORE = 9999999

    def _value(obj, key):
        # Substitute the sentinel score when the stored value is 'null'.
        v = obj[key]
        return NULL_SCORE if v == 'null' else v

    if len(items) == 1:
        item = items[0]

        def g(obj):
            return _value(obj, item)
    else:
        def g(obj):
            return tuple(_value(obj, item) for item in items)

    return g

##Change what you want to search for here##

# Search term; it is combined with 'site:reddit.com' in the Google query below.
search = 'meow'

##---##

# Hostname of Google's AJAX search API (queried over HTTP below).
google_api = 'ajax.googleapis.com'

# Hostname used in the header's sample permalink regexes; kept for reference.
reddit_api = 'reddit.com'

# Ask Google's AJAX web-search API for reddit pages matching the search term.
params = urllib.urlencode({'v': '1.0', 'q': 'site:reddit.com ' + search, 'rsz': 'large'})
query = '/ajax/services/search/web?' + params

google = httplib.HTTPConnection(google_api)
google.request("GET", query)
search_results = google.getresponse()

# The API answers with JSON; the hits live under responseData -> results.
gjson = simplejson.loads(search_results.read())
results = gjson['responseData']['results']

to_sort = []
x = 0

# Permalink patterns for submissions and comments, restored from the regex
# samples in the file header.  The original inline literals had stray spaces
# ('reddit \. com ... \Z ') baked into them, so they could never match a URL;
# they are also compiled once here instead of on every loop iteration.
_POST_PATTERNS = [
    re.compile(r'reddit\.com/r/[^/]*/comments/[^/]*/[^/]*/\Z'),
    re.compile(r'reddit\.com/r/[^/]*/comments/[^/]*/\Z'),
    re.compile(r'reddit\.com/r/[^/]*/comments/[^/]*/[^/]*/[^/]*/\Z'),
]

# We have the results from google, let's get post information from reddit
for i in results:
    # Drop the query string and fragment -- they break the '.json' fetch below.
    parts = urlparse.urlparse(urllib.unquote(i['url']))
    i['url'] = urlparse.urlunparse([parts[0], parts[1], parts[2], parts[3], '', ''])

    # Make sure all URLs end with a slash so the \Z-anchored regexes match.
    if not i['url'].endswith('/'):
        i['url'] += '/'

    if any(p.search(i['url']) for p in _POST_PATTERNS):
        # Submission or comment page: fetch its JSON listing to read the score.
        reddit = urllib.urlopen(i['url'] + '.json')
        rjson = simplejson.loads(reddit.read())
        score = rjson[0]['data']['children'][0]['data']['score']
        to_sort.append([score, i['title'].encode('utf-8'), i['url']])
    else:
        # Subreddits, user pages, etc. have no single score; tag them 'null'
        # so custom_itemgetter floats them to the top of the sorted output.
        to_sort.append(['null', i['title'].encode('utf-8'), i['url']])

    x += 1

# Order the collected results so the highest-karma entries come first
# ('null' placeholders are hoisted above real scores by custom_itemgetter).
to_sort.sort(key=custom_itemgetter(0), reverse=True)

for rec in to_sort: