Had a spare hour last Thursday and decided to write a little twitter bot. There he is above. His name is Grammer_Man and he corrects other twitter users' misspellings, using data scraped from these Wikipedia pages.

Responses have been pouring in already, some agitated, some confused, but most positive -- which was a pleasant surprise. In any event, the minimal amount of effort in coding has paid off many times over in entertainment.

You can see who's responding at the moment by searching for @grammer_man, and also by checking his list of favourites.

Here is the (somewhat slapdash) code that powers our fearless spelling Nazi:

grabber.py

This module grabs the spelling data from Wikipedia.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Grabs common misspellings from Wikipedia and caches them on disk.
'''
import os
import pickle

import requests
from BeautifulSoup import BeautifulSoup

# Wikipedia keeps one list page per initial letter; %s is that letter.
URL_TEMPLATE = ('http://en.wikipedia.org/wiki/'
                'Wikipedia:Lists_of_common_misspellings/%s')
LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
CACHE_FILE = 'words.pkl'


def grab(letter):
    '''
    Grabs spellings from wikipedia

    Downloads the misspellings page for `letter` and returns a dictionary
    of {false: correct} built from every <li> element whose markup
    mentions 'plainlinks' and whose text has the form
    "misspelling (correction)".
    '''
    html = requests.get(URL_TEMPLATE % letter).content
    soup = BeautifulSoup(html)
    retval = {}
    for bullet in soup.findAll('li'):
        if 'plainlinks' not in repr(bullet):
            continue
        values = bullet.text.split('(')
        if len(values) != 2:
            # No parenthesis, or more than one: not a simple entry.
            continue
        mistake = values[0].strip()
        correct = values[1].strip()
        if correct.endswith(')'):
            correct = correct[:-1]  # shave off the ) at end
        retval[mistake] = correct.strip()
    return retval


def get_spellings():
    '''
    Returns a dictionary of {false: correct} spellings

    The dictionary is loaded from CACHE_FILE when that file exists.
    Otherwise every letter page is scraped with grab(), the merged result
    is pickled to CACHE_FILE and then returned.
    '''
    if os.path.exists(CACHE_FILE):
        # Pickle data is bytes, so the file is opened in binary mode.
        with open(CACHE_FILE, 'rb') as f:
            return pickle.load(f)

    retval = {}
    for c in LETTERS:
        print('Getting typos - %s' % c)
        retval.update(grab(c))

    print('Dumping...')
    with open(CACHE_FILE, 'wb') as f:
        pickle.dump(retval, f)
    return retval


if __name__ == '__main__':
    get_spellings()

bot.py

This module is the bot itself. It selects misspellings at random, searches Twitter for them and replies to the offending tweets, taking short breaks between tweets and longer breaks every few hours.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
The bot: picks misspellings at random, searches twitter for them and
replies to the tweets it finds with a correction.
'''
import os
import random
import time
import pickle

import twitter

from grabber import get_spellings

API = twitter.Api()

MAX_TWEET_LENGTH = 140       # longest message compose_message will return
MAX_COMPOSE_ATTEMPTS = 100   # random template picks before giving up
DONE_FILE = 'done.pkl'       # pickled list of tweet ids already answered
BOT_SCREEN_NAME = 'grammer_man'

_RAW_MESSAGES = u'''
Hey $USERNAME, didn't you mean $CORRECT there?
#
# All messages stored in here, one per line.
#
'''


def _is_template(line):
    ''' True for a usable message line, False for blank or "#" lines '''
    stripped = line.strip()
    if not stripped:
        return False
    # A lone '#' or a line starting with '# ' is a comment, not a message.
    return stripped != u'#' and not stripped.startswith(u'# ')


# Message templates, one per line. Comment and blank lines are dropped so
# that they can never be picked and posted as a tweet.
MESSAGES = [line for line in _RAW_MESSAGES.split('\n') if _is_template(line)]


def compose_message(twitter_post, mistake, correct):
    '''
    Choose a message from MESSAGES at random, substitute fields to
    personalise it and check if it exceeds the twitter message limit.
    Try this 100 times before failing.

    Returns the message, or None when no template fitted within
    MAX_TWEET_LENGTH characters (or MESSAGES is empty).
    '''
    if not MESSAGES:
        return None
    username = '@%s' % twitter_post.user.screen_name
    # A bounded loop: the attempt count really is limited, so templates
    # that are all too long end in None rather than an endless loop.
    for _ in range(MAX_COMPOSE_ATTEMPTS):
        message = random.choice(MESSAGES)
        message = message.replace('$USERNAME', username)
        message = message.replace('$MISTAKE', '"%s"' % mistake)
        message = message.replace('$CORRECT', '"%s"' % correct)
        if message and len(message) <= MAX_TWEET_LENGTH:
            return message
    return None


def correct_spelling(twitter_post, mistake, correct):
    '''
    Correct someone's spelling in a twitter_post

    Returns True when the reply was posted, False when no message could
    be composed or posting raised an exception.
    '''
    print(u'Correcting @%s for using %s...' % (
        twitter_post.user.screen_name, mistake))
    message = compose_message(twitter_post, mistake, correct)
    if not message:
        print(u'All messages were too long... Aborting...')
        return False
    try:
        API.PostUpdate(message, in_reply_to_status_id=twitter_post.id)
    except Exception as e:
        # Deliberately broad: one failed tweet should not stop the bot.
        print('Failed to submit tweet (%s).' % e)
        return False
    return True


def search(word):
    '''
    Search twitter for uses of a word, return one if it's been used
    recently. Otherwise return None.

    Tweets already answered, tweets by the bot itself and results whose
    text does not contain the word are skipped.

    TODO: Add time awareness.
    '''
    print('Searching for uses of %s...' % word)
    for result in API.GetSearch(word) or []:
        if check_if_done(result.id):
            continue
        if result.user.screen_name == BOT_SCREEN_NAME:
            continue
        if word in result.text:
            return result
    return None


def _load_done():
    ''' Returns the list of answered tweet ids, empty if none are stored '''
    if not os.path.exists(DONE_FILE):
        return []
    with open(DONE_FILE, 'rb') as f:
        return pickle.load(f)


def check_if_done(id):
    '''
    Checks if a tweet has already been responded to
    '''
    return id in _load_done()


def update_done(id):
    '''
    Updates a list of tweets that've been replied to
    '''
    done = _load_done()
    done.append(id)
    with open(DONE_FILE, 'wb') as f:
        pickle.dump(done, f)


def main():
    '''
    Main program flow

    Loops forever: picks a random misspelling, searches for a tweet using
    it and replies. After more than 100 replies the bot sleeps for two to
    four hours and starts counting again.
    '''
    words = get_spellings()
    # TODO: PROPERLY PRUNE THE MISTAKES/CORRECTIONS FROM WIKIPEDIA AND
    # REMOVE THIS:
    # Entries containing ',' or ';' are never tweeted, so they are dropped
    # once up front instead of being searched for and then discarded.
    candidates = [
        word for word in words
        if u',' not in word + words[word] and u';' not in word + words[word]
    ]
    if not candidates:
        print('No usable spellings found.')
        return

    counter = 0
    while True:
        word = random.choice(candidates)
        post = search(word)
        if counter > 100:
            rand_time = random.randint(120 * 60, 240 * 60)
            print('Done %s tweets, sleeping for %s minutes' % (
                counter, rand_time // 60))
            time.sleep(rand_time)
            counter = 0
        if not post:
            continue
        if correct_spelling(post, word, words[word]):
            counter += 1
            print('#%s Done' % counter)
            update_done(post.id)
            # NOTE(review): the original indentation of this sleep was lost;
            # it is assumed to be the break taken after each tweet - confirm.
            time.sleep(random.randint(300, 500))


if __name__ == '__main__':
    main()

Grammer_Man uses the following libraries: