'''
Created on March 21, 2011

@author: ola
'''
import time

def fileStatsList(filename):
    '''
    Count word occurrences in a file using a list of [word, count] pairs.

    The file is split on whitespace and every word is lower-cased before
    counting. Each lookup is a linear scan over the pairs seen so far; that
    is intentional, since this function is the list-based counterpart that
    benchmark() times against fileStatsDictionary().

    filename -- path of the text file to read
    returns  -- list of [word, count] lists, in order of first appearance
    '''
    # The context manager closes the file even if read() raises.
    with open(filename) as source:
        words = source.read().split()

    stats = []
    for word in words:
        word = word.lower()
        for pair in stats:
            if pair[0] == word:
                pair[1] += 1
                break
        else:
            # No existing pair matched: first time this word is seen.
            stats.append([word, 1])
    return stats

def fileStatsDictionary(filename):
    '''
    Count word occurrences in a file using a dictionary.

    The file is split on whitespace and every word is lower-cased before
    counting.

    filename -- path of the text file to read
    returns  -- plain dict mapping each lower-cased word to its count
                (kept a plain dict because benchmark() dispatches on the
                result type)
    '''
    # The context manager closes the file even if read() raises.
    with open(filename) as source:
        words = source.read().split()

    stats = {}
    for word in words:
        word = word.lower()
        stats[word] = stats.get(word, 0) + 1
    return stats

def max_list(data):
    '''
    Return the most frequent entry of a list of [word, count] pairs.

    data    -- iterable of two-element sequences (word, count)
    returns -- (count, word) tuple with the largest count; ties on count
               are broken by the larger word
    raises  -- IndexError if data is empty
    '''
    pairs = [(elt[1], elt[0]) for elt in data]
    if not pairs:
        # Keep the exception type callers saw from the old sorted(...)[-1].
        raise IndexError("max_list() called with no entries")
    # A single max() pass replaces sorting the whole list for one element.
    return max(pairs)

def max_dict(data):
    '''
    Return the most frequent entry of a word -> count dictionary.

    data    -- dict mapping word to count
    returns -- (count, word) tuple with the largest count; ties on count
               are broken by the larger word
    raises  -- IndexError if data is empty
    '''
    pairs = [(count, word) for (word, count) in data.items()]
    if not pairs:
        # Keep the exception type callers saw from the old sorted(...)[-1].
        raise IndexError("max_dict() called with no entries")
    # A single max() pass replaces sorting the whole list for one element.
    return max(pairs)

def benchmark(filename):
    '''
    Time each word-counting implementation on a file and print the results.

    For every implementation this prints its function name followed by the
    elapsed wall-clock seconds, then a "most freq" line with the
    (count, word) tuple of the most common word.

    filename -- path of the text file to analyse
    '''
    funcs = [fileStatsList, fileStatsDictionary]
    for f in funcs:
        start = time.time()
        data = f(filename)
        end = time.time()
        # Single-argument print(...) produces the same output under both
        # Python 2 and Python 3; f.__name__ replaces slicing repr(f).
        print("%s %s" % (f.__name__, end - start))
        if isinstance(data, dict):
            print("most freq %s" % (max_dict(data),))
        else:
            print("most freq %s" % (max_list(data),))
    

if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default corpus
    # path, so the script can run where /data/poe.txt does not exist.
    benchmark(sys.argv[1] if len(sys.argv) > 1 else '/data/poe.txt')
