'''
Created on Mar 19, 2018

@author: ola
'''
import time,string

def clean(word):
    '''
    Return word with leading and trailing punctuation removed and
    every letter converted to lower case.

    Punctuation means any character in string.punctuation. Characters
    in the interior of the word (such as the apostrophe in "don't")
    are kept. A word that is empty or consists only of punctuation
    yields the empty string.
    '''
    # str.strip with a character set trims both ends in one call; it also
    # copes with the empty string and with punctuation-only words, which a
    # hand-written index scan is easy to get wrong at index 0.
    return word.strip(string.punctuation).lower()

def fileToList(filename):
    '''
    Read the text file named filename and return its words as a list.

    The entire file is read, converted to lower case and split on
    whitespace. Each resulting token is passed through clean(), so the
    list holds one entry per whitespace-separated token, in file order.
    '''
    # The with-block guarantees the file is closed even if read() raises.
    with open(filename) as f:
        st = f.read().lower()
    # split() with no argument already ignores leading/trailing whitespace.
    return [clean(w) for w in st.split()]

def fastcount(words):
    '''
    Tally how often each word occurs, using one pass over words
    and a dictionary keyed by word.

    Returns a list of (word, count) tuples in ascending sorted order.
    '''
    tally = {}
    for token in words:
        # get() supplies 0 the first time a token is seen
        tally[token] = tally.get(token, 0) + 1
    result = list(tally.items())
    result.sort()
    return result

def slowcount(words):
    '''
    Tally how often each word occurs by calling words.count() once
    for every distinct word, i.e. rescanning the whole list each time.

    Returns a list of (word, count) tuples in ascending sorted order.
    '''
    tallies = []
    for token in set(words):
        occurrences = words.count(token)
        tallies.append((token, occurrences))
    tallies.sort()
    return tallies

if __name__ == '__main__':
    # Load the word list once so both counters see the same input.
    words = fileToList("data/hawthorne.txt")

    # Time each counting strategy in turn, slow one first, and keep
    # the results so they can be compared afterwards.
    results = []
    for counter in (slowcount, fastcount):
        began = time.process_time()
        counted = counter(words)
        ended = time.process_time()
        print("time to run %.3f" % (ended - began))
        results.append(counted)

    # Both strategies should produce exactly the same sorted pairs.
    slow_pairs, fast_pairs = results
    if slow_pairs != fast_pairs:
        print("not equal")