'''
Created on Apr 4, 2018

@author: ola
'''

def most_frequent(fname):
    """Return the single most frequently occurring word in a text file.

    Words are the whitespace-separated tokens of the file's contents.
    When several words share the highest count, the one that compares
    greatest as a string is returned.

    fname -- path of the file to read

    Raises IndexError if the file contains no words.
    """
    from collections import Counter

    # The context manager guarantees the handle is closed after reading.
    with open(fname) as source:
        counts = Counter(source.read().split())

    if not counts:
        raise IndexError("no words found in %r" % (fname,))

    # Compare on (count, word) so ties are broken by the word itself.
    word, _ = max(counts.items(), key=lambda item: (item[1], item[0]))
    return word

def most_frequent2(fname):
    """Return every word that occurs the maximum number of times in a file.

    Words are the whitespace-separated tokens of the file's contents.
    The result is a list, ordered by each word's first appearance in
    the file, of all words tied for the highest count.

    fname -- path of the file to read

    Raises ValueError (from max) if the file contains no words.
    """
    from collections import Counter

    # The context manager guarantees the handle is closed after reading.
    with open(fname) as source:
        counts = Counter(source.read().split())

    highest = max(counts.values())
    return [word for word, total in counts.items() if total == highest]

def most_similar(data):
    """Return the pair of entries in data sharing the most distinct elements.

    Each entry is compared only with the entries that follow it, so
    no entry is compared with itself and each pair is checked once.
    When several pairs tie, the first one encountered wins.

    data -- sequence of iterables of hashable items

    Returns a 2-tuple of the two chosen entries, or ([], []) when data
    holds fewer than two entries.
    """
    winner = ([], [])
    top_overlap = -1
    for pos, first in enumerate(data):
        for second in data[pos + 1:]:
            overlap = len(set(first).intersection(second))
            # Strict comparison keeps the earliest pair on ties.
            if overlap > top_overlap:
                top_overlap = overlap
                winner = (first, second)
    return winner

def most_similar_bad(data):
    """Return the pair of entries in data sharing the most distinct elements.

    Unlike most_similar, every entry is paired with every entry,
    including itself. An entry therefore always overlaps fully with
    itself, and for non-empty input the result is usually one entry
    paired with itself.
    NOTE(review): the name suggests this flaw is intentional (kept as a
    counter-example) -- confirm before "fixing" it.

    data -- iterable of iterables of hashable items

    Returns a 2-tuple of the two chosen entries, or ([], []) when data
    is empty.
    """
    winner = ([], [])
    top_overlap = -1
    every_pairing = ((first, second) for first in data for second in data)
    for first, second in every_pairing:
        overlap = len(set(first).intersection(second))
        # Strict comparison keeps the earliest pairing on ties.
        if overlap > top_overlap:
            top_overlap = overlap
            winner = (first, second)
    return winner

if __name__ == '__main__':
    # Demo driver: report the most frequent word(s) in a fixed text file
    # using both counting functions, then print the pair chosen by
    # most_similar_bad for a small sample of integer lists.
    data = [[1,2,3], [2,3,4], [5,6,1], [5,2,1], [7,8,2], [5,8,1]]
    fname = "/data/shakespeare/romeo.txt"
    for finder in (most_frequent, most_frequent2):
        print(finder(fname))
    print(most_similar_bad(data))