'''
Created on Apr 12, 2011

@author: ola,rcd
'''
# GIVEN
def getFile (name=''):
    """
    get a file for reading from a variety of sources:
    - internet, via URL (any name starting with 'http')
    - local computer, via file name
    - local computer, chosen by the user (when name is '')

    returns an open file-like object; the caller must close it

    NOTE(review): on Python 3 a URL handle yields bytes from read(),
      not str -- callers that do string operations on the result
      may need to decode it first; confirm against the callers
    NOTE(review): the chooser dialog presumably returns None when the
      user cancels -- callers do not check for that; confirm
    """
    if name == '':
        # imported only when the dialog is needed, so a missing Tk
        #   installation does not break the URL and file-name cases
        try:
            import tkFileDialog as fileDialog               # Python 2
        except ImportError:
            from tkinter import filedialog as fileDialog    # Python 3
        return fileDialog.askopenfile()
    elif name.startswith('http'):
        try:
            from urllib import urlopen                      # Python 2
        except ImportError:
            from urllib.request import urlopen              # Python 3
        return urlopen(name)
    else:
        return open(name)


# GIVEN
def getFileLetters (name=''):
    """
    open the given file and return its contents as a single string,
      just text and spaces

    leading and trailing whitespace is stripped and every newline
      is replaced by a single space; name is passed straight to getFile
    """
    f = getFile(name)
    try:
        contents = f.read()
    finally:
        # release the handle even when the read itself fails
        f.close()
    return contents.strip().replace('\n', ' ')


# GIVEN
def getFileWords (name=''):
    """
    open the given file and return its contents as a list of strings,
      all the words followed by spaces

    words are the whitespace-separated pieces of the file, each with
      one space appended; name is passed straight to getFile
    """
    f = getFile(name)
    try:
        contents = f.read()
    finally:
        # release the handle even when the read itself fails
        f.close()
    # split() with no argument already ignores leading/trailing whitespace
    return [ word+' ' for word in contents.split() ]


# GIVEN
def printout (text, lineSize):
    """
    print the string text such that each line printed
      is close to that specified by int lineSize

    a line is emitted once it holds at least lineSize characters and
      ends in a space; any remainder that never reaches that point is
      printed as a final shorter line.  the output always ends with
      print('\\n') as a separator
    """
    line = ''
    for t in text:
        line += t
        # only break right after a space; endswith is also safe
        #   when line is still empty
        if len(line) >= lineSize and line.endswith(' '):
            print(line)
            line = ''
    # flush the tail so the end of the text is not lost
    if line:
        print(line)
    print('\n')


# COMPLETED IN LAB
def makeGrams (text, size):
    """
    return a list of tuples, each of the given size, 
      that represent all slices from the given text of length size
      (i.e., the first item in the list to return is the first size items,
       the next item starts at index 1 and is of length size, and the 
       last item of the list is the last size items in the text)

    text may be any sliceable sequence (a string of letters or a list
      of words); the result is empty when size is not positive or when
      text holds fewer than size items
    """
    if size <= 0:
        # a non-positive size has no meaningful slices
        return []
    # the last valid start index is len(text) - size, so every
    #   slice taken here is exactly size items long
    return [ tuple(text[start:start+size])
             for start in range(len(text) - size + 1) ]


# COMPLETED IN LAB
def makeFollowLists (slices):
    """
    return dictionary where:
      key is a tuple of all but the last item in each slice
      value is a list of all possible last items for the key
      (i.e., the following slices: 
         [ ('t','h','a'), ('t','h','e'), ('t','h','i') ]
       will be represented in the dictionary as:
         { ('t','h'):['a','e','i'] })

    followers are kept in the order seen and repeats are kept too, so
      an item that follows a key more often appears more often in
      that key's list.  empty slices are skipped
    """
    followLists = {}
    for gram in slices:
        if not gram:
            # an empty slice has no last item to record
            continue
        key = tuple(gram[:-1])
        followLists.setdefault(key, []).append(gram[-1])
    return followLists


# COMPLETED IN LAB
def generateText (followLists, size):
    """
    return string that is generated randomly
      using the dictionary followLists to find each next item in the text

    followLists maps tuple keys to lists of string items that may
      follow them.  size is the number of generation steps: each step
      either appends one randomly chosen follower of the current key,
      or, when the current key has no entry, picks a fresh random key
      and appends nothing -- so the result holds at most size items.
      returns '' when followLists is empty
    """
    import random
    if not followLists:
        # nothing to seed from; random.choice would fail on an empty list
        return ''
    # dictionary views cannot be indexed on Python 3, so copy the keys once
    seeds = list(followLists.keys())
    pieces = []
    predictor = random.choice(seeds)
    for x in range(size):
        if predictor in followLists:
            nxt = random.choice(followLists[predictor])
            pieces.append(nxt)
            # slide the window: append the new item, then drop the oldest,
            #   which keeps the key length unchanged (even for empty keys)
            predictor = (predictor + (nxt,))[1:]
        else:
            predictor = random.choice(seeds)
    return ''.join(pieces)


# MAIN
def main (filename, nGram, textSize, numRuns, gramGeneratorFunc):
    """
    build the follow lists from a training text and print sample output

    filename is handed to gramGeneratorFunc, which returns the training
      text as a sequence of items; nGram is the slice size given to
      makeGrams; textSize is passed to generateText for each sample;
      numRuns is how many samples are generated and printed
    """
    trainingText = gramGeneratorFunc(filename)
    slices = makeGrams(trainingText, nGram)
    followLists = makeFollowLists(slices)
    # show internal structure: a count and the first 20 entries of each
    print(str(len(trainingText)) + '\t' + str(trainingText[:20]))
    print(str(len(slices)) + '\t' + str(slices[:20]))
    # items() cannot be sliced on Python 3, so copy it into a list first
    print(str(len(followLists)) + '\t' + str(list(followLists.items())[:20]))
    print('\n')
    # show example generated texts, wrapped at roughly 70 characters
    for n in range(numRuns):
        randomText = generateText(followLists, textSize)
        printout(randomText, 70)


# run the demo: train on kjv10.txt with 4-item grams of letters and
#   print 10 samples of 200 generation steps each
# NOTE: this call is unguarded, so it also runs when the module is imported
main(filename="kjv10.txt", nGram=4, textSize=200, numRuns=10,
     gramGeneratorFunc=getFileLetters)
