'''
Created on Nov 8, 2010
Modified on April 4, 2011
@author: ola
'''

import re,random

def generate_from(nont,rules):
    """
    expand the non-terminal named by string nont (starts with <) into a
    list of words and return that list

    one production is picked at random from the list stored under nont in
    dictionary rules; each symbol of that production starting with < is
    itself expanded recursively, every other symbol is kept unchanged
    """
    production = random.choice(rules[nont])

    expansion = []
    for symbol in production:
        if not symbol.startswith("<"):
            # terminal: goes into the result as-is
            expansion.append(symbol)
            continue
        # non-terminal: splice in its own random expansion
        expansion += generate_from(symbol,rules)

    return expansion

def start(rules, width=45):
    """
    generate one random expansion of "<start>" from dictionary rules and
    print it, words separated by single spaces, followed by a blank line

    a new output line is begun whenever adding the next word would push the
    count of word characters on the current line past width (the separating
    spaces are not counted, so a printed line can be longer than width)
    returns None
    """
    lines = [[]]
    llen = 0
    for word in generate_from("<start>",rules):
        if llen+len(word) > width:
            lines.append([])
            llen = 0
        llen += len(word)
        lines[-1].append(word)

    # each line is printed as one pre-joined string: that form gives the
    # same output under the Python 2 print statement and the Python 3
    # print function
    for line in lines:
        print(" ".join(line))
    print("")

def parse_rule(rule,dictionary):
    """
    rule is a string holding one complete RSG grammar definition of the
    form {<non-terminal> production ; production ; ...}
    modifies dictionary (SIDE_EFFECT): the non-terminal becomes a key whose
    value is the list of its productions, each production being the list of
    whitespace-separated words found before a ';' (an earlier entry for the
    same non-terminal is replaced)
    text after the last ';' is ignored; if rule does not have the expected
    form a message is printed and dictionary is left unchanged
    returns None
    """
    pattern = r"{\s*(<[^>]+>)(.*)}"
    # DOTALL lets the body of the definition span several lines when rule
    # has not been flattened to a single line beforehand
    mat = re.match(pattern, rule, re.DOTALL)
    if not mat:
        # a single pre-formatted string prints the same text under both the
        # Python 2 print statement and the Python 3 print function
        print("%s doesn't match" % (rule,))
        return

    nont = mat.group(1).strip()
    remainder = mat.group(2)

    partPattern = r"([^;]+);"
    dictionary[nont] = [part.group(1).split()
                        for part in re.finditer(partPattern,remainder)]
    

def initialize(source):
    """
    Read all of source (an object with a read() method that returns text)
    and return a dictionary built from the grammar definitions found in it.
    Each key is a non-terminal; each value is the list of rules that can
    expand it (every rule is itself a list)
    """
    grammar = {}

    # collapse every run of whitespace to one space so a definition that
    # spans several lines is handed on as a single-line string
    flattened = re.sub(r"\s+"," ",source.read())

    # a definition is everything from a { up to the next }
    for definition in re.findall(r"{[^}]+}", flattened):
        parse_rule(definition,grammar)

    return grammar
    
def main():
    """
    load the grammar stored in the file "Battle Speech.g" and print two
    randomly generated texts from it
    """
    # initialize() reads the whole file, so it can be closed (even if
    # parsing raises) before any text is generated
    with open("Battle Speech.g") as f:
        d = initialize(f)
    start(d)
    start(d)

# call main() only when this file is run as a script, not when it is imported
if __name__ == "__main__":
    main()
    
