'''
Created on Nov 7, 2011

@author: rcd
'''
import re               # regular expression patterns
import random           # random module: for choice() function
import string           # string module: for string.punctuation value


# Regular Expressions related to the RSG grammar
# A definition is delineated by curly braces 
#  - any other characters are allowed between { }
DEFINITION_PATTERN = re.compile(r"\{\s*([^\}]+)\s*\}")
# A definition's name is first non-terminal, i.e., word surrounded by comparison signs
#  - only alphabetic, digit, -, or _ characters allowed between < >
NAME_PATTERN = re.compile(r"(\<[\w-]+\>)\s*")
# Rule is terminated by semicolon
#  - any other characters are allowed before ;
RULE_PATTERN = re.compile(r"([^;]+)\s*;")


def isNonTerminal (word):
    '''
    Returns True iff given word is a non-terminal not an ordinary word.
    '''
    return NAME_PATTERN.match(word) != None

def isWord (word):
    '''
    Returns True iff given word is not just a punctuation character.
    '''
    return len(word) > 1 or word[0] not in string.punctuation

def printSentence (words):
    '''
    Print out the given list of strings separated by spaces.
    Each line should be limited to a maximum of 50 characters and 
      spaces should appear only between words, not punctuation.
    '''
    print " ".join(words)

def printGrammar (grammar):
    """
    Print grammar dictionary for debugging purposes.
    """
    for (name, rules) in sorted(grammar.items()):
        print name
        for opt in rules:
            print "  ", opt

def initialize (filename):
    """
    Read input file of grammar definitions and creates dictionary where:
     - keys are strings, representing non-terminals, and
     - values are lists of lists of strings, representing rules
    """
    grammar = {}
    file = open(filename)
    text = file.read()
    file.close()
    for definition in DEFINITION_PATTERN.findall(text):
        (before, name, rules) = NAME_PATTERN.split(definition, 1)
        grammar[name] = [r.split() for r in RULE_PATTERN.findall(rules)]
    return grammar

def generate (grammar, nonTerminal):
    '''
    Generates list of strings by expanding the given non-terminal using the given grammar rules.
     - grammar is dictionary keys are strings, representing non-terminals, and
          whose values are lists of lists of strings, representing rules to expand
     - nonTerminal is string that represents a definition's name
     A rule is expanded by choosing a random rule from the given definition and 
     building a list of words where the words come from either:
       - a terminal word directly in the rule
       - a list of words generated from expanding each non-terminal
    '''
    # TODO: complete this function, by replacing "pass" below with useful code
    result = []
    allRules = grammar[nonTerminal]
    chosenRule = random.choice(allRules)
    for word in chosenRule:
        # general case: non-terminal, expand it and add to the list
        if isNonTerminal(word):
            pass
        # base case: just a word, add it to the list
        else:
            pass
    return result


printGrammar(initialize("apt-issues.g"))
