'''
Created on Nov 7, 2011

@author: rcd
'''
import re               # regular expression patterns
import random           # random module: for choice() function
import string           # string module: for string.punctuation value
import FileUtilities    # useful utilities for working with files


# Regular Expressions related to the RSG grammar
# A definition is delineated by curly braces 
#  - any other characters are allowed between { }
DEFINITION_PATTERN = re.compile(r"\{\s*([^\}]+)\s*\}")
# A definition's name is first non-terminal, i.e., word surrounded by comparison signs
#  - only alphabetic, digit, -, or _ characters allowed between < >
NAME_PATTERN = re.compile(r"(\<[\w-]+\>)\s*")
# Rule is terminated by semicolon
#  - any other characters are allowed before ;
RULE_PATTERN = re.compile(r"([^;]+)\s*;")


def generate (grammar, nonTerminal):
    '''
    Generates list of strings by expanding the given non-terminal using the given grammar rules.
     - grammar is dictionary keys are strings, representing non-terminals, and
          whose values are lists of lists of strings, representing rules to expand
     - nonTerminal is string that represents a definition's name
     A rule is expanded by choosing a random rule from the given definition and 
     building a list of words where the words come from either:
       - a terminal word directly in the rule
       - a list of words generated from expanding each non-terminal
    '''
    result = []
    rules = grammar[nonTerminal]
    # TODO: complete this function
    return result


def pretty_print (words):
    '''
    Print out the given list of strings separated by spaces.
    Each line should be limited to a maximum of 50 characters and 
      spaces should appear only between words, not punctuation.
    '''
    # TODO: complete this function produce nicer output
    print " ".join(words)


def print_grammar(grammar):
    """
    Print grammar dictionary for debugging purposes.
    """
    for (name, rules) in sorted(grammar.items()):
        print name
        for opt in rules:
            print "  ", opt


def is_non_terminal(word):
    '''
    Returns True iff given word is a non-terminal not an ordinary word.
    '''
    return NAME_PATTERN.match(word) != None


def is_word(word):
    '''
    Returns True iff given word is not just a punctuation character.
    '''
    return len(word) > 1 or word[0] not in string.punctuation


def initialize(filename):
    """
    Read input file of grammar definitions and creates dictionary where:
     - keys are strings, representing non-terminals, and
     - values are lists of lists of strings, representing rules
    """
    grammar = {}
    file = FileUtilities.get_file(filename)
    text = FileUtilities.get_as_string(file)
    for definition in DEFINITION_PATTERN.findall(text):
        (before, name, rules) = NAME_PATTERN.split(definition, 1)
        grammar[name] = [r.split() for r in RULE_PATTERN.findall(rules)]
    return grammar


print_grammar(initialize("../data/apt-issues.g"))
