'''
Random sentence generator (RSG).

Reads a grammar file made of ``{ <non-terminal> production ; ... }``
definitions and prints randomly generated text starting from the
``<start>`` non-terminal.

Created on Nov 8, 2010
Modified on April 4, 2011

@author: ola
'''

import random
import re

# One complete grammar definition: "{ <name> prod ; prod ; }".
# Group 1 is the non-terminal, group 2 is everything up to the closing brace.
_RULE_RE = re.compile(r"\{\s*(<[^>]+>)(.*)\}")
# One production inside a definition: everything up to the next semicolon.
_PRODUCTION_RE = re.compile(r"([^;]+);")
# A brace-delimited definition inside the whitespace-flattened grammar text.
_DEFINITION_RE = re.compile(r"\{[^}]+\}")
_WHITESPACE_RE = re.compile(r"\s+")

# Maximum number of word characters (separating spaces are not counted)
# placed on one output line by start().
_LINE_WIDTH = 45


def generate_from(nont, rules):
    """
    Expand the non-terminal nont (a string starting with "<") into a
    list of terminal words.

    One production is picked at random from rules[nont]; every symbol in
    it that starts with "<" is expanded recursively, every other symbol
    is kept as a literal word.

    Raises KeyError if nont (or a non-terminal reached while expanding
    it) has no definition in rules.
    """
    words = []
    for symbol in random.choice(rules[nont]):
        if symbol.startswith("<"):
            words.extend(generate_from(symbol, rules))
        else:
            words.append(symbol)
    return words


def start(rules, start_symbol="<start>"):
    """
    Generate one random expansion of start_symbol and print it,
    wrapped into lines and followed by one blank line.

    A new line is begun whenever adding the next word would push the
    count of word characters on the current line past 45 (spaces
    between words are not counted).

    The start symbol must be a key of rules; parse_rule only ever
    stores keys of the form "<name>", so an empty start symbol can never
    be looked up successfully.
    """
    lines = []
    current = []
    used = 0
    for word in generate_from(start_symbol, rules):
        if used + len(word) > _LINE_WIDTH:
            # Flush unconditionally: an over-long first word is
            # preceded by an empty line, as it always has been.
            lines.append(current)
            current = []
            used = 0
        used += len(word)
        current.append(word)
    lines.append(current)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    for line in lines:
        print(" ".join(line))
    print("")


def parse_rule(rule, dictionary):
    """
    Parse one grammar definition and record it in dictionary.

    rule is a string holding one complete "{ <non-terminal> ... }"
    definition on a single line. dictionary is modified in place
    (SIDE EFFECT): the non-terminal becomes a key whose value is the
    list of its productions, each production being a list of
    whitespace-separated symbols. An existing entry for the same
    non-terminal is replaced.

    If rule does not have the expected shape, a message is printed and
    dictionary is left unchanged.
    """
    definition = _RULE_RE.match(rule)
    if not definition:
        print(rule + " doesn't match")
        return

    nont = definition.group(1).strip()
    remainder = definition.group(2)
    dictionary[nont] = [
        production.group(1).split()
        for production in _PRODUCTION_RE.finditer(remainder)
    ]


def initialize(source):
    """
    Build and return the grammar dictionary read from source.

    source is an open file-like object; all of it is read. Keys of the
    result are non-terminals, values are lists of productions (each
    production is a list of symbols).
    """
    # Collapse all runs of whitespace so that every definition sits on
    # one line, which is what parse_rule expects.
    flat = _WHITESPACE_RE.sub(" ", source.read())
    dictionary = {}
    for definition in _DEFINITION_RE.finditer(flat):
        parse_rule(definition.group(), dictionary)
    return dictionary


def main():
    """Load the grammar in "Battle Speech.g" and print two random texts."""
    # "with" closes the file even if parsing raises.
    with open("Battle Speech.g") as grammar_file:
        rules = initialize(grammar_file)
    start(rules)
    start(rules)


if __name__ == "__main__":
    main()