'''
Created on Nov 8, 2011

@author: rodger
'''
import re

# NOTE: print(...) with a single argument behaves the same on Python 2
# and Python 3; the bare "print s" statement form is a SyntaxError on 3.

# Demo 1: abbreviate the street suffix with plain str.replace().
# This works here because 'ROAD' occurs only once, as the final word.
s = '100 NORTH MAIN ROAD'
print(s)
s = s.replace('ROAD', 'RD.')
print(s)

# Demo 2: str.replace() rewrites every occurrence, so the 'ROAD' inside
# 'BROAD' is abbreviated as well, giving '100 NORTH BRD. RD.'.
s = '100 NORTH BROAD ROAD'
print(s)
s = s.replace('ROAD', 'RD.')
print(s)

# Demo 3: an unanchored regular expression has exactly the same problem.
s = '100 NORTH BROAD ROAD'
s2 = re.sub(r'ROAD', 'RD.', s)
print(s2)

# Demo 4: anchoring the pattern with '$' restricts the match to the end
# of the string, so only the trailing 'ROAD' is replaced.
s = '100 NORTH BROAD ROAD'
s2 = re.sub(r'ROAD$', 'RD.', s)
print(s2)

# Squeeze runs of a repeated 'a', 'e' or 's' down to a single letter.
# print(...) with one argument works on both Python 2 and Python 3.
s = "Wow thaaaaaat isssssssssss greeaaat"
print(s)
# One pass instead of three: ([aes]) captures the letter, \1+ matches one
# or more further copies of that same letter, and the whole run is
# replaced by the single captured letter.
s2 = re.sub(r'([aes])\1+', r'\1', s)
print("Now without extra a's, s's and e's")
print(s2)
# Emit a blank separator line (print("") does this on Python 2 and 3).
print("")


# Sample input mixing integers, words, a zero-padded number and a decimal.
# It is named 'text' rather than 'str' so the built-in str type is not
# shadowed. print(...) with one argument works on Python 2 and Python 3.
text = "987 56 abc ab65 123 0 045 8 5.68"
print(text)

# First attempt: a lone 0, or a non-zero digit followed by more digits.
# Nothing anchors the match, so digits embedded in other tokens
# (such as 'ab65' and '045') are picked up as well.
p = re.compile("[0]|[1-9][0-9]*")  # compiled regular expression pattern
nums = p.findall(text)  # all non-overlapping matches, as a list of strings
print("First attempt to find integers")
print(nums)

# Note: when a backslash is used to write a regular-expression construct
# such as \b, put "r" in front of the string to make it a raw string.
# Without the r, Python interprets "\b" as the string-literal backspace
# character.
#
# Second attempt: require a word boundary (\b) on both sides of the
# number so that digits inside a longer token no longer match.
p = re.compile(r"\b[0]\b|\b[1-9][0-9]*\b")
nums = p.findall(text)
print("second attempt to find integers")
print(nums)


# Note: the decimal number 5.68 is split into two integer pieces because
# '.' is a non-word character, so \b matches on either side of it.

# Word-matching demos on a phrase of lowercase words.
# print(...) with one argument works on both Python 2 and Python 3.
phrase = "grate surrogate break ate later slater yeah gross"
print(phrase)

# [a-z]* takes any leading letters; 'ate' followed by \b must then be
# the last three letters of the word.
print("words that end in 'ate'")
p = re.compile(r"[a-z]*ate\b")
values = p.findall(phrase)
print(values)

# \b pins the match to the start of a word; the class [a-fh-z] is every
# lowercase letter except 'g'.
print("words that don't start with g")
p = re.compile(r"\b[a-fh-z][a-z]*")
values = p.findall(phrase)
print(values)

# Same idea with both 'g' and 's' left out of the first-letter class.
print("words that don't start with either s or g")
p = re.compile(r"\b[a-fh-rt-z][a-z]*")
values = p.findall(phrase)
print(values)