'''
Created on Sep 25, 2013

@author: rcd
'''
# Read the colon-separated survey records: one list entry per line of the
# file, each still ending in its newline character.
# The with-statement closes the file as soon as the lines are in memory,
# even if reading raises, so the handle is never left open.
with open("student_data.txt", "r") as f:
    data = f.readlines()

# Question: how do the Mac users compare in number to the Windows users?
#   Only the final colon-separated field of each record is needed for this.
#   That field still carries the line's trailing newline, so the surrounding
#   whitespace is stripped off before it is stored.
computers = []
for record in data:
    last_field = record.split(':')[-1]
    computers.append(last_field.strip())
print(computers)

# First try: count the exact 'Mac OS X' answers and treat every remaining
#   responder as a Windows user (total responders minus Mac users).
numMac = sum(1 for answer in computers if answer == 'Mac OS X')
numWindows = len(computers) - numMac
print('# windows = %d' % numWindows)
print('# mac = %d' % numMac)

# Second try: search inside each answer instead of requiring an exact match,
#   because some people typed their answer into the Other field.
#   Only Mac is counted here. The comparison is case-insensitive, and the
#   search text is 'mac' followed by a space.
count = sum(1 for answer in computers if 'mac ' in answer.lower())
print('# mac = %d' % count)

# Third try: gather every answer that is neither exactly 'Mac OS X' nor
#   exactly 'Microsoft Windows' (perhaps these are the Linux users?).
other = []
for answer in computers:
    if answer not in ('Mac OS X', 'Microsoft Windows'):
        other.append(answer)
print(other)
print('# other users = %d' % len(other))


# Question: how many sophomores are undecided/undeclared?
# Break every record into its colon-separated fields, giving a list of lists.
splitData = []
for record in data:
    splitData.append(record.strip().split(':'))
print(splitData)
# The first field of the first record, reached by indexing twice.
print(splitData[0][0])
# Answer: keep the records whose first field is exactly 'Sophomore' and whose
# second field starts with 'und' (case-insensitive). The second field is only
# examined once the first one has matched.
targets = []
for fields in splitData:
    if fields[0] != 'Sophomore':
        continue
    if fields[1].lower().startswith('und'):
        targets.append(fields)
print(targets)
print('# undeclared sophomores = %d' % len(targets))
