You should see animals.txt and README.txt:
# Show which directory we are currently in
pwd
# List the files in that directory
ls
# Print the contents of each file to the terminal
cat README.txt
cat animals.txt
A simple plain text file that contains the plain-text string "Rockefeller U." is stored in 15 bytes as:
`\n` versus `\r\n`
`\n` = New Line
`\n` is the standard line terminator for text files
`\r` = Carriage Return
`\r\n` = Carriage Return followed by New Line
With `\n` versus `\r\n`, you may come across situations where the presence of Carriage Return characters is an issue.
`cat`, `less`, `head`, `tail`, `wc`, etc.
A list of animals, their body mass, and their brain mass
Question of interest: Which animal has the largest brain-to-body mass ratio? And the smallest?
Here we use the IPython Notebook "magics" to call the shell. You can also just do this from the command line.
%%bash
# List the files in the current directory
ls
%%bash
# Report the line, word, and byte counts of the data file
wc Animals.txt
# what does the output mean? call: man wc
%%bash
# Show the first few lines of the data file
head Animals.txt
# For convenience, assign the path and file name to variables
read_this = 'Animals.txt'
print read_this
# Approach 1: open, read, close
# Open the file in read mode ('r') and keep the file object in file_in
file_in = open(read_this, 'r')
# readlines() reads the whole file into a list with one string per line
all_lines = file_in.readlines()
file_in.close() # important to close the file
# The file object still exists after close(), so its attributes can be inspected
print "Name of the file: ", file_in.name
print "Is File Closed? : ", file_in.closed
print "Opening mode : ", file_in.mode
# NOTE: softspace is an attribute of Python 2 file objects (used by the print statement)
print "Softspace flag : ", file_in.softspace
Approach 2: the `with` statement
# Read the file inside a `with` block: the file is closed automatically
# when the block ends, even if an error occurs while reading.
with open(read_this, 'r') as file_in:
    lines = file_in.readlines()
# No explicit file_in.close() is needed here.
# readlines() returned a list of strings, one per line of the file
type(lines)
# Peek at the first five lines
lines[0:5]
# print each individual line -- iterate over the file
for waffles in lines:
print waffles
# repr := representation of the line, revealing hidden characters
for line in lines:
print repr(line)
Converting a String to a List
some_line = lines[2]
print some_line
some_line[0]
type(some_line)
# What is the first element of some_line?
some_line[0]
# is that what you expected?
for _ in some_line:
print _
print type(some_line)
line_split_at_commas = some_line.split(",") # forms a python List object, splitting at the comma characters
print line_split_at_commas
print some_line.strip() # gets rid of whitespace...
# print type(some_line.strip()) # though result is still a string
my_new_line = some_line.strip()
print my_new_line[0]
line_list = some_line.strip().split(",")
print line_list
for l in line_list:
print l
# Adding two integers performs arithmetic...
3 + 3
# ...while adding two strings concatenates them
'3' + '3'
# Unpack the three fields of the split line into separate names
animal, body_mass, brain_mass = line_list
print "Animal = ", animal
# The fields are still strings; float() converts them to numbers
print "Body Mass =", float(body_mass)
print "Brain Mass =", float(brain_mass)
Do your results make sense to you?
# List-based solution
with open(read_this, 'r') as file_in:
lines = file_in.readlines()
ratios = []
for line in lines:
clean_line = line.strip().split(',')
animal, body_mass, brain_mass = clean_line
if animal == "Animal":
pass # skip the header line
else:
ratio = float(brain_mass) / (1000 * float(body_mass))
print "Animal=",animal, "\tratio=", ratio
ratios.append([animal, ratio])
# dictionary-based solution
# Read every line of the data file; the file is closed when the block ends
with open(read_this, 'r') as file_in:
    lines = file_in.readlines()

ratios_dict = {} # empty dictionary, filled as animal name -> ratio
for line in lines:
    stripped = line.strip()
    if not stripped:
        continue  # ignore blank lines, which cannot be unpacked into three fields
    clean_line = stripped.split(',')
    animal, body_mass, brain_mass = clean_line
    if animal == "Animal":
        pass # skip the header line
    else:
        ratios_dict[animal] = float(brain_mass) / (1000 * (float(body_mass))) # WATCH parentheses and order of operations
print ratios_dict
print ratios_dict.keys()
print ratios_dict.values()
print (ratios_dict['Cat'])
# using the dictionary like a database
my_study_subjects = ['Cat', 'Rabbit', 'Goat']
for subj in my_study_subjects:
print subj, (ratios_dict[subj])
file_out_name = 'Animal_brain_to_body_ratio.txt'
# Write the results inside a `with` block so the output file is flushed
# and closed even if one of the writes fails.
with open(file_out_name, 'w') as file_out:
    for animal_name, ratio_value in ratios:
        # one "animal, ratio" record per line
        line_out = animal_name + ", " + str(ratio_value) + "\n"
        file_out.write(line_out)
`csv` module
serialization