Lecture Notes 9#

Working with files#

# !mv ../../Downloads/HistoricalData_1683036550517.csv apple.csv
!head apple.csv
Date,Close/Last,Volume,Open,High,Low

05/01/2023,$169.59,52472940,$169.28,$170.45,$168.64

04/28/2023,$169.68,55275850,$168.49,$169.85,$167.8801

04/27/2023,$168.41,64902330,$165.19,$168.56,$165.19

04/26/2023,$163.76,45498800,$163.055,$165.28,$162.80

04/25/2023,$163.77,48714060,$165.19,$166.305,$163.73

04/24/2023,$165.33,41949580,$165.00,$165.60,$163.89

04/21/2023,$165.02,58337340,$165.05,$166.4521,$164.49

04/20/2023,$166.65,52456380,$166.09,$167.87,$165.56

04/19/2023,$167.63,47720170,$165.80,$168.16,$165.54
file = open('apple.csv')
type(file) # what Python calls a file type
_io.TextIOWrapper
dir(file)
['_CHUNK_SIZE',
 '__class__',
 '__del__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__next__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_checkClosed',
 '_checkReadable',
 '_checkSeekable',
 '_checkWritable',
 '_finalizing',
 'buffer',
 'close',
 'closed',
 'detach',
 'encoding',
 'errors',
 'fileno',
 'flush',
 'isatty',
 'line_buffering',
 'mode',
 'name',
 'newlines',
 'read',
 'readable',
 'readline',
 'readlines',
 'reconfigure',
 'seek',
 'seekable',
 'tell',
 'truncate',
 'writable',
 'write',
 'write_through',
 'writelines']
# read the content of the whole file to a string
file = open('apple.csv')
file.read()[:100]
'Date,Close/Last,Volume,Open,High,Low\n05/01/2023,$169.59,52472940,$169.28,$170.45,$168.64\n04/28/2023,'
file.read() #already at end of file, return empty string
''
file.seek(0) #position the file in the beginning
0
file.readlines()[:10] # a list of lines
['Date,Close/Last,Volume,Open,High,Low\n',
 '05/01/2023,$169.59,52472940,$169.28,$170.45,$168.64\n',
 '04/28/2023,$169.68,55275850,$168.49,$169.85,$167.8801\n',
 '04/27/2023,$168.41,64902330,$165.19,$168.56,$165.19\n',
 '04/26/2023,$163.76,45498800,$163.055,$165.28,$162.80\n',
 '04/25/2023,$163.77,48714060,$165.19,$166.305,$163.73\n',
 '04/24/2023,$165.33,41949580,$165.00,$165.60,$163.89\n',
 '04/21/2023,$165.02,58337340,$165.05,$166.4521,$164.49\n',
 '04/20/2023,$166.65,52456380,$166.09,$167.87,$165.56\n',
 '04/19/2023,$167.63,47720170,$165.80,$168.16,$165.54\n']
file.seek(0)    # again, start from the beginning
file.readline() # read one line at the time, first the header
'Date,Close/Last,Volume,Open,High,Low\n'
file.readline() # then the first data line
'05/01/2023,$169.59,52472940,$169.28,$170.45,$168.64\n'
file.readline()  # then the second data line
'04/28/2023,$169.68,55275850,$168.49,$169.85,$167.8801\n'

Consider a script where we

  • read all lines with prices on a given transaction day

  • at the end of the day we buy $1 worth of shares (assuming that is possible)

  • sum up the total share values

  • at the end calculate the total dollar value of the accumulated shares on the final day

total_shares = 0
for i, line in enumerate(open('apple.csv')):
    #if line[:4] == 'Date':
    #   continue
    if i == 0:
        continue
    _
    #print(line)
    fields = line.split(',')
    #print(fields)
    closing = fields[1] # second element in list
    #print(closing)
    closing = closing[1:]  # skip the first characgter
    #print(closing)
    closing = float(closing)
    #print(closing)
    if i == 1:
        final_closing = closing
    total_shares += 1/closing  # number of shares that one dollar buys
    
print("Total shares", total_shares)
print("Final closing price", final_closing)
print("Final value", round(total_shares*final_closing, 2))
print("Total invested:", i)
    
    
Total shares 63.04476565302408
Final closing price 169.59
Final value 10691.76
Total invested: 2517

csv library#

As the processing of csv (comma separated value) files is such a common problem, specialized libraries for this have been developed and is part of the standard library

import csv
dir(csv)
['Dialect',
 'DictReader',
 'DictWriter',
 'Error',
 'QUOTE_ALL',
 'QUOTE_MINIMAL',
 'QUOTE_NONE',
 'QUOTE_NONNUMERIC',
 'Sniffer',
 'StringIO',
 '_Dialect',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '__version__',
 'excel',
 'excel_tab',
 'field_size_limit',
 'get_dialect',
 'list_dialects',
 're',
 'reader',
 'register_dialect',
 'unix_dialect',
 'unregister_dialect',
 'writer']

Here we use the DictReader function which maps the data lines in the file to dictionaries with the headers as keys.

To examine the first data item in the loop (and then stop) we may put in the break statement

for data in csv.DictReader(open('apple.csv')):
    print(data)
    break
{'Date': '05/01/2023', 'Close/Last': '$169.59', 'Volume': '52472940', 'Open': '$169.28', 'High': '$170.45', 'Low': '$168.64'}

A modification of the previous problem is now

total_shares = 0
for i, data in enumerate(csv.DictReader(open('apple.csv')), start=1):
    # print(data)
    total_shares += 1/float(data['Close/Last'][1:])
    
print("Total shares", total_shares)
print("Final closing price", final_closing)
print("Final value", round(total_shares*final_closing, 2))
print("Total invested:", i)
Total shares 63.04476565302408
Final closing price 169.59
Final value 10691.76
Total invested: 2517

Here we note that

  • enumerate can be used if we need access to a loop counter as well as the data - we now have two loop variables i, data

  • the start=1 option to enumerate means we start counting from one (rather than zero)

Moving code to a file

%%file stocks.py
import csv
import sys

stock_data = sys.argv[1]

total_shares = 0
for i, data in enumerate(csv.DictReader(open(stock_data)), start=1):
    # print(data)
    closing_price = float(data['Close/Last'][1:])
    total_shares += 1/closing_price
    if i == 1:
        final_closing = closing_price
    
print("Total shares", total_shares)
print("Final closing price", final_closing)
print("Final value", round(total_shares*final_closing, 2))
print("Total invested:", i)

result_file = stock_data.replace('csv', 'results')
f = open(result_file, mode='w')
f.write(f"Total shares {total_shares}\n")
f.write(f"Final value {round(total_shares*final_closing, 2)}\n")
Overwriting stocks.py

gives us a script locally for passing a csv file as an argunent and which saves result to a new file

!python stocks.py apple.csv
Total shares 63.04476565302408
Final closing price 169.59
Final value 10691.76
Total invested: 2517
$ cat apple.results
Total shares 63.04476565302408
Final value 10691.76
%cat apple.results
Total shares 63.04476565302408
Final value 10691.76