Lecture Notes 9

Lecture Notes 9#

Working with files#

# !mv ../../Downloads/HistoricalData_1683036550517.csv apple.csv

!head apple.csv

Date,Close/Last,Volume,Open,High,Low

05/01/2023,$169.59,52472940,$169.28,$170.45,$168.64

04/28/2023,$169.68,55275850,$168.49,$169.85,$167.8801

04/27/2023,$168.41,64902330,$165.19,$168.56,$165.19

04/26/2023,$163.76,45498800,$163.055,$165.28,$162.80

04/25/2023,$163.77,48714060,$165.19,$166.305,$163.73

04/24/2023,$165.33,41949580,$165.00,$165.60,$163.89

04/21/2023,$165.02,58337340,$165.05,$166.4521,$164.49

04/20/2023,$166.65,52456380,$166.09,$167.87,$165.56

04/19/2023,$167.63,47720170,$165.80,$168.16,$165.54

file = open('apple.csv')

type(file) # what Python calls a file type

_io.TextIOWrapper

dir(file)

['_CHUNK_SIZE',
 '__class__',
 '__del__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__next__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_checkClosed',
 '_checkReadable',
 '_checkSeekable',
 '_checkWritable',
 '_finalizing',
 'buffer',
 'close',
 'closed',
 'detach',
 'encoding',
 'errors',
 'fileno',
 'flush',
 'isatty',
 'line_buffering',
 'mode',
 'name',
 'newlines',
 'read',
 'readable',
 'readline',
 'readlines',
 'reconfigure',
 'seek',
 'seekable',
 'tell',
 'truncate',
 'writable',
 'write',
 'write_through',
 'writelines']

# read the content of the whole file to a string
file = open('apple.csv')
file.read()[:100]

'Date,Close/Last,Volume,Open,High,Low\n05/01/2023,$169.59,52472940,$169.28,$170.45,$168.64\n04/28/2023,'

file.read() #already at end of file, return empty string

''

file.seek(0) #position the file in the beginning

file.readlines()[:10] # a list of lines

['Date,Close/Last,Volume,Open,High,Low\n',
 '05/01/2023,$169.59,52472940,$169.28,$170.45,$168.64\n',
 '04/28/2023,$169.68,55275850,$168.49,$169.85,$167.8801\n',
 '04/27/2023,$168.41,64902330,$165.19,$168.56,$165.19\n',
 '04/26/2023,$163.76,45498800,$163.055,$165.28,$162.80\n',
 '04/25/2023,$163.77,48714060,$165.19,$166.305,$163.73\n',
 '04/24/2023,$165.33,41949580,$165.00,$165.60,$163.89\n',
 '04/21/2023,$165.02,58337340,$165.05,$166.4521,$164.49\n',
 '04/20/2023,$166.65,52456380,$166.09,$167.87,$165.56\n',
 '04/19/2023,$167.63,47720170,$165.80,$168.16,$165.54\n']

file.seek(0)    # again, start from the beginning
file.readline() # read one line at the time, first the header

'Date,Close/Last,Volume,Open,High,Low\n'

file.readline() # then the first data line

'05/01/2023,$169.59,52472940,$169.28,$170.45,$168.64\n'

file.readline()  # then the second data line

'04/28/2023,$169.68,55275850,$168.49,$169.85,$167.8801\n'

Consider a script where we

read all lines with prices on a given transaction day
at the end of the day we buy $1 worth of shares (assuming that is possible)
sum up the total share values
at the end calculate the total dollar value of the accumulated shares on the final day

total_shares = 0
for i, line in enumerate(open('apple.csv')):
    #if line[:4] == 'Date':
    #   continue
    if i == 0:
        continue
    _
    #print(line)
    fields = line.split(',')
    #print(fields)
    closing = fields[1] # second element in list
    #print(closing)
    closing = closing[1:]  # skip the first characgter
    #print(closing)
    closing = float(closing)
    #print(closing)
    if i == 1:
        final_closing = closing
    total_shares += 1/closing  # number of shares that one dollar buys
    
print("Total shares", total_shares)
print("Final closing price", final_closing)
print("Final value", round(total_shares*final_closing, 2))
print("Total invested:", i)
    
    

Total shares 63.04476565302408
Final closing price 169.59
Final value 10691.76
Total invested: 2517

csv library#

As the processing of csv (comma separated value) files is such a common problem, specialized libraries for this have been developed and is part of the standard library

import csv

dir(csv)

['Dialect',
 'DictReader',
 'DictWriter',
 'Error',
 'QUOTE_ALL',
 'QUOTE_MINIMAL',
 'QUOTE_NONE',
 'QUOTE_NONNUMERIC',
 'Sniffer',
 'StringIO',
 '_Dialect',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '__version__',
 'excel',
 'excel_tab',
 'field_size_limit',
 'get_dialect',
 'list_dialects',
 're',
 'reader',
 'register_dialect',
 'unix_dialect',
 'unregister_dialect',
 'writer']

Here we use the DictReader function which maps the data lines in the file to dictionaries with the headers as keys.

To examine the first data item in the loop (and then stop) we may put in the break statement

for data in csv.DictReader(open('apple.csv')):
    print(data)
    break

{'Date': '05/01/2023', 'Close/Last': '$169.59', 'Volume': '52472940', 'Open': '$169.28', 'High': '$170.45', 'Low': '$168.64'}

A modification of the previous problem is now

total_shares = 0
for i, data in enumerate(csv.DictReader(open('apple.csv')), start=1):
    # print(data)
    total_shares += 1/float(data['Close/Last'][1:])
    
print("Total shares", total_shares)
print("Final closing price", final_closing)
print("Final value", round(total_shares*final_closing, 2))
print("Total invested:", i)

Total shares 63.04476565302408
Final closing price 169.59
Final value 10691.76
Total invested: 2517

Here we note that

enumerate can be used if we need access to a loop counter as well as the data - we now have two loop variables i, data
the start=1 option to enumerate means we start counting from one (rather than zero)

Moving code to a file

%%file stocks.py
import csv
import sys

stock_data = sys.argv[1]

total_shares = 0
for i, data in enumerate(csv.DictReader(open(stock_data)), start=1):
    # print(data)
    closing_price = float(data['Close/Last'][1:])
    total_shares += 1/closing_price
    if i == 1:
        final_closing = closing_price
    
print("Total shares", total_shares)
print("Final closing price", final_closing)
print("Final value", round(total_shares*final_closing, 2))
print("Total invested:", i)

result_file = stock_data.replace('csv', 'results')
f = open(result_file, mode='w')
f.write(f"Total shares {total_shares}\n")
f.write(f"Final value {round(total_shares*final_closing, 2)}\n")

Overwriting stocks.py

gives us a script locally for passing a csv file as an argunent and which saves result to a new file

!python stocks.py apple.csv

Total shares 63.04476565302408
Final closing price 169.59
Final value 10691.76
Total invested: 2517

$ cat apple.results
Total shares 63.04476565302408
Final value 10691.76

%cat apple.results

Total shares 63.04476565302408
Final value 10691.76

Lecture Notes 9

Contents

Lecture Notes 9#

Working with files#

csv library#