Chapter 5 ビッグデータ: ファイルの入出力

session05-01.py
# Option 1: open the file explicitly and close it explicitly.
# The try/finally guarantees the file is closed even if the processing
# step raises an exception.
fileRef = open("rainfall.txt", "r")
try:
    pass  # process the data in the file
finally:
    fileRef.close()

# Option 2: use a with/as block.
# The file is closed automatically when the block finishes, whether it
# ends normally or through an exception.
with open("rainfall.txt", "r") as fileRef:
    pass  # process file
session05-02.py
# Report the rainfall for each location, one line of rainfall.txt at a time.
with open("rainfall.txt", "r") as rainFile:
    for record in rainFile:
        fields = record.split()  # whitespace-separated columns
        place = fields[0]
        amount = fields[1]
        print(place, "had", amount, "inches of rain.")
listing05-01.py
# Convert each rainfall amount from inches to centimeters and write one
# "<name> <centimeters>" line per input line to rainfallInCM.txt.
with open("rainfall.txt", "r") as rainFile, \
        open("rainfallInCM.txt", "w") as outFile:
    for record in rainFile:
        fields = record.split()
        centimeters = float(fields[1]) * 2.54  # 2.54 cm per inch
        # write() returns the number of characters written.
        nChars = outFile.write(fields[0] + " " + str(centimeters) + "\n")
session05-03.py
# Positional fields: {0} and {1} refer to the format() arguments by index;
# the d type formats the value as a decimal integer.
fruitPrice = 75
fruit = 'apple'
print("The {0} costs {1:d} cents".format(fruit, fruitPrice))
# .2f formats a float with two digits after the decimal point.
item = 'a dozen eggs'
itemPrice = 2.4
print("The price of {0} is ${1:.2f}".format(item, itemPrice))
# Named fields are filled from the dictionary unpacked as keyword arguments.
myDict = {'name': 'candy bar', 'price': 95}
print("The {name} costs {price} cents.".format(**myDict))
# ^ centers and > right-justifies the text within a 25-character field.
print("This text is {0:^25s}".format("centered"))
print("This text is {0:>25s}".format("right justified"))
# .2% multiplies the value by 100 and appends a percent sign.
quizGrade = 7.5
totalPoints = 12
print("{0:.1f} is {1:.2%} of {2:d} total points".format(quizGrade, quizGrade / totalPoints, totalPoints))
session05-04.py
# Three ways to read from a text file. Each bare name below only displays
# its value when typed in an interactive session; in a script it does nothing.
with open("rainfall.txt", "r") as inFile:
    aLine = inFile.readline()  # read one line

aLine
with open("rainfall.txt", "r") as inFile:
    lineList = inFile.readlines()  # read all lines as a list

lineList

with open("rainfall.txt", "r") as inFile:
    fileString = inFile.read()  # read entire file as a string

fileString
session05-05.py
import csv
# Files handed to csv.reader should be opened with newline="" so that line
# breaks embedded inside quoted fields are interpreted correctly.
with open("earthquakes3.csv", "r", newline="") as inFile:
    csvReader = csv.reader(inFile)  # pass file to csv reader
    for line in csvReader:  # each line is a list of field strings
        print(line)
session05-06.py
import csv
# Find the index of the "mag" column in the header row of the CSV file.
# NOTE(review): other snippets in this chapter open "earthquakes.csv";
# confirm which file name is intended here.
# newline="" is the mode the csv module expects for files it reads.
with open("earthquake.csv", "r", newline="") as inFile:
    csvReader = csv.reader(inFile)  # feed file to csv reader
    titles = next(csvReader, [])  # first line with titles; [] if file is empty
    colNum = 0  # prime the condition
    # Stop at the end of the titles so a missing column cannot run past
    # the list and raise IndexError.
    while colNum < len(titles) and titles[colNum] != "mag":
        colNum = colNum + 1  # update the condition
    if colNum == len(titles):
        print("Error: mag not found.")
    else:
        print("The magnitude is found in column", colNum)
listing05-02.py
import csv


def makeDataList(dataName):
    """Return one column of earthquake.csv as a list of floats.

    The first row of the file is treated as the column titles. The column
    whose title equals dataName is located, and every remaining row
    contributes that column's value converted with float().

    Args:
        dataName: Title of the column to extract.

    Returns:
        A list of floats in file order. An empty list is returned, after
        printing an error message, when dataName is not among the titles
        (which includes the case of an empty file).

    Raises:
        ValueError: If a value in the column cannot be converted to float.
        IndexError: If a data row is too short to contain the column.
    """
    dataList = []
    # newline="" is the mode the csv module expects, so that line breaks
    # inside quoted fields are interpreted correctly.
    with open("earthquake.csv", "r", newline="") as inFile:
        csvReader = csv.reader(inFile)  # get iterator
        # Default to no titles so an empty file is reported as "not found"
        # instead of raising StopIteration.
        titles = next(csvReader, [])

        colNum = 0  # search for dataName
        while colNum < len(titles) and titles[colNum] != dataName:
            colNum = colNum + 1

        if colNum == len(titles):  # was titles end reached?
            print("Error:", dataName, "not found.")
        else:  # dataName was found
            for line in csvReader:
                dataList.append(float(line[colNum]))
    return dataList
session05-07.py
cubes = []
for x in range(1, 11):
    cubes.append(x * x * x)
cubes
session05-08.py
cubes = [x * x * x for x in range(1, 11)]
cubes
session05-09.py
cubes = [x * x * x for x in range(1, 11)]
evenCubes = [x for x in cubes if x % 2 == 0]
listing05-03.py
import csv


def makeDataList(dataName):
    """Return one column of earthquakes.csv as a list of floats.

    The first row of the file supplies the column titles; the values of the
    column titled dataName are collected with a list comprehension.

    Args:
        dataName: Title of the column to extract.

    Returns:
        A list of floats in file order, or an empty list (after printing an
        error message) when dataName is not one of the titles or the file
        is empty.

    Raises:
        ValueError: If a value in the column cannot be converted to float.
        IndexError: If a data row is too short to contain the column.
    """
    # The csv module expects its input file to be opened with newline="".
    with open("earthquakes.csv", "r", newline="") as inFile:
        csvReader = csv.reader(inFile)  # get iterator
        # An empty file yields no titles rather than raising StopIteration.
        titles = next(csvReader, [])

        if dataName not in titles:
            print("Error:", dataName, "not found.")
            return []

        colNum = titles.index(dataName)  # first column with that title
        return [float(line[colNum]) for line in csvReader]
session05-10.py
import csv


def makeDataList(dataName):
    """Read earthquakes.csv and return the dataName column as floats.

    Args:
        dataName: Title (from the first row of the file) of the column to
            extract.

    Returns:
        The column's values converted with float(), in file order. When the
        title is not present (or the file is empty) an error message is
        printed and an empty list is returned.

    Raises:
        ValueError: If a value in the column cannot be converted to float.
        IndexError: If a data row is too short to contain the column.
    """
    # newline="" lets the csv module handle line endings itself.
    with open("earthquakes.csv", "r", newline="") as inFile:
        csvReader = csv.reader(inFile)  # get iterator
        titles = next(csvReader, [])  # first line; [] for an empty file

        try:
            colNum = titles.index(dataName)  # first matching title
        except ValueError:  # dataName is not a column title
            print("Error:", dataName, "not found.")
            return []

        return [float(line[colNum]) for line in csvReader]


magList = makeDataList("mag")
len(magList)
# Show at most the first ten magnitudes. Slicing avoids an IndexError when
# the list holds fewer than ten values.
for mag in magList[:10]:
    print(mag, end=" ")

depthList = makeDataList("depth")
len(depthList)
# Same preview for the depths.
for depth in depthList[:10]:
    print(depth, end=" ")

# Uncomment to see the error message printed for an unknown column title:
# makeDataList("notATitle")
session05-11.py
import statistics
import csv


def makeDataList(dataName):
    """Extract a numeric column from earthquakes.csv.

    The file's first row is read as the list of column titles. If dataName
    is one of them, every following row contributes float(row[column]).

    Args:
        dataName: Title of the column to extract.

    Returns:
        A list of floats in file order; an empty list if dataName is not a
        column title or the file is empty (an error message is printed in
        that case).

    Raises:
        ValueError: If a value in the column cannot be converted to float.
        IndexError: If a data row is too short to contain the column.
    """
    dataList = []
    # Open with newline="" as the csv module expects for its input files.
    with open("earthquakes.csv", "r", newline="") as inFile:
        csvReader = csv.reader(inFile)  # get iterator
        # Fall back to an empty title list so an empty file does not raise
        # StopIteration.
        titles = next(csvReader, [])

        if dataName in titles:
            colNum = titles.index(dataName)  # first column with that title
            dataList = [float(line[colNum]) for line in csvReader]
        else:
            print("Error:", dataName, "not found.")
    return dataList


# Load the "mag" column, then evaluate several summary statistics on it.
magList = makeDataList("mag")

# The bare expressions below only display a result when typed in an
# interactive session; run as a script their values are discarded.
max(magList)
min(magList)
statistics.mean(magList)
statistics.median(magList)
statistics.multimode(magList)  # list of the most frequent value(s)
statistics.stdev(magList)  # sample standard deviation
session05-12.py
import csv


def frequencyTable(aList):
    """Print a frequency table of the items in aList.

    A header line is printed first, followed by one line per distinct item
    in ascending order, showing the item (formatted with one decimal place)
    and the number of times it occurs.

    Args:
        aList: Items to count. They must be hashable, sortable, and
            formattable with the ``f`` format type (e.g. floats).

    Returns:
        None. The table is written to standard output.
    """
    countDict = {}
    # Every item taken from aList is trivially in aList, so no membership
    # test is needed; testing it would rescan the list for each item.
    for item in aList:
        countDict[item] = countDict.get(item, 0) + 1

    print("ITEM", "FREQUENCY")
    for item in sorted(countDict):
        print("{0:4.1f} {1:6d}".format(item, countDict[item]))


def makeDataList(dataName):
    """Return the values of one earthquakes.csv column as floats.

    Args:
        dataName: Column title to look for in the first row of the file.

    Returns:
        A list with float(value) for each data row of the matching column,
        in file order. If no title matches (including when the file is
        empty), an error message is printed and the list is empty.

    Raises:
        ValueError: If a value in the column cannot be converted to float.
        IndexError: If a data row is too short to contain the column.
    """
    # csv.reader input files should be opened with newline="".
    with open("earthquakes.csv", "r", newline="") as inFile:
        csvReader = csv.reader(inFile)  # get iterator
        titles = next(csvReader, [])  # first line; [] when the file is empty

        # Guard clause: report an unknown title and stop early.
        if dataName not in titles:
            print("Error:", dataName, "not found.")
            return []

        colNum = titles.index(dataName)
        dataList = []
        for line in csvReader:
            dataList.append(float(line[colNum]))
        return dataList


# Load the "mag" column from the CSV file.
magList = makeDataList("mag")


# Print how many times each distinct magnitude occurs.
frequencyTable(magList)
session05-13.py
import json
import urllib.request
# The response object is used as a context manager so the connection is
# closed as soon as the data has been read.
with urllib.request.urlopen("https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_month.geojson") as handle:
    data = handle.read()  # read all JSON data
eData = json.loads(data)  # convert to Python
eData.keys()  # eData is a dictionary
earthquakeList = eData.get('features')
len(earthquakeList)  # list of earthquakes
listing05-04.py
def makeMagList(earthquakeData):
    """Collect the 'mag' value of every earthquake in the decoded data.

    Args:
        earthquakeData: Dictionary whose 'features' entry is a list of
            dictionaries, each with a 'properties' dictionary.

    Returns:
        A list with one entry per feature: the value stored under 'mag' in
        its 'properties', or None where 'mag' is absent. The list is empty
        when 'features' is missing, None, or empty.

    Raises:
        AttributeError: If a feature has no 'properties' dictionary.
    """
    # Treat a missing (or null) 'features' entry as "no earthquakes"
    # instead of failing on None.
    earthquakes = earthquakeData.get('features') or []
    return [quake.get('properties').get('mag') for quake in earthquakes]
session05-14.py
import json
import statistics
import urllib.request


def makeMagList(earthquakeData):
    """Return the magnitudes found in decoded earthquake data.

    Args:
        earthquakeData: Dictionary with a 'features' list; each feature is
            a dictionary holding a 'properties' dictionary.

    Returns:
        A list of the 'mag' values, one per feature and in the same order.
        A feature without 'mag' contributes None. An absent, None, or empty
        'features' entry gives an empty list.

    Raises:
        AttributeError: If a feature has no 'properties' dictionary.
    """
    magList = []
    # Iterate over the features directly; fall back to an empty list so a
    # missing 'features' key does not raise TypeError.
    for earthquake in earthquakeData.get('features') or []:
        properties = earthquake.get('properties')
        magList.append(properties.get('mag'))
    return magList


# Download the feed; the with block closes the connection once the data
# has been read.
with urllib.request.urlopen(
        "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_month.geojson") as handle:
    data = handle.read()  # read all JSON data
eData = json.loads(data)  # convert to Python
magList = makeMagList(eData)

# The bare expressions below only display a result in an interactive
# session; only the final print produces output when run as a script.
len(magList)
min(magList)
max(magList)
statistics.mean(magList)
statistics.median(magList)
statistics.multimode(magList)
print(statistics.stdev(magList))
listing05-05.py
def correlation(xList, yList):
    """Return the sample correlation coefficient of two equal-length lists.

    Computes sum((x - xBar) * (y - yBar)) / ((n - 1) * xStd * yStd), where
    xBar and yBar are the means and xStd and yStd the sample standard
    deviations of the two lists.

    Args:
        xList: Sequence of numbers.
        yList: Sequence of numbers, paired element by element with xList.

    Returns:
        The correlation coefficient as a float.

    Raises:
        ValueError: If the two lists differ in length.
        statistics.StatisticsError: If the lists have fewer than two values.
        ZeroDivisionError: If either list has a standard deviation of zero.
    """
    import statistics

    # Pairing only makes sense for lists of equal length; a longer yList
    # would otherwise be partly ignored while still affecting yBar and yStd.
    if len(xList) != len(yList):
        raise ValueError("xList and yList must have the same length")

    xBar = statistics.mean(xList)
    yBar = statistics.mean(yList)
    xStd = statistics.stdev(xList)
    yStd = statistics.stdev(yList)

    num = 0.0
    for x, y in zip(xList, yList):
        num = num + (x - xBar) * (y - yBar)
    return num / ((len(xList) - 1) * xStd * yStd)
session05-15.py
import csv


def makeDataList(dataName):
    """Build a list of floats from one column of earthquakes.csv.

    Args:
        dataName: Title of the wanted column, as it appears in the first
            row of the file.

    Returns:
        The column's values converted with float(), in file order. An
        empty list is returned, and an error message printed, when the
        title is not found or the file is empty.

    Raises:
        ValueError: If a value in the column cannot be converted to float.
        IndexError: If a data row is too short to contain the column.
    """
    # newline="" is the documented way to open a file for the csv module.
    with open("earthquakes.csv", "r", newline="") as inFile:
        csvReader = csv.reader(inFile)  # get iterator
        titles = next(csvReader, [])  # first line; [] if there is none

        try:
            colNum = titles.index(dataName)  # first matching title
        except ValueError:
            print("Error:", dataName, "not found.")
            dataList = []
        else:
            dataList = [float(line[colNum]) for line in csvReader]
    return dataList


def correlation(xList, yList):
    """Compute the sample correlation coefficient between xList and yList.

    The result is the sum of the products of each pair's deviations from
    the respective means, divided by (n - 1) times the product of the two
    sample standard deviations.

    Args:
        xList: Sequence of numbers.
        yList: Sequence of numbers of the same length as xList.

    Returns:
        The correlation coefficient as a float.

    Raises:
        ValueError: If xList and yList have different lengths.
        statistics.StatisticsError: If the lists have fewer than two values.
        ZeroDivisionError: If either list has a standard deviation of zero.
    """
    import statistics

    # Reject unequal lengths up front: indexing by position would either
    # fail part-way through or silently skip trailing yList values.
    if len(xList) != len(yList):
        raise ValueError("xList and yList must have the same length")

    xBar = statistics.mean(xList)
    yBar = statistics.mean(yList)
    xStd = statistics.stdev(xList)
    yStd = statistics.stdev(yList)

    num = sum((x - xBar) * (y - yBar) for x, y in zip(xList, yList))
    corr = num / ((len(xList) - 1) * xStd * yStd)
    return corr


# Load the "mag" and "depth" columns and compute their correlation.
magList = makeDataList("mag")
depthList = makeDataList("depth")
# The result is only displayed when this is typed in an interactive
# session; as a script statement the value is discarded.
correlation(magList, depthList)