Chapter 5 ビッグデータ: ファイルの入出力
# Option 1: open the file explicitly, then close it by hand.
# If the processing step raises, close() is never reached.
fileRef = open("rainfall.txt", "r")  # process the data in the file
fileRef.close()
# Option 2: use a with/as block.
# The file is closed automatically when the block finishes,
# even if the body raises an exception.
with open("rainfall.txt", "r") as fileRef:  # process file
    pass
# Print one sentence per line of rainfall.txt.
# Each line is split on whitespace; the first two fields are printed
# (presumably a place name and its rainfall in inches -- confirm against the data file).
with open("rainfall.txt", "r") as rainFile:
    for aLine in rainFile:
        values = aLine.split()  # whitespace-separated fields of this line
        print(values[0], "had", values[1], "inches of rain.")
# Copy rainfall.txt to rainfallInCM.txt, converting the second field of
# every line from inches to centimetres (1 inch = 2.54 cm).
# Both files are managed by a single with statement, so each is closed
# when the block ends.
with open("rainfall.txt", "r") as rainFile, \
        open("rainfallInCM.txt", "w") as outFile:
    for aLine in rainFile:
        values = aLine.split()
        inches = float(values[1])
        cm = 2.54 * inches
        # write() returns the number of characters written; it is kept in
        # nChars exactly as before.
        nChars = outFile.write(values[0] + " " + str(cm) + "\n")
# Demonstrations of str.format().

# Positional fields: {0} and {1} refer to the arguments by index;
# ":d" formats the value as a decimal integer.
fruitPrice = 75
fruit = 'apple'
print("The {0} costs {1:d} cents".format(fruit, fruitPrice))
# ":.2f" formats a float with exactly two digits after the decimal point.
item = 'a dozen eggs'
itemPrice = 2.4
print("The price of {0} is ${1:.2f}".format(item, itemPrice))
# Named fields: **myDict unpacks the dictionary into keyword arguments,
# so {name} and {price} are looked up by key.
myDict = {'name': 'candy bar', 'price': 95}
print("The {name} costs {price} cents.".format(**myDict))
# Alignment inside a 25-character field: "^" centers, ">" right-justifies.
print("This text is {0:^25s}".format("centered"))
print("This text is {0:>25s}".format("right justified"))
# ":.1f" -> one decimal place; ":.2%" -> multiply by 100 and show as a
# percentage with two decimal places.
quizGrade = 7.5
totalPoints = 12
print("{0:.1f} is {1:.2%} of {2:d} total points".format(quizGrade, quizGrade / totalPoints, totalPoints))
# Three ways to read rainfall.txt. The bare names after each block only
# display a value in an interactive session (notebook / REPL).

# readline(): returns the next single line, including its newline.
with open("rainfall.txt", "r") as inFile:
    aLine = inFile.readline()  # read one line
aLine
# readlines(): returns every line of the file as a list of strings.
with open("rainfall.txt", "r") as inFile:
    lineList = inFile.readlines()  # read all lines as a list
lineList
# read(): returns the whole file as one string.
with open("rainfall.txt", "r") as inFile:
    fileString = inFile.read()  # read entire file as a string
fileString
import csv
# Print every row of earthquakes3.csv; csv.reader yields each row as a
# list of strings.
# newline="" hands line-ending handling to the csv module, as the csv
# documentation requires for file objects (otherwise newlines embedded in
# quoted fields are not interpreted correctly).
with open("earthquakes3.csv", "r", newline="") as inFile:
    csvReader = csv.reader(inFile)  # pass file to csv reader
    for line in csvReader:  # read each interpreted line
        print(line)
import csv
# Find the index of the column titled "mag" in the header row of
# earthquake.csv.
# newline="" hands line-ending handling to the csv module, as the csv
# documentation requires for file objects.
with open("earthquake.csv", "r", newline="") as inFile:
    csvReader = csv.reader(inFile)  # feed file to csv reader
    titles = next(csvReader)  # read first line with titles
    colNum = 0  # prime the condition
    # The length check stops the scan at the end of the header, so a file
    # without a "mag" column reports an error instead of raising IndexError.
    while colNum < len(titles) and titles[colNum] != "mag":
        colNum = colNum + 1  # update the condition
    if colNum == len(titles):
        print("Error:", "mag", "not found.")
    else:
        print("The magnitude is found in column", colNum)
import csv
def makeDataList(dataName):
    """Return one named column of earthquake.csv as a list of floats.

    The first row of the file is treated as the header. The first column
    whose title equals ``dataName`` is selected, and that column's entry in
    every remaining row is converted with ``float``.

    Args:
        dataName: Exact header title of the column to extract.

    Returns:
        A list of floats, one per data row. The list is empty (and an
        "Error: ... not found." message is printed) when the file has no
        header row or no column is titled ``dataName``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If an entry in the column cannot be converted to float.
    """
    # NOTE(review): other definitions of makeDataList in this file read
    # "earthquakes.csv" -- confirm which file name is intended here.
    # newline="" hands line-ending handling to the csv module, as the csv
    # documentation requires for file objects.
    with open("earthquake.csv", "r", newline="") as inFile:
        csvReader = csv.reader(inFile)  # get iterator
        titles = next(csvReader, [])  # header row; [] when the file is empty
        try:
            colNum = titles.index(dataName)  # first matching title
        except ValueError:
            print("Error:", dataName, "not found.")
            return []
        return [float(line[colNum]) for line in csvReader]
# Build the cubes of 1 through 10 with an explicit loop and append().
cubes = []
for x in range(1, 11):
    cubes.append(x * x * x)
cubes
# The same list written as a list comprehension.
cubes = [x * x * x for x in range(1, 11)]
cubes
# A comprehension with a condition: keep only the even cubes.
cubes = [x * x * x for x in range(1, 11)]
evenCubes = [x for x in cubes if x % 2 == 0]
import csv
def makeDataList(dataName):
    """Return one named column of earthquakes.csv as a list of floats.

    The first row of the file is treated as the header. The first column
    whose title equals ``dataName`` is selected, and that column's entry in
    every remaining row is converted with ``float``.

    Args:
        dataName: Exact header title of the column to extract.

    Returns:
        A list of floats, one per data row. The list is empty (and an
        "Error: ... not found." message is printed) when the file has no
        header row or no column is titled ``dataName``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If an entry in the column cannot be converted to float.
    """
    # newline="" hands line-ending handling to the csv module, as the csv
    # documentation requires for file objects.
    with open("earthquakes.csv", "r", newline="") as inFile:
        csvReader = csv.reader(inFile)  # get iterator
        titles = next(csvReader, [])  # header row; [] when the file is empty
        try:
            colNum = titles.index(dataName)  # first matching title
        except ValueError:
            print("Error:", dataName, "not found.")
            return []
        return [float(line[colNum]) for line in csvReader]
import csv
def makeDataList(dataName):
    """Return one named column of earthquakes.csv as a list of floats.

    The first row of the file is treated as the header. The first column
    whose title equals ``dataName`` is selected, and that column's entry in
    every remaining row is converted with ``float``.

    Args:
        dataName: Exact header title of the column to extract.

    Returns:
        A list of floats, one per data row. The list is empty (and an
        "Error: ... not found." message is printed) when the file has no
        header row or no column is titled ``dataName``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If an entry in the column cannot be converted to float.
    """
    # newline="" hands line-ending handling to the csv module, as the csv
    # documentation requires for file objects.
    with open("earthquakes.csv", "r", newline="") as inFile:
        csvReader = csv.reader(inFile)  # get iterator
        titles = next(csvReader, [])  # header row; [] when the file is empty
        try:
            colNum = titles.index(dataName)  # first matching title
        except ValueError:
            print("Error:", dataName, "not found.")
            return []
        return [float(line[colNum]) for line in csvReader]
# Preview the first ten values of the "mag" and "depth" columns.
magList = makeDataList("mag")
len(magList)
# Slicing (instead of indexing positions 0..9) avoids an IndexError when
# fewer than ten values were read, e.g. when the column was not found and
# makeDataList returned an empty list.
for mag in magList[:10]:
    print(mag, end=" ")
depthList = makeDataList("depth")
len(depthList)
for depth in depthList[:10]:
    print(depth, end=" ")
# Uncomment to see the error message printed for a missing column title:
# makeDataList("notATitle")
import statistics
import csv
def makeDataList(dataName):
    """Return one named column of earthquakes.csv as a list of floats.

    The first row of the file is treated as the header. The first column
    whose title equals ``dataName`` is selected, and that column's entry in
    every remaining row is converted with ``float``.

    Args:
        dataName: Exact header title of the column to extract.

    Returns:
        A list of floats, one per data row. The list is empty (and an
        "Error: ... not found." message is printed) when the file has no
        header row or no column is titled ``dataName``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If an entry in the column cannot be converted to float.
    """
    # newline="" hands line-ending handling to the csv module, as the csv
    # documentation requires for file objects.
    with open("earthquakes.csv", "r", newline="") as inFile:
        csvReader = csv.reader(inFile)  # get iterator
        titles = next(csvReader, [])  # header row; [] when the file is empty
        try:
            colNum = titles.index(dataName)  # first matching title
        except ValueError:
            print("Error:", dataName, "not found.")
            return []
        return [float(line[colNum]) for line in csvReader]
# Summary statistics for the "mag" column. Each bare expression only
# displays its value in an interactive session (notebook / REPL).
magList = makeDataList("mag")
max(magList)  # largest value
min(magList)  # smallest value
statistics.mean(magList)  # arithmetic mean
statistics.median(magList)  # middle value of the sorted data
statistics.multimode(magList)  # list of the most frequent value(s)
statistics.stdev(magList)  # sample standard deviation
import csv
def frequencyTable(aList):
    """Print a table of how often each distinct value occurs in aList.

    The table starts with the header line "ITEM FREQUENCY" followed by one
    line per distinct value, in ascending order of value. Each value is
    printed with one decimal place and each count as an integer.

    Args:
        aList: Iterable of numbers (hashable and mutually comparable).

    Returns:
        None. The table is written to standard output.
    """
    # Count in a single pass. Every item drawn from aList is by definition
    # in aList, so no membership test against the input is needed.
    countDict = {}
    for item in aList:
        countDict[item] = countDict.get(item, 0) + 1

    print("ITEM", "FREQUENCY")
    for item in sorted(countDict):
        print("{0:4.1f} {1:6d}".format(item, countDict[item]))
def makeDataList(dataName):
    """Return one named column of earthquakes.csv as a list of floats.

    The first row of the file is treated as the header. The first column
    whose title equals ``dataName`` is selected, and that column's entry in
    every remaining row is converted with ``float``.

    Args:
        dataName: Exact header title of the column to extract.

    Returns:
        A list of floats, one per data row. The list is empty (and an
        "Error: ... not found." message is printed) when the file has no
        header row or no column is titled ``dataName``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If an entry in the column cannot be converted to float.
    """
    # newline="" hands line-ending handling to the csv module, as the csv
    # documentation requires for file objects.
    with open("earthquakes.csv", "r", newline="") as inFile:
        csvReader = csv.reader(inFile)  # get iterator
        titles = next(csvReader, [])  # header row; [] when the file is empty
        try:
            colNum = titles.index(dataName)  # first matching title
        except ValueError:
            print("Error:", dataName, "not found.")
            return []
        return [float(line[colNum]) for line in csvReader]
# Read the "mag" column and print how often each distinct value occurs.
magList = makeDataList("mag")
frequencyTable(magList)
import json
import urllib.request
# Download the USGS GeoJSON summary feed and decode it into Python objects.
# (Judging by the feed name this is presumably magnitude 4.5+ events for the
# past month -- confirm against the USGS feed documentation.)
# The response is used as a context manager so the connection is closed as
# soon as the body has been read.
with urllib.request.urlopen("https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_month.geojson") as handle:
    data = handle.read()  # read all JSON data
eData = json.loads(data)  # convert to Python
eData.keys()  # eData is a dictionary
earthquakeList = eData.get('features')
len(earthquakeList)  # list of earthquakes
def makeMagList(earthquakeData):
    """Collect the 'mag' property of every entry under 'features'.

    Args:
        earthquakeData: Dictionary whose 'features' entry is a sequence of
            dictionaries, each holding a 'properties' dictionary.

    Returns:
        A list with one element per feature: the value stored under
        'mag' in that feature's properties, or None when 'mag' is absent.
    """
    return [
        quake.get('properties').get('mag')
        for quake in earthquakeData.get('features')
    ]
import json
import statistics
import urllib.request
def makeMagList(earthquakeData):
    """Return the magnitudes found in a decoded earthquake dictionary.

    Args:
        earthquakeData: Dictionary with a 'features' sequence; every
            feature is a dictionary containing a 'properties' dictionary.

    Returns:
        A list, in feature order, of each feature's properties['mag']
        value (None where the 'mag' key is missing).
    """
    features = earthquakeData.get('features')
    magnitudes = []
    for feature in features:
        details = feature.get('properties')
        magnitudes.append(details.get('mag'))
    return magnitudes
# Download the USGS GeoJSON summary feed, extract the magnitudes and show
# summary statistics for them.
# The response is used as a context manager so the connection is closed as
# soon as the body has been read.
with urllib.request.urlopen(
        "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_month.geojson") as handle:
    data = handle.read()  # read all JSON data
eData = json.loads(data)  # convert to Python
magList = makeMagList(eData)
# The bare expressions below only display a value in an interactive session.
len(magList)
min(magList)
max(magList)
statistics.mean(magList)
statistics.median(magList)
statistics.multimode(magList)
print(statistics.stdev(magList))  # closing parenthesis was missing (SyntaxError)
def correlation(xList, yList):
    """Return the sample (Pearson) correlation coefficient of two lists.

    Computes sum((x - xBar) * (y - yBar)) / ((n - 1) * xStd * yStd), where
    xBar/yBar are the means and xStd/yStd the sample standard deviations
    from ``statistics.stdev``.

    Args:
        xList: Sequence of numbers.
        yList: Sequence of numbers paired element-by-element with xList.

    Returns:
        The correlation coefficient as a float.

    Raises:
        statistics.StatisticsError: If the lists differ in length or hold
            fewer than two values.
        ZeroDivisionError: If either list has zero standard deviation.
    """
    import statistics

    # Unequal lengths used to be silently truncated (yList longer) or fail
    # with a bare IndexError (yList shorter); reject them up front.
    if len(xList) != len(yList):
        raise statistics.StatisticsError(
            "correlation requires xList and yList of the same length")

    xBar = statistics.mean(xList)
    yBar = statistics.mean(yList)
    xStd = statistics.stdev(xList)
    yStd = statistics.stdev(yList)
    # Sum of products of paired deviations from the means.
    num = sum(((x - xBar) * (y - yBar) for x, y in zip(xList, yList)), 0.0)
    return num / ((len(xList) - 1) * xStd * yStd)
import csv
def makeDataList(dataName):
    """Return one named column of earthquakes.csv as a list of floats.

    The first row of the file is treated as the header. The first column
    whose title equals ``dataName`` is selected, and that column's entry in
    every remaining row is converted with ``float``.

    Args:
        dataName: Exact header title of the column to extract.

    Returns:
        A list of floats, one per data row. The list is empty (and an
        "Error: ... not found." message is printed) when the file has no
        header row or no column is titled ``dataName``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If an entry in the column cannot be converted to float.
    """
    # newline="" hands line-ending handling to the csv module, as the csv
    # documentation requires for file objects.
    with open("earthquakes.csv", "r", newline="") as inFile:
        csvReader = csv.reader(inFile)  # get iterator
        titles = next(csvReader, [])  # header row; [] when the file is empty
        try:
            colNum = titles.index(dataName)  # first matching title
        except ValueError:
            print("Error:", dataName, "not found.")
            return []
        return [float(line[colNum]) for line in csvReader]
def correlation(xList, yList):
    """Return the sample (Pearson) correlation coefficient of two lists.

    Computes sum((x - xBar) * (y - yBar)) / ((n - 1) * xStd * yStd), where
    xBar/yBar are the means and xStd/yStd the sample standard deviations
    from ``statistics.stdev``.

    Args:
        xList: Sequence of numbers.
        yList: Sequence of numbers paired element-by-element with xList.

    Returns:
        The correlation coefficient as a float.

    Raises:
        statistics.StatisticsError: If the lists differ in length or hold
            fewer than two values.
        ZeroDivisionError: If either list has zero standard deviation.
    """
    import statistics

    # Unequal lengths used to be silently truncated (yList longer) or fail
    # with a bare IndexError (yList shorter); reject them up front.
    if len(xList) != len(yList):
        raise statistics.StatisticsError(
            "correlation requires xList and yList of the same length")

    xBar = statistics.mean(xList)
    yBar = statistics.mean(yList)
    xStd = statistics.stdev(xList)
    yStd = statistics.stdev(yList)
    # Sum of products of paired deviations from the means.
    num = sum(((x - xBar) * (y - yBar) for x, y in zip(xList, yList)), 0.0)
    return num / ((len(xList) - 1) * xStd * yStd)
# Read the "mag" and "depth" columns and compute the correlation between
# them. The bare call only displays its result in an interactive session.
magList = makeDataList("mag")
depthList = makeDataList("depth")
correlation(magList, depthList)