64 lines
1.3 KiB
Python
Executable File
64 lines
1.3 KiB
Python
Executable File
import pandas as pd
|
|
import numpy as np
|
|
import scipy.stats as s
|
|
|
|
# 1a. / 1b.
# Read the data file. The context manager closes the handle as soon as the
# lines have been read instead of leaving it open for the rest of the script.
with open("./mt2_datafile.txt", "r") as fileobj:
    datai = fileobj.readlines()

# Only the first line of the file is used; it is split on commas into a list
# of strings. If that line ends with a newline, the last field keeps it here;
# int() below ignores surrounding whitespace, so the conversion still works.
datam = datai[0]
dataf = datam.split(',')
print(dataf)

# 1c.
# Arrange the (still string-valued) fields into a 3 x 7 array.
# np.reshape raises ValueError unless there are exactly 21 fields.
dataa = np.reshape(dataf, (3, 7))
print(dataa)

# 1d.
# Convert every field to an integer so the statistics below are numeric.
dataf = [int(value) for value in dataf]
print(dataf)

print(np.mean(dataf))
print(np.median(dataf))
print(np.std(dataf))  # np.std defaults to the population form (ddof=0)
print(s.iqr(dataf))

# Observations recorded by the author for this data set:
# - mean and median are close, which suggests no strong outliers or skew
# - stddev is quite small, suggesting consistent data
# - iqr is larger than stddev; for roughly bell-shaped data the IQR is about
#   1.35 standard deviations, so this is expected and does not by itself
#   mean the middle of the data is more spread out than the rest
# 2a.
# Column-oriented record of the students: each key becomes a DataFrame column.
students = {
    "name": [
        "garrus vakarian",
        "matilda bradbury",
        "cordelia vorkosigan",
        "kira nerys",
        "Jean-Luc Picard",
    ],
    "credits": [37, 43, 36, 23, 50],
    "GPA": [3.2, 4.0, 3.7, 3.6, 3.4],
    "Hometown": ["Palaven", "Whitestone", "Vashnoi", "Dahkur", "La Barre"],
}

# 2b.
# fyi: sdt stands for "superior data type"
sdt = pd.DataFrame(students)
print(sdt)
# beautiful. incredible. just amazing.

# 2c.
# Use the student name as the row label so rows can be looked up by name.
sdt = sdt.set_index("name")

# 2d.
# Label-based lookup of a single cell: (row label, column label).
kira_gpa = sdt.loc["kira nerys", "GPA"]
print("kira's GPA:" + str(kira_gpa))

# 2e.
# Overwrite one cell in place: matilda's credit count goes from 43 to 44.
sdt.loc["matilda bradbury", "credits"] = 44
print(sdt)

# 2f.
# Summary statistics for the two numeric columns, credits first, then GPA.
for column in ("credits", "GPA"):
    print(sdt[column].describe())

# 2g.
# Append a new column; the list supplies one value per row, in row order.
height = [10, 23, -1, 43, 100]

sdt["height (in meters)"] = height
print(sdt)