import pandas as pd
import numpy as np
import scipy.stats as s

# ---------------------------------------------------------------------------
# Part 1: read comma-separated values from a text file and summarize them.
# ---------------------------------------------------------------------------

# 1a. / 1b. Open the data file and read it; the context manager closes the
# handle even if reading fails.
with open("./mt2_datafile.txt", "r") as fileobj:
    datai = fileobj.readlines()

# Only the first line of the file is used; it is split on commas into a list
# of strings.
datam = datai[0]
dataf = datam.split(',')
print(dataf)

# 1c. Arrange the values in a 3x7 array. The values are still strings at this
# point (conversion to int happens in 1d), and the reshape requires the first
# line to contain exactly 21 comma-separated fields.
dataa = np.array(dataf).reshape(3, 7)
print(dataa)

# 1d. Convert every field to an integer, then print summary statistics:
# mean, median, standard deviation (numpy's default, population form) and
# interquartile range.
dataf = [int(value) for value in dataf]
print(dataf)
print(np.mean(dataf))
print(np.median(dataf))
print(np.std(dataf))
print(s.iqr(dataf))
# Author's observations on the output for the provided data file:
# - mean and median are close, which suggests there are no strong outliers
# - the standard deviation is quite small, suggesting consistent data
# - the IQR is larger than the standard deviation, so the middle of the data
#   may be more spread out than the rest (tentative; not verified)

# ---------------------------------------------------------------------------
# Part 2: build and manipulate a small DataFrame of students.
# ---------------------------------------------------------------------------

# 2a. Column-oriented student records.
students = {
    "name": [
        "garrus vakarian",
        "matilda bradbury",
        "cordelia vorkosigan",
        "kira nerys",
        "Jean-Luc Picard",
    ],
    "credits": [37, 43, 36, 23, 50],
    "GPA": [3.2, 4.0, 3.7, 3.6, 3.4],
    "Hometown": ["Palaven", "Whitestone", "Vashnoi", "Dahkur", "La Barre"],
}

# 2b. Build the DataFrame.
# fyi: sdt stands for "superior data type"
sdt = pd.DataFrame(students)
print(sdt)

# 2c. Index the rows by student name so they can be looked up by label.
sdt = sdt.set_index("name")

# 2d. Look up a single cell by row label and column name.
print(f"kira's GPA:{sdt.loc['kira nerys', 'GPA']}")

# 2e. Update a single cell in place.
sdt.loc["matilda bradbury", "credits"] = 44
print(sdt)

# 2f. Descriptive statistics for the two numeric columns.
print(sdt.credits.describe())
print(sdt.GPA.describe())

# 2g. Add a new column; the list supplies one value per row, in row order.
height = [10, 23, -1, 43, 100]
sdt["height (in meters)"] = height
print(sdt)