Files
misc/python/atms-310/midterms/midterm2.py
2025-07-02 00:07:49 -07:00

64 lines
1.3 KiB
Python
Executable File

import pandas as pd
import numpy as np
import scipy.stats as s
# Problem 1: read comma-separated values from the data file and summarize them.
# 1a. Open the data file; the `with` block guarantees the handle is closed,
#     even if reading raises.
with open("./mt2_datafile.txt", "r") as fileobj:
    # 1b. Read every line, but only the first one is used below.
    datai = fileobj.readlines()
datam = datai[0]
dataf = datam.split(',')
print(dataf)
# 1c. Arrange the fields as a 3x7 array. The fields are still strings here
#     (the int conversion happens in 1d), and reshape raises ValueError
#     unless there are exactly 21 of them.
dataa = np.array(dataf).reshape(3, 7)
print(dataa)
# 1d. Convert every field to int, then print the summary statistics.
dataf = [int(field) for field in dataf]
print(dataf)
print(np.mean(dataf))
print(np.median(dataf))
# np.std defaults to ddof=0, i.e. the population standard deviation.
print(np.std(dataf))
print(s.iqr(dataf))
# Author's observations on the printed values:
# mean and median are close, no outliers
# stddev is quite small, suggesting consistent data
# iqr is larger than stddev; on its own that is not unusual (for normally
# distributed data IQR is about 1.35 * stddev), so it does not by itself show
# that the middle of the data is more spread out than the rest
# Problem 2: build a small table of students and manipulate it with pandas.
# 2a. Raw records, stored column-wise (one list per column).
students = {
    "name": [
        "garrus vakarian",
        "matilda bradbury",
        "cordelia vorkosigan",
        "kira nerys",
        "Jean-Luc Picard",
    ],
    "credits": [37, 43, 36, 23, 50],
    "GPA": [3.2, 4.0, 3.7, 3.6, 3.4],
    "Hometown": ["Palaven", "Whitestone", "Vashnoi", "Dahkur", "La Barre"],
}
# 2b. Turn the dict into a DataFrame ("sdt" is short for "superior data type").
sdt = pd.DataFrame(students)
print(sdt)
# 2c. Use the student name as the row index.
sdt = sdt.set_index("name")
# 2d. Look up a single cell by row label and column label.
kira_gpa = sdt.at["kira nerys", "GPA"]
print(f"kira's GPA:{kira_gpa!s}")
# 2e. Overwrite a single cell, then show the updated table.
sdt.at["matilda bradbury", "credits"] = 44
print(sdt)
# 2f. Summary statistics for the two numeric columns, credits first.
for column in ("credits", "GPA"):
    print(sdt[column].describe())
# 2g. Append a new column; values are matched to rows by position.
height = [10, 23, -1, 43, 100]
sdt["height (in meters)"] = height
print(sdt)