"""Print summary statistics for every ``hw3_*.txt`` file in the working directory."""

import glob

import numpy as np
import pandas as pd
import scipy.stats as s

# Data files to summarise, in sorted (deterministic) order.
filelist = sorted(glob.glob("hw3_*.txt"))


def read(file):
    """Load a text file holding one number per line.

    Args:
        file: Path of the file to read.

    Returns:
        A 1-D float ``numpy.ndarray`` with one entry per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line cannot be parsed as a float.
    """
    # The context manager closes the handle even if float() raises.
    with open(file, "r") as fileobj:
        # Blank lines (e.g. a trailing newline at end of file) carry no value.
        values = [float(line) for line in fileobj if line.strip()]
    return np.array(values, dtype=float)


# Statistics to report, in output order.
parameters = ["mean", "median", "std", "iqr", "skew", "kurtosis"]

# Each statistic name mapped to the function that computes it.  The first
# three come from numpy and the rest from scipy.stats, so a plain attribute
# lookup on a single module cannot resolve all of them.
_STAT_FUNCS = {
    "mean": np.mean,
    "median": np.median,
    "std": np.std,
    "iqr": s.iqr,
    "skew": s.skew,
    "kurtosis": s.kurtosis,
}


def summarize(data):
    """Compute every statistic named in ``parameters`` for ``data``.

    Args:
        data: 1-D array-like of numbers.

    Returns:
        A dict mapping each statistic name to its value, in the order the
        names appear in ``parameters``.
    """
    return {name: _STAT_FUNCS[name](data) for name in parameters}


for path in filelist:
    print(path)
    data = read(path)
    for name, value in summarize(data).items():
        # str() is required: the statistic is a number, not a string.
        print(name + " " + str(value))

# Observations on the results:
# - The mean and median are similar for all files, indicating solid,
#   outlier-free data.
# - Standard deviation is quite high for everything except wind shear,
#   indicating either inconsistent readings for everything but wind shear or,
#   more likely, smaller units and higher rates of change.
# - The difference between shr1's IQR and stddev is larger than that of
#   shr2's (shr2's IQR is quite close to its stddev), suggesting the
#   possibility of one minor outlier.
# - None of the data is very skewed, the largest absolute skew being 0.54896,
#   and all of the data has negative kurtosis, meaning its distribution has a
#   shallower peak than the bell curve (e^(-x^2)).
# - The February and May datasets are similar in that their wind shears are
#   similar, though May's is still larger.  They differ in that May's SRH and
#   CAPE are both much higher, so May's tornadoes are much stronger.