🔨 Refactors analyzing into one function

This commit is contained in:
Daniel Svitan 2024-12-15 12:00:04 +01:00
parent d03ef94d4f
commit bade4ec45c
3 changed files with 40 additions and 30 deletions

18
analyze.py Normal file
View File

@ -0,0 +1,18 @@
from typing import List
import numpy as np
import scipy.stats as stats
def analyze(name: str, data: List[np.ndarray], *, alpha: float = 0.05):
    """Run a one-way ANOVA over the groups in ``data`` and print the outcome.

    The F statistic and p-value from ``scipy.stats.f_oneway`` are always
    printed. If the p-value does not exceed ``alpha``, the result is reported
    as significant and the ``scipy.stats.tukey_hsd`` result for the same
    groups is printed as well.

    Args:
        name: Label used in the printed lines.
        data: One array of observations per group; each array is passed to
            SciPy as a separate sample.
        alpha: Significance threshold. A p-value strictly greater than this
            is reported as insignificant. Defaults to 0.05.
    """
    F, p = stats.f_oneway(*data)
    print(f"F-stats for {name}: {F}")
    print(f"p-value for {name}: {p}")

    # NaN compares False against any threshold, so without this guard an
    # undefined test result would fall through to the "significant" branch.
    if np.isnan(p):
        print("p-value is undefined, cannot determine significance\n")
        return

    if p > alpha:
        print("statistically insignificant\n")
        return

    print("statistically significant")
    print(stats.tukey_hsd(*data))

View File

@ -1,5 +1,6 @@
import numpy as np import numpy as np
import scipy.stats as stats
from analyze import analyze
dataset = np.load("clean.npy") dataset = np.load("clean.npy")
print(f"dataset shape: {dataset.shape}, analyzing column 6 (sex)") print(f"dataset shape: {dataset.shape}, analyzing column 6 (sex)")
@ -12,22 +13,19 @@ print("\t5 - none")
print("") print("")
def analyze(name: str, col: np.ndarray): def analyze_occupation(name: str, col: np.ndarray):
occupation_col = dataset[:, 6] occupation_col = dataset[:, 6]
F, p = stats.f_oneway(col[occupation_col == 0], col[occupation_col == 1], col[occupation_col == 2], col[occupation_col == 3], col[occupation_col == 4], col[occupation_col == 5]) analyze(name, [
print(f"F-stats for {name}: {F}") col[occupation_col == 0],
print(f"p-value for {name}: {p}") col[occupation_col == 1],
col[occupation_col == 2],
if p > 0.05: col[occupation_col == 3],
print("statistically insignificant\n") col[occupation_col == 4],
return col[occupation_col == 5]
])
print("statistically significant")
tukey_results = stats.tukey_hsd(col[occupation_col == 0], col[occupation_col == 1], col[occupation_col == 2], col[occupation_col == 3], col[occupation_col == 4], col[occupation_col == 5])
print(tukey_results)
analyze("gpa", dataset[:, 2]) analyze_occupation("gpa", dataset[:, 2])
analyze("math", dataset[:, 3]) analyze_occupation("math", dataset[:, 3])
analyze("slovak", dataset[:, 4]) analyze_occupation("slovak", dataset[:, 4])
analyze("english", dataset[:, 5]) analyze_occupation("english", dataset[:, 5])

View File

@ -1,5 +1,6 @@
import numpy as np import numpy as np
import scipy.stats as stats
from analyze import analyze
dataset = np.load("clean.npy") dataset = np.load("clean.npy")
print(f"dataset shape: {dataset.shape}, analyzing column 1 (sex)") print(f"dataset shape: {dataset.shape}, analyzing column 1 (sex)")
@ -8,19 +9,12 @@ print("\t1 - male")
print("") print("")
def analyze(name: str, col: np.ndarray): def analyze_sex(name: str, col: np.ndarray):
sex_col = dataset[:, 1] sex_col = dataset[:, 1]
F, p = stats.f_oneway(col[sex_col == 0], col[sex_col == 1]) analyze(name, [
print(f"F-stats for {name}: {F}") col[sex_col == 0],
print(f"p-value for {name}: {p}") col[sex_col == 1]
])
if p > 0.05:
print("statistically insignificant\n")
return
print("statistically significant")
tukey_results = stats.tukey_hsd(col[sex_col == 0], col[sex_col == 1])
print(tukey_results)
analyze("gpa", dataset[:, 2]) analyze("gpa", dataset[:, 2])