🔨 Refactors analyzing into one function
This commit is contained in:
parent
d03ef94d4f
commit
bade4ec45c
18
analyze.py
Normal file
18
analyze.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
from typing import List
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import scipy.stats as stats
|
||||||
|
|
||||||
|
|
||||||
|
def analyze(name: str, data: List[np.ndarray]) -> None:
    """Run a one-way ANOVA over the groups in ``data`` and print the outcome.

    The F statistic and p-value returned by ``scipy.stats.f_oneway`` are
    always printed.  If the p-value is greater than 0.05 the result is
    reported as insignificant; otherwise it is reported as significant and
    followed by the ``scipy.stats.tukey_hsd`` pairwise comparison of the
    same groups.  An undefined (NaN) p-value is reported as inconclusive
    and no post-hoc test is run.

    Args:
        name: Label used in the printed messages.
        data: One array of observations per group.

    Returns:
        None. All results are written to standard output.
    """
    F, p = stats.f_oneway(*data)
    print(f"F-stats for {name}: {F}")
    print(f"p-value for {name}: {p}")

    # NaN compares False against every threshold, so without this guard an
    # undefined p-value would fall through to the "significant" branch.
    if np.isnan(p):
        print("inconclusive: p-value is undefined (NaN)\n")
        return

    if p > 0.05:
        print("statistically insignificant\n")
        return

    print("statistically significant")
    tukey_results = stats.tukey_hsd(*data)
    print(tukey_results)
|
@ -1,5 +1,6 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import scipy.stats as stats
|
|
||||||
|
from analyze import analyze
|
||||||
|
|
||||||
# Load the cleaned dataset. Column 6 is the occupation code that
# analyze_occupation() groups by, so the banner names it as such.
dataset = np.load("clean.npy")
print(f"dataset shape: {dataset.shape}, analyzing column 6 (occupation)")
|
||||||
@ -12,22 +13,19 @@ print("\t5 - none")
|
|||||||
print("")
|
print("")
|
||||||
|
|
||||||
|
|
||||||
def analyze_occupation(name: str, col: np.ndarray):
    """Compare ``col`` across the occupation codes held in dataset column 6.

    ``col`` is split into one group per occupation code (0 through 5) using
    the module-level ``dataset``, and the groups are passed to ``analyze``
    under the label ``name``.
    """
    codes = dataset[:, 6]
    groups = [col[codes == code] for code in range(6)]
    analyze(name, groups)
|
|
||||||
|
|
||||||
|
|
||||||
# Run the occupation comparison once per analysed column of the dataset.
for label, column in (("gpa", 2), ("math", 3), ("slovak", 4), ("english", 5)):
    analyze_occupation(label, dataset[:, column])
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import scipy.stats as stats
|
|
||||||
|
from analyze import analyze
|
||||||
|
|
||||||
# Load the cleaned dataset and report its shape together with the column
# that analyze_sex() groups by.
dataset = np.load("clean.npy")
print(f"dataset shape: {dataset.shape}, analyzing column 1 (sex)")
|
||||||
@ -8,19 +9,12 @@ print("\t1 - male")
|
|||||||
print("")
|
print("")
|
||||||
|
|
||||||
|
|
||||||
def analyze_sex(name: str, col: np.ndarray):
    """Compare ``col`` between the two sex codes held in dataset column 1.

    ``col`` is split into the rows coded 0 and the rows coded 1 using the
    module-level ``dataset``, and both groups are passed to ``analyze``
    under the label ``name``.
    """
    codes = dataset[:, 1]
    groups = [col[codes == code] for code in (0, 1)]
    analyze(name, groups)
|
|
||||||
|
|
||||||
|
|
||||||
# Call the local wrapper: the imported analyze() expects a list of per-group
# arrays, so handing it a single column would treat every value as a group.
analyze_sex("gpa", dataset[:, 2])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user