🚧 Adds (non-working) boxplot

This commit is contained in:
Daniel Svitan 2024-12-21 18:42:14 +01:00
parent 2b0829c78e
commit 9b90fb2db3
3 changed files with 51 additions and 9 deletions

View File

@ -15,7 +15,7 @@ The cleaned dataset will have the following structure:
| 8 | Living | enum | [0-4] |
| 9 | Commute | enum | [0-4] |
| 10 | Sleep | enum | [0-2] |
| 11 | Absence | int | - |
| 11 | Absence | int | [0-∞] |
It will be saved in a `.npy` file (NumPy's binary array format).

View File

@ -1,7 +1,14 @@
from typing import List
import argparse
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
# Command-line options. NOTE: parsing happens at module level, so it runs as
# soon as this module is executed or imported.
parser = argparse.ArgumentParser()
# -g/--graph is an opt-in switch: False unless the flag is given.
parser.add_argument("-g", "--graph", action="store_true", default=False, help="Plot graph")
args = parser.parse_args()
# Module-level flag consulted by plot_box() to decide whether to draw anything.
graph = args.graph
def analyze(name: str, data: List[np.ndarray]):
@ -11,8 +18,37 @@ def analyze(name: str, data: List[np.ndarray]):
if p > 0.05:
print("statistically insignificant\n")
return
return F, p
print("statistically significant")
tukey_results = stats.tukey_hsd(*data)
print(tukey_results)
return F, p
def plot_box(data, labels, Fs, ps, title, titles):
    """Show one box plot per dataset in a two-column grid of subplots.

    Nothing is drawn unless the module-level ``graph`` flag is set.

    Args:
        data: One entry per subplot; each entry is a sequence of arrays,
            one array per box (group) in that subplot.
        labels: Tick labels for the groups, in the same order as the arrays
            inside each ``data`` entry; shared by every subplot.
        Fs: F statistic for each subplot (annotated rounded to 2 decimals).
        ps: p-value for each subplot (annotated rounded to 4 decimals).
        title: Title of the whole figure.
        titles: Title of each subplot.

    Raises:
        ValueError: If ``Fs``, ``ps`` and ``titles`` do not each have exactly
            one entry per ``data`` entry.
    """
    if not graph:
        return

    panel_count = len(data)
    if not (len(Fs) == len(ps) == len(titles) == panel_count):
        raise ValueError(
            "data, Fs, ps and titles must all have one entry per subplot"
        )
    if panel_count == 0:
        # Nothing to plot; avoid opening an empty figure window.
        return

    # Two columns and as many rows as needed; four datasets give a 2x2 grid.
    ncols = min(2, panel_count)
    nrows = -(-panel_count // ncols)  # ceiling division
    # squeeze=False keeps `axs` two-dimensional even for a single row/column.
    fig, axs = plt.subplots(nrows, ncols, sharex=True, squeeze=False)
    fig.suptitle(title)
    fig.set_size_inches(12, 9)

    axes = axs.ravel()
    # boxplot() places the boxes at x = 1..N by default.
    tick_positions = list(range(1, len(labels) + 1))

    for ax, groups, panel_title, F_stat, p_val in zip(axes, data, titles, Fs, ps):
        ax.boxplot(groups)
        # Label the boxes explicitly instead of passing boxplot(labels=...),
        # which is deprecated since Matplotlib 3.9. Setting the ticks also
        # resets the tick locations that boxplot() accumulates on shared axes.
        ax.set_xticks(tick_positions, labels)
        ax.set_title(panel_title)

        F = round(F_stat, 2)
        p = round(p_val, 4)
        # Axes coordinates pin the annotation to the top-left corner
        # regardless of the data range.
        ax.text(0.01, 0.99, f"F-stat: {F}\np-val: {p}", ha="left", va="top",
                transform=ax.transAxes, fontweight="bold")

        avgs = np.array([np.mean(group) for group in groups])
        print(avgs)

    # Hide the unused cell left over when the number of datasets is odd.
    for ax in axes[panel_count:]:
        ax.set_visible(False)

    fig.tight_layout()
    # plt.show() already displays the figure and blocks until it is closed,
    # so a separate fig.show() beforehand is unnecessary.
    plt.show()

View File

@ -1,6 +1,6 @@
import numpy as np
from analyze import analyze
from analyze import analyze, plot_box
# Load the dataset from clean.npy; the path is relative, so the script must be
# run from the directory that contains the file.
dataset = np.load("clean.npy")
# Report the array shape; column 1 is the sex code used for grouping below.
print(f"dataset shape: {dataset.shape}; analyzing column 1 (sex)")
@ -11,13 +11,19 @@ print("")
def analyze_sex(name: str, col: np.ndarray):
    """Split ``col`` by sex and run ``analyze`` on the two groups.

    Args:
        name: Label of the analysed column, passed through to ``analyze``.
        col: One value per dataset row, aligned with the rows of ``dataset``.

    Returns:
        A tuple ``(data, F, p)``: the two per-sex samples (rows with sex
        code 0 first, then rows with sex code 1) followed by the two values
        returned by ``analyze``.
    """
    # Column 1 of the module-level dataset holds the sex code of each row.
    sex_col = dataset[:, 1]
    data = [col[sex_col == code] for code in (0, 1)]
    F, p = analyze(name, data)
    return data, F, p
# Compare the sexes on each grade column (dataset columns 2-5) exactly once,
# keeping the grouped samples and the test statistics for the plot below.
data_gpa, F_gpa, p_gpa = analyze_sex("gpa", dataset[:, 2])
data_math, F_math, p_math = analyze_sex("math", dataset[:, 3])
data_slovak, F_slovak, p_slovak = analyze_sex("slovak", dataset[:, 4])
data_english, F_english, p_english = analyze_sex("english", dataset[:, 5])

# One box plot per grade column. The group labels must follow the order in
# which analyze_sex builds the samples (sex code 0 first, then code 1).
# NOTE(review): assumes code 0 means female and 1 means male - confirm against
# the dataset encoding.
plot_box(
    [data_gpa, data_math, data_slovak, data_english],
    ["Female", "Male"],
    [F_gpa, F_math, F_slovak, F_english],
    [p_gpa, p_math, p_slovak, p_english],
    "Pohlavie",
    ["Priemer", "Matematika", "Slovenčina", "Angličtina"],
)