From 0420fda2f140e3201ffc90b34f381c35bcb6116e Mon Sep 17 00:00:00 2001 From: Daniel Svitan Date: Sat, 21 Dec 2024 18:53:17 +0100 Subject: [PATCH] :lipstick: Adds ticks to int grades --- analyze.py | 27 ++++++++++++++++----------- analyze_ses.py | 19 ++++++++++++------- analyze_sex.py | 5 ++--- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/analyze.py b/analyze.py index a285936..3cacd7b 100644 --- a/analyze.py +++ b/analyze.py @@ -27,27 +27,32 @@ def analyze(name: str, data: List[np.ndarray]): return F, p -def plot_box(data, labels, Fs, ps, title, titles): +def plot_box(data, labels, Fs, ps, title): if not graph: return + titles = ["Priemer", "Matematika", "Slovenčina", "Angličtina"] + fig, axs = plt.subplots(2, 2, sharex=True) fig.suptitle(title) fig.set_size_inches(12, 9) - for i in range(2): - for j in range(2): - print(f"{i}x{j} giving {i * 2 + j}") - axs[i, j].boxplot(data[i * 2 + j], labels=labels) - axs[i, j].set_title(titles[i * 2 + j]) + for j in range(2): + for k in range(2): + index = j * 2 + k + axs[j, k].boxplot(data[index], labels=labels) + axs[j, k].set_title(titles[index]) - F = round(Fs[i * 2 + j], 2) - p = round(ps[i * 2 + j], 4) - axs[i, j].text(0.01, 0.99, f"F-stat: {F}\np-val: {p}", ha="left", va="top", transform=axs[i, j].transAxes, + if index > 0: + axs[j, k].set_yticks(np.arange(1, 6, 1)) + + F = round(Fs[index], 2) + p = round(ps[index], 4) + axs[j, k].text(0.01, 0.99, f"F-stat: {F}\np-val: {p}", ha="left", va="top", transform=axs[j, k].transAxes, fontweight="bold") - avgs = np.array([a.mean() for a in data[i * 2 + j]]) - print(avgs) + medians = np.array([np.median(a) for a in data[j * 2 + k]]) + print(medians) # TODO: add to graph fig.tight_layout() fig.show() diff --git a/analyze_ses.py b/analyze_ses.py index c14e530..a287fd7 100644 --- a/analyze_ses.py +++ b/analyze_ses.py @@ -1,6 +1,6 @@ import numpy as np -from analyze import analyze +from analyze import analyze, plot_box dataset = np.load("clean.npy") print(f"dataset shape: {dataset.shape}; analyzing column 6 (ses)") @@ -12,14 +12,19 @@ print("") def analyze_ses(name: str, col: np.ndarray): sex_col = dataset[:, 6] - analyze(name, [ + data = [ col[sex_col == 0], col[sex_col == 1], col[sex_col == 2] - ]) + ] + F, p = analyze(name, data) + return data, F, p -analyze_ses("gpa", dataset[:, 2]) -analyze_ses("math", dataset[:, 3]) -analyze_ses("slovak", dataset[:, 4]) -analyze_ses("english", dataset[:, 5]) +data_gpa, F_gpa, p_gpa = analyze_ses("gpa", dataset[:, 2]) +data_math, F_math, p_math = analyze_ses("math", dataset[:, 3]) +data_slovak, F_slovak, p_slovak = analyze_ses("slovak", dataset[:, 4]) +data_english, F_english, p_english = analyze_ses("english", dataset[:, 5]) + +plot_box([data_gpa, data_math, data_slovak, data_english], ["Nižšia trieda", "Stredná trieda", "Vyššia trieda"], + [F_gpa, F_math, F_slovak, F_english], [p_gpa, p_math, p_slovak, p_english], "Socio-ekonomická trieda") diff --git a/analyze_sex.py b/analyze_sex.py index e2be9a3..d1f1598 100644 --- a/analyze_sex.py +++ b/analyze_sex.py @@ -24,6 +24,5 @@ data_math, F_math, p_math = analyze_sex("math", dataset[:, 3]) data_slovak, F_slovak, p_slovak = analyze_sex("slovak", dataset[:, 4]) data_english, F_english, p_english = analyze_sex("english", dataset[:, 5]) -plot_box([data_gpa, data_math, data_slovak, data_english], ["Female", "Male"], - [F_gpa, F_math, F_slovak, F_english], [p_gpa, p_math, p_slovak, p_english], - "Pohlavie", ["Priemer", "Matematika", "Slovenčina", "Angličtina"]) +plot_box([data_gpa, data_math, data_slovak, data_english], ["Ženy", "Muži"], + [F_gpa, F_math, F_slovak, F_english], [p_gpa, p_math, p_slovak, p_english], "Pohlavie")