From 71603ec299ec95d68fb9c6a4f130085f63b1a7c3 Mon Sep 17 00:00:00 2001 From: Daniel Svitan Date: Sat, 21 Dec 2024 19:34:11 +0100 Subject: [PATCH] :zap: Adds graphing occupation --- analyze.py | 8 +++++--- analyze_occupation.py | 20 +++++++++++++------- analyze_ses.py | 6 +++--- analyze_sex.py | 6 +++--- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/analyze.py b/analyze.py index cc443cb..a1aaf5a 100644 --- a/analyze.py +++ b/analyze.py @@ -27,7 +27,7 @@ def analyze(name: str, data: List[np.ndarray]): return F, p -def plot_box(data, labels, Fs, ps, title): +def plot_violin(data, labels, Fs, ps, title): if not graph: return @@ -52,8 +52,10 @@ def plot_box(data, labels, Fs, ps, title): axs[j, k].text(0.01, 0.99, f"F-stat: {F}\np-val: {p}", ha="left", va="top", transform=axs[j, k].transAxes, fontweight="bold") - means = np.array([a.mean() for a in data[j * 2 + k]]) - print(means) # TODO: add to graph + means = list([a.mean() for a in data[j * 2 + k]]) + for l in range(len(means)): + mean = round(means[l], 2) + axs[j, k].text(l + 1.05, mean + 0.05, f"{mean}") fig.tight_layout() fig.show() diff --git a/analyze_occupation.py b/analyze_occupation.py index 1de8ef1..4c3532d 100644 --- a/analyze_occupation.py +++ b/analyze_occupation.py @@ -1,6 +1,6 @@ import numpy as np -from analyze import analyze +from analyze import analyze, plot_violin dataset = np.load("clean.npy") print(f"dataset shape: {dataset.shape}; analyzing column 7 (occupation)") @@ -15,17 +15,23 @@ print("") def analyze_occupation(name: str, col: np.ndarray): occupation_col = dataset[:, 7] - analyze(name, [ + data = [ col[occupation_col == 0], col[occupation_col == 1], col[occupation_col == 2], col[occupation_col == 3], col[occupation_col == 4], col[occupation_col == 5] - ]) + ] + F, p = analyze(name, data) + return data, F, p -analyze_occupation("gpa", dataset[:, 2]) -analyze_occupation("math", dataset[:, 3]) -analyze_occupation("slovak", dataset[:, 4]) -analyze_occupation("english", dataset[:, 5]) +data_gpa, F_gpa, p_gpa = analyze_occupation("gpa", dataset[:, 2]) +data_math, F_math, p_math = analyze_occupation("math", dataset[:, 3]) +data_slovak, F_slovak, p_slovak = analyze_occupation("slovak", dataset[:, 4]) +data_english, F_english, p_english = analyze_occupation("english", dataset[:, 5]) + +plot_violin([data_gpa, data_math, data_slovak, data_english], + ["Veľa práce", "Málo práce", "Šport", "Hudba", "Iné", "Žiadne"], + [F_gpa, F_math, F_slovak, F_english], [p_gpa, p_math, p_slovak, p_english], "Práca alebo aktivita") diff --git a/analyze_ses.py b/analyze_ses.py index a287fd7..ab72ff2 100644 --- a/analyze_ses.py +++ b/analyze_ses.py @@ -1,6 +1,6 @@ import numpy as np -from analyze import analyze, plot_box +from analyze import analyze, plot_violin dataset = np.load("clean.npy") print(f"dataset shape: {dataset.shape}; analyzing column 6 (ses)") @@ -26,5 +26,5 @@ data_math, F_math, p_math = analyze_ses("math", dataset[:, 3]) data_slovak, F_slovak, p_slovak = analyze_ses("slovak", dataset[:, 4]) data_english, F_english, p_english = analyze_ses("english", dataset[:, 5]) -plot_box([data_gpa, data_math, data_slovak, data_english], ["Nižšia trieda", "Stredná trieda", "Vyššia trieda"], - [F_gpa, F_math, F_slovak, F_english], [p_gpa, p_math, p_slovak, p_english], "Socio-ekonomická trieda") +plot_violin([data_gpa, data_math, data_slovak, data_english], ["Nižšia trieda", "Stredná trieda", "Vyššia trieda"], + [F_gpa, F_math, F_slovak, F_english], [p_gpa, p_math, p_slovak, p_english], "Socio-ekonomická trieda") diff --git a/analyze_sex.py b/analyze_sex.py index d1f1598..999e45b 100644 --- a/analyze_sex.py +++ b/analyze_sex.py @@ -1,6 +1,6 @@ import numpy as np -from analyze import analyze, plot_box +from analyze import analyze, plot_violin dataset = np.load("clean.npy") print(f"dataset shape: {dataset.shape}; analyzing column 1 (sex)") @@ -24,5 +24,5 @@ data_math, F_math, p_math = analyze_sex("math", dataset[:, 3]) data_slovak, F_slovak, p_slovak = analyze_sex("slovak", dataset[:, 4]) data_english, F_english, p_english = analyze_sex("english", dataset[:, 5]) -plot_box([data_gpa, data_math, data_slovak, data_english], ["Ženy", "Muži"], - [F_gpa, F_math, F_slovak, F_english], [p_gpa, p_math, p_slovak, p_english], "Pohlavie") +plot_violin([data_gpa, data_math, data_slovak, data_english], ["Ženy", "Muži"], + [F_gpa, F_math, F_slovak, F_english], [p_gpa, p_math, p_slovak, p_english], "Pohlavie")