From 96a6599cf92b4860bd6fd0f33e25a7cd2229d1fa Mon Sep 17 00:00:00 2001 From: Daniel Svitan Date: Fri, 27 Dec 2024 16:13:25 +0100 Subject: [PATCH] :lipstick: Fixes minor mistakes --- analyze.py | 26 ++++++++++-------- analyze_absence.py | 67 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 71 insertions(+), 22 deletions(-) diff --git a/analyze.py b/analyze.py index 278cd21..5aaacb8 100644 --- a/analyze.py +++ b/analyze.py @@ -119,10 +119,14 @@ def plot_violin(data, labels, Fs, ps, title): axs[j, k].set_ylabel(grade_name_labels[index], fontweight="bold") # q1-q3 lines - for vec in data[index]: - inds = np.arange(1, len(data[index]) + 1) + for ind, vec in enumerate(data[index]): quartile1, median, quartile3 = np.percentile(vec, [25, 50, 75]) - axs[j, k].vlines(inds, quartile1, quartile3, color="gray", linewidths=3) + if quartile1 == quartile3: + if quartile1 >= 0.1: + quartile1 -= 0.1 + if quartile3 <= max(vec) - 0.1: + quartile3 += 0.1 + axs[j, k].vlines(ind + 1, quartile1, quartile3, color="gray", linewidths=3) axs[j, k].set_xticks(np.arange(1, len(labels) + 1), labels=labels) axs[j, k].set_yticks(np.arange(1, 5.01, step)) @@ -134,9 +138,9 @@ def plot_violin(data, labels, Fs, ps, title): part.set_facecolor(colors[i % len(colors)]) part.set_edgecolor(edge_colors[i % len(edge_colors)]) - F = round(Fs[index], 2) - p = round(ps[index], 4) - axs[j, k].text(0.01, 0.99, f"F-stat: {F:.2f}\np-val: {p:.4f}", ha="left", va="top", + F = Fs[index] + p = ps[index] + axs[j, k].text(0.01, 0.99, f"F-stat: {F:.4f}\np-val: {p:.4f}", ha="left", va="top", transform=axs[j, k].transAxes, fontweight="bold") axs[j, k].text(0.99, 0.99, @@ -147,12 +151,12 @@ def plot_violin(data, labels, Fs, ps, title): medians = list([np.median(a) for a in data[index]]) means = list([a.mean() for a in data[index]]) - for l in range(len(medians)): - median = round(medians[l], 2) - mean = round(means[l], 2) + for l in range(len(data[index])): + median = medians[l] + mean = means[l] # left - 
mean, right - median - axs[j, k].text(l + 1.13, median - 0.05, f"{median}", color="green") - axs[j, k].text(l + 0.90 - len(labels) * 0.065, mean - 0.05, f"{mean}", color="red") + axs[j, k].text(l + 1.13, median - 0.05, f"{median:.2f}", color="green") + axs[j, k].text(l + 0.90 - len(labels) * 0.065, mean - 0.05, f"{mean:.2f}", color="red") fig.tight_layout() if save != "": diff --git a/analyze_absence.py b/analyze_absence.py index ce74412..3475097 100644 --- a/analyze_absence.py +++ b/analyze_absence.py @@ -11,6 +11,9 @@ args = parser.parse_args() graph = args.graph save = args.save +colors = ["lightblue", "lightgreen", "lightcoral"] +edge_colors = ["blue", "green", "red"] + dataset = np.load("clean.npy") print(f"dataset shape: {dataset.shape}; analyzing column 11 (absence)") print("\tinteger value") @@ -55,29 +58,71 @@ for j in range(2): step = 1 if index > 0 else 0.5 if index == 0: - axs[j, k].scatter(dataset[:, 11], dataset[:, 2]) + x = dataset[:, 11] # absence + y = dataset[:, 2] # gpa + axs[j, k].scatter(x, y) axs[j, k].set_xlabel("Počet vymeškaných hodín") axs[j, k].set_ylabel(grade_name_labels[index]) + + # trendline + z = np.polyfit(x, y, 1) + p = np.poly1d(z) + + axs[j, k].plot(x, p(x), color="gray") else: current = list([data[index][0][data[index][1] == i + 1] for i in range(5)]) # i wanna kms - axs[j, k].violinplot(list(filter(lambda x: len(x), current)), showmeans=True) - axs[j, k].set_xticks(np.arange(1, 6, 1), labels=["1", "2", "3", "4", "5"]) + # data[index][0] = absence + # data[index][1] = grade + # data[index][0][where this specific grade] -> absences for that specific grade + # iterate 1 through 5 and plug into ^^ + + parts = axs[j, k].violinplot(list([x if len(x) else [0] for x in current]), showmeans=True, + showmedians=True) + axs[j, k].set_xticks(np.arange(1, 6), labels=["1", "2", "3", "4", "5"]) axs[j, k].set_xlabel(grade_name_labels[index]) axs[j, k].set_ylabel("Počet vymeškaných hodín") + # q1-q3 lines + for ind, vec in enumerate(current): 
+ if len(vec) == 0: + continue + quartile1, median, quartile3 = np.percentile(vec, [25, 50, 75]) + print(quartile1, median, quartile3) + axs[j, k].vlines(ind + 1, quartile1, quartile3, color="gray", linewidths=3) + + parts["cmeans"].set_color("red") + parts["cmedians"].set_color("green") + + for i, part in enumerate(parts["bodies"]): + part.set_facecolor(colors[i % len(colors)]) + part.set_edgecolor(edge_colors[i % len(edge_colors)]) + axs[j, k].set_title(grade_names[index]) - tau = round(taus[index], 2) - p = round(ps[index], 4) - axs[j, k].text(0.01, 0.99, f"Tau τ: {tau:.2f}\np-val: {p:.4f}", ha="left", va="top", transform=axs[j, k].transAxes, + tau = taus[index] + p = ps[index] + axs[j, k].text(0.01, 0.99, f"Tau τ: {tau:.4f}\np-val: {p:.4f}", ha="left", va="top", + transform=axs[j, k].transAxes, fontweight="bold") if index: - by_grade = [data[index][0][data[index][1] == i + 1] for i in range(5)] - means = list([a.mean() for a in filter(lambda b: len(b), by_grade)]) - for l in range(len(means)): - mean = round(means[l], 2) - axs[j, k].text(l + 1.02, mean + 5, f"{mean}") + axs[j, k].text(0.99, 0.99, + f"Na ľavo - priemer (červená)\nNa pravo - medián (zelená)\nSivá - medzi kvartilom 1 a 3", + ha="right", + va="top", + transform=axs[j, k].transAxes) + + current = list([data[index][0][data[index][1] == i + 1] for i in range(5)]) # i wanna kms + medians = list([np.median(a) if len(a) else -1 for a in current]) + means = list([a.mean() if len(a) else -1 for a in current]) + for l in range(len(current)): + median = medians[l] + mean = means[l] + # left - mean, right - median + if median >= 0: + axs[j, k].text(l + 1.14, median - 5, f"{median:.2f}", color="green", ha="left") + if mean >= 0: + axs[j, k].text(l + 0.85, mean - 5, f"{mean:.2f}", color="red", ha="right") fig.tight_layout() if save != "":