Compare commits
10 Commits
6fea60408f
...
2f3c547b55
Author | SHA1 | Date | |
---|---|---|---|
![]() |
2f3c547b55 | ||
![]() |
f6eafc28ec | ||
![]() |
96a6599cf9 | ||
![]() |
6ddd476834 | ||
![]() |
dc2e417969 | ||
![]() |
ab0d117c70 | ||
![]() |
f5fb3f647a | ||
![]() |
3ad7babcdc | ||
![]() |
6831e847ff | ||
![]() |
29ab473c3c |
3
.gitignore
vendored
3
.gitignore
vendored
@ -6,6 +6,7 @@ venv/
|
|||||||
__pycache__/
|
__pycache__/
|
||||||
|
|
||||||
results/
|
results/
|
||||||
|
paper/
|
||||||
|
|
||||||
*.zip
|
*.zip
|
||||||
*.csv
|
*.csv
|
||||||
@ -13,6 +14,8 @@ results/
|
|||||||
|
|
||||||
*.jasp
|
*.jasp
|
||||||
*.pth
|
*.pth
|
||||||
|
*.png
|
||||||
|
*.drawio
|
||||||
|
|
||||||
*.tar.gz
|
*.tar.gz
|
||||||
*.zip
|
*.zip
|
||||||
|
128
analyze.py
128
analyze.py
@ -1,8 +1,11 @@
|
|||||||
from typing import List
|
from typing import List
|
||||||
|
import itertools
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
import scipy.stats as stats
|
import scipy.stats as stats
|
||||||
|
import scikit_posthocs as sp
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
@ -12,34 +15,84 @@ args = parser.parse_args()
|
|||||||
graph = args.graph
|
graph = args.graph
|
||||||
save = args.save
|
save = args.save
|
||||||
|
|
||||||
|
colors = ["lightblue", "lightgreen", "lightcoral"]
|
||||||
|
edge_colors = ["blue", "green", "red"]
|
||||||
|
|
||||||
|
|
||||||
|
# source: mostly generated with ChatGPT
|
||||||
def analyze(name: str, data: List[np.ndarray]):
|
def analyze(name: str, data: List[np.ndarray]):
|
||||||
#print(f"Checking if normally distributed for {name}")
|
|
||||||
#for i in range(len(data)):
|
|
||||||
# _, normal_p = stats.shapiro(data[i])
|
|
||||||
# if normal_p > 0.05:
|
|
||||||
# print(f"\tGroup {i}: normally distributed")
|
|
||||||
# else:
|
|
||||||
# print(f"\tGroup {i}: NOT normally distributed")
|
|
||||||
|
|
||||||
filtered_data = []
|
filtered_data = []
|
||||||
|
group_names = []
|
||||||
|
all_values = []
|
||||||
for index, item in enumerate(data):
|
for index, item in enumerate(data):
|
||||||
if len(item) > 5:
|
numeric_data = [x for x in item if isinstance(x, (int, float))]
|
||||||
filtered_data.append(item)
|
if len(numeric_data) > 5:
|
||||||
|
filtered_data.append(numeric_data)
|
||||||
|
group_names.append(chr(65 + index))
|
||||||
|
all_values.extend(numeric_data)
|
||||||
else:
|
else:
|
||||||
print(f"Data group at index {index} removed due to insufficient size ({len(item)})")
|
print(f"Data group at index {index} removed due to insufficient size ({len(numeric_data)})")
|
||||||
|
|
||||||
|
if len(filtered_data) < 2:
|
||||||
|
print(f"Insufficient number of groups for Kruskal-Wallis test in {name}")
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
# Kruskal-Wallis Test
|
||||||
F, p = stats.kruskal(*filtered_data)
|
F, p = stats.kruskal(*filtered_data)
|
||||||
print(f"F-stats for {name}: {F}")
|
print(f"\nF-stats for {name}: {F:.8f}")
|
||||||
print(f"p-value for {name}: {p}")
|
print(f"p-value for {name}: {p:.8f}")
|
||||||
|
|
||||||
if round(p, 4) > 0.05:
|
if p > 0.05:
|
||||||
print("statistically insignificant\n")
|
print("statistically insignificant\n")
|
||||||
return F, p
|
return F, p
|
||||||
|
|
||||||
print("statistically significant")
|
print("statistically significant")
|
||||||
tukey_results = stats.tukey_hsd(*filtered_data)
|
|
||||||
print(tukey_results)
|
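# Post-hoc pairwise test (Bonferroni-adjusted p-values). NOTE(review): the call below is sp.posthoc_conover (Conover test), not sp.posthoc_dunn - confirm which test is intended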
|
||||||
|
all_ranks = stats.rankdata(all_values) # Rank all values together
|
||||||
|
group_ranks = [all_ranks[start:start + len(group)] for start, group in
|
||||||
|
zip(np.cumsum([0] + [len(g) for g in filtered_data[:-1]]), filtered_data)]
|
||||||
|
posthoc_results = sp.posthoc_conover(filtered_data, p_adjust='bonferroni')
|
||||||
|
|
||||||
|
results = []
|
||||||
|
total_sample_size = len(all_values)
|
||||||
|
for group1, group2 in itertools.combinations(group_names, 2):
|
||||||
|
idx1 = group_names.index(group1)
|
||||||
|
idx2 = group_names.index(group2)
|
||||||
|
|
||||||
|
mean_rank_1 = np.mean(group_ranks[idx1])
|
||||||
|
mean_rank_2 = np.mean(group_ranks[idx2])
|
||||||
|
rank_diff = mean_rank_1 - mean_rank_2
|
||||||
|
|
||||||
|
n1 = len(filtered_data[idx1])
|
||||||
|
n2 = len(filtered_data[idx2])
|
||||||
|
|
||||||
|
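# Effect size r = z / sqrt(N). NOTE(review): this is not the rank-biserial correlation; also verify the z-statistic denominator against the intended post-hoc test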
|
||||||
|
z_stat = rank_diff / np.sqrt((n1 + n2) * (n1 * n2) / total_sample_size)
|
||||||
|
effect_size = z_stat / np.sqrt(total_sample_size)
|
||||||
|
|
||||||
|
# Mean difference
|
||||||
|
mean_diff = np.mean(filtered_data[idx1]) - np.mean(filtered_data[idx2])
|
||||||
|
|
||||||
|
# Median difference
|
||||||
|
median_diff = np.median(filtered_data[idx1]) - np.median(filtered_data[idx2])
|
||||||
|
|
||||||
|
# Post-hoc p-value for this pair, read from posthoc_results (computed by posthoc_conover, Bonferroni-adjusted)
|
||||||
|
p_value = posthoc_results.loc[idx1 + 1, idx2 + 1]
|
||||||
|
|
||||||
|
results.append({
|
||||||
|
"Skupina 1": group1,
|
||||||
|
"Skupina 2": group2,
|
||||||
|
"Veľkosť účinku": f"{effect_size:.4f}",
|
||||||
|
"Rozdiel priemerov": f"{mean_diff:.4f}",
|
||||||
|
"Rozdiel mediánov": f"{median_diff:.4f}",
|
||||||
|
"Post-Hoc p-hodnota": f"{p_value:.4f}"
|
||||||
|
})
|
||||||
|
|
||||||
|
results_df = pd.DataFrame(results, dtype="object")
|
||||||
|
print("\nSummary Table of Effect Size, Mean, and Median Differences:")
|
||||||
|
print(results_df.to_markdown(index=False, tablefmt="github", disable_numparse=True))
|
||||||
|
print("")
|
||||||
|
|
||||||
return F, p
|
return F, p
|
||||||
|
|
||||||
@ -60,25 +113,52 @@ def plot_violin(data, labels, Fs, ps, title):
|
|||||||
index = j * 2 + k
|
index = j * 2 + k
|
||||||
step = 1 if index > 0 else 0.5
|
step = 1 if index > 0 else 0.5
|
||||||
|
|
||||||
axs[j, k].violinplot(data[index], showmedians=True)
|
parts = axs[j, k].violinplot(data[index], showmedians=True, showmeans=True)
|
||||||
axs[j, k].set_title(grade_names[index])
|
axs[j, k].set_title(grade_names[index])
|
||||||
axs[j, k].set_xlabel(title, fontweight="bold")
|
axs[j, k].set_xlabel(title, fontweight="bold")
|
||||||
axs[j, k].set_ylabel(grade_name_labels[index], fontweight="bold")
|
axs[j, k].set_ylabel(grade_name_labels[index], fontweight="bold")
|
||||||
|
|
||||||
|
# q1-q3 lines
|
||||||
|
for ind, vec in enumerate(data[index]):
|
||||||
|
quartile1, median, quartile3 = np.percentile(vec, [25, 50, 75])
|
||||||
|
if quartile1 == quartile3:
|
||||||
|
if quartile1 >= 0.1:
|
||||||
|
quartile1 -= 0.1
|
||||||
|
if quartile3 <= max(vec) - 0.1:
|
||||||
|
quartile3 += 0.1
|
||||||
|
axs[j, k].vlines(ind + 1, quartile1, quartile3, color="gray", linewidths=3)
|
||||||
|
|
||||||
axs[j, k].set_xticks(np.arange(1, len(labels) + 1), labels=labels)
|
axs[j, k].set_xticks(np.arange(1, len(labels) + 1), labels=labels)
|
||||||
axs[j, k].set_yticks(np.arange(1, 5.01, step))
|
axs[j, k].set_yticks(np.arange(1, 5.01, step))
|
||||||
|
|
||||||
F = round(Fs[index], 2)
|
parts["cmeans"].set_color("red")
|
||||||
p = round(ps[index], 4)
|
parts["cmedians"].set_color("green")
|
||||||
axs[j, k].text(0.01, 0.99, f"F-stat: {F:.2f}\np-val: {p:.4f}", ha="left", va="top", transform=axs[j, k].transAxes,
|
|
||||||
|
for i, part in enumerate(parts["bodies"]):
|
||||||
|
part.set_facecolor(colors[i % len(colors)])
|
||||||
|
part.set_edgecolor(edge_colors[i % len(edge_colors)])
|
||||||
|
|
||||||
|
F = Fs[index]
|
||||||
|
p = ps[index]
|
||||||
|
axs[j, k].text(0.01, 0.99, f"F-stat: {F:.4f}\np-val: {p:.4f}", ha="left", va="top",
|
||||||
|
transform=axs[j, k].transAxes,
|
||||||
fontweight="bold")
|
fontweight="bold")
|
||||||
|
axs[j, k].text(0.99, 0.99,
|
||||||
|
f"Na ľavo - priemer (červená)\nNa pravo - medián (zelená)\nSivá - medzi kvartilom 1 a 3",
|
||||||
|
ha="right",
|
||||||
|
va="top",
|
||||||
|
transform=axs[j, k].transAxes)
|
||||||
|
|
||||||
medians = list([np.median(a) for a in data[index]])
|
medians = list([np.median(a) for a in data[index]])
|
||||||
for l in range(len(medians)):
|
means = list([a.mean() for a in data[index]])
|
||||||
median = round(medians[l], 2)
|
for l in range(len(data[index])):
|
||||||
axs[j, k].text(l + 1.05, median + 0.05, f"{median}")
|
median = medians[l]
|
||||||
|
mean = means[l]
|
||||||
|
# left - mean, right - median
|
||||||
|
axs[j, k].text(l + 1.13, median - 0.05, f"{median:.2f}", color="green")
|
||||||
|
axs[j, k].text(l + 0.90 - len(labels) * 0.065, mean - 0.05, f"{mean:.2f}", color="red")
|
||||||
|
|
||||||
fig.tight_layout()
|
fig.tight_layout()
|
||||||
fig.show()
|
|
||||||
if save != "":
|
if save != "":
|
||||||
plt.savefig(save)
|
plt.savefig(save)
|
||||||
else:
|
else:
|
||||||
|
19
analyze.sh
Executable file
19
analyze.sh
Executable file
@ -0,0 +1,19 @@
|
|||||||
|
#!/usr/bin/bash
|
||||||
|
|
||||||
|
find results ! -name 'train.txt' -type f -exec rm -f {} +
|
||||||
|
|
||||||
|
./venv/bin/python3 distribution.py --graph --save | tee results/distribution.txt
|
||||||
|
echo -e "\n\n\n\n"
|
||||||
|
./venv/bin/python3 analyze_sex.py --graph --save "results/Figure_13.png" | tee results/sex.txt
|
||||||
|
echo -e "\n\n\n\n"
|
||||||
|
./venv/bin/python3 analyze_ses.py --graph --save "results/Figure_14.png" | tee results/ses.txt
|
||||||
|
echo -e "\n\n\n\n"
|
||||||
|
./venv/bin/python3 analyze_occupation.py --graph --save "results/Figure_15.png" | tee results/occupation.txt
|
||||||
|
echo -e "\n\n\n\n"
|
||||||
|
./venv/bin/python3 analyze_living.py --graph --save "results/Figure_16.png" | tee results/living.txt
|
||||||
|
echo -e "\n\n\n\n"
|
||||||
|
./venv/bin/python3 analyze_commute.py --graph --save "results/Figure_17.png" | tee results/commute.txt
|
||||||
|
echo -e "\n\n\n\n"
|
||||||
|
./venv/bin/python3 analyze_sleep.py --graph --save "results/Figure_18.png" | tee results/sleep.txt
|
||||||
|
echo -e "\n\n\n\n"
|
||||||
|
./venv/bin/python3 analyze_absence.py --graph --save "results/Figure_19.png" | tee results/absence.txt
|
@ -6,8 +6,13 @@ import matplotlib.pyplot as plt
|
|||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("-g", "--graph", action="store_true", default=False, help="Plot graph")
|
parser.add_argument("-g", "--graph", action="store_true", default=False, help="Plot graph")
|
||||||
|
parser.add_argument("-s", "--save", default="", help="Graph save location")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
graph = args.graph
|
graph = args.graph
|
||||||
|
save = args.save
|
||||||
|
|
||||||
|
colors = ["lightblue", "lightgreen", "lightcoral"]
|
||||||
|
edge_colors = ["blue", "green", "red"]
|
||||||
|
|
||||||
dataset = np.load("clean.npy")
|
dataset = np.load("clean.npy")
|
||||||
print(f"dataset shape: {dataset.shape}; analyzing column 11 (absence)")
|
print(f"dataset shape: {dataset.shape}; analyzing column 11 (absence)")
|
||||||
@ -52,31 +57,37 @@ for j in range(2):
|
|||||||
index = j * 2 + k
|
index = j * 2 + k
|
||||||
step = 1 if index > 0 else 0.5
|
step = 1 if index > 0 else 0.5
|
||||||
|
|
||||||
if index == 0:
|
if not index:
|
||||||
axs[j, k].scatter(dataset[:, 11], dataset[:, 2])
|
x = data[index][0] # absence
|
||||||
|
y = data[index][1] # grade
|
||||||
|
axs[j, k].scatter(x, y)
|
||||||
axs[j, k].set_xlabel("Počet vymeškaných hodín")
|
axs[j, k].set_xlabel("Počet vymeškaných hodín")
|
||||||
axs[j, k].set_ylabel(grade_name_labels[index])
|
axs[j, k].set_ylabel(grade_name_labels[index])
|
||||||
|
axs[j, k].set_yticks(np.arange(1, 6))
|
||||||
|
|
||||||
|
# trendline
|
||||||
|
z = np.polyfit(x, y, 1)
|
||||||
|
p = np.poly1d(z)
|
||||||
|
|
||||||
|
axs[j, k].plot(x, p(x), color="gray")
|
||||||
else:
|
else:
|
||||||
current = list([data[index][0][data[index][1] == i + 1] for i in range(5)]) # i wanna kms
|
by_grade = list([data[index][0][data[index][1] == i + 1] for i in range(5)])
|
||||||
axs[j, k].violinplot(list(filter(lambda x: len(x), current)), showmeans=True)
|
# data[index][0] - absences
|
||||||
axs[j, k].set_xticks(np.arange(1, 6, 1), labels=["1", "2", "3", "4", "5"])
|
# data[index][1] - grades
|
||||||
axs[j, k].set_xlabel(grade_name_labels[index])
|
# data[index][0][specific grade] - absences for that specific grade
|
||||||
axs[j, k].set_ylabel("Počet vymeškaných hodín")
|
# repeat the selection above for each grade from 1 through 5
|
||||||
|
axs[j, k].boxplot(by_grade, tick_labels=["1", "2", "3", "4", "5"])
|
||||||
|
|
||||||
axs[j, k].set_title(grade_names[index])
|
axs[j, k].set_title(grade_names[index])
|
||||||
|
|
||||||
tau = round(taus[index], 2)
|
tau = taus[index]
|
||||||
p = round(ps[index], 4)
|
p = ps[index]
|
||||||
axs[j, k].text(0.01, 0.99, f"Tau τ: {tau:.2f}\np-val: {p:.4f}", ha="left", va="top", transform=axs[j, k].transAxes,
|
axs[j, k].text(0.01, 0.99, f"Tau τ: {tau:.4f}\np-val: {p:.4f}", ha="left", va="top",
|
||||||
|
transform=axs[j, k].transAxes,
|
||||||
fontweight="bold")
|
fontweight="bold")
|
||||||
|
|
||||||
if index:
|
|
||||||
by_grade = [data[index][0][data[index][1] == i + 1] for i in range(5)]
|
|
||||||
means = list([a.mean() for a in filter(lambda b: len(b), by_grade)])
|
|
||||||
for l in range(len(means)):
|
|
||||||
mean = round(means[l], 2)
|
|
||||||
axs[j, k].text(l + 1.02, mean + 5, f"{mean}")
|
|
||||||
|
|
||||||
fig.tight_layout()
|
fig.tight_layout()
|
||||||
fig.show()
|
if save != "":
|
||||||
plt.show()
|
plt.savefig(save)
|
||||||
|
else:
|
||||||
|
plt.show()
|
||||||
|
@ -7,8 +7,11 @@ parser = argparse.ArgumentParser(
|
|||||||
prog="distribution"
|
prog="distribution"
|
||||||
)
|
)
|
||||||
parser.add_argument("-g", "--graph", action="store_true", default=False, help="Display graphs")
|
parser.add_argument("-g", "--graph", action="store_true", default=False, help="Display graphs")
|
||||||
|
parser.add_argument("-s", "--save", action="store_true", default=False, help="Save graphs")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
graph = args.graph
|
graph = args.graph
|
||||||
|
save = args.save
|
||||||
|
graph_index = 1
|
||||||
|
|
||||||
dataset = np.load("clean.npy")
|
dataset = np.load("clean.npy")
|
||||||
print(f"dataset shape: {dataset.shape}; analyzing distribution\n")
|
print(f"dataset shape: {dataset.shape}; analyzing distribution\n")
|
||||||
@ -19,6 +22,10 @@ def percent(fraction: float) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def plot_pie(data, labels, title, explode=None):
|
def plot_pie(data, labels, title, explode=None):
|
||||||
|
global graph_index
|
||||||
|
if not graph:
|
||||||
|
return
|
||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
while i < len(data):
|
while i < len(data):
|
||||||
if data[i] == 0:
|
if data[i] == 0:
|
||||||
@ -32,10 +39,18 @@ def plot_pie(data, labels, title, explode=None):
|
|||||||
plt.title(title)
|
plt.title(title)
|
||||||
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.show()
|
if save:
|
||||||
|
plt.savefig(f"results/Figure_{graph_index}.png")
|
||||||
|
graph_index += 1
|
||||||
|
else:
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
def plot_hist(data, title, xlabel, ylabel):
|
def plot_hist(data, title, xlabel, ylabel):
|
||||||
|
global graph_index
|
||||||
|
if not graph:
|
||||||
|
return
|
||||||
|
|
||||||
plt.figure(figsize=(8, 6))
|
plt.figure(figsize=(8, 6))
|
||||||
plt.hist(data, 25, edgecolor="black")
|
plt.hist(data, 25, edgecolor="black")
|
||||||
plt.title(title)
|
plt.title(title)
|
||||||
@ -43,7 +58,11 @@ def plot_hist(data, title, xlabel, ylabel):
|
|||||||
plt.ylabel(ylabel)
|
plt.ylabel(ylabel)
|
||||||
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.show()
|
if save:
|
||||||
|
plt.savefig(f"results/Figure_{graph_index}.png")
|
||||||
|
graph_index += 1
|
||||||
|
else:
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
grade = dataset[:, 0]
|
grade = dataset[:, 0]
|
||||||
@ -62,12 +81,11 @@ print(f"4st year: {percent(grade_dist[3])}")
|
|||||||
print(f"5st year: {percent(grade_dist[4])}")
|
print(f"5st year: {percent(grade_dist[4])}")
|
||||||
print("")
|
print("")
|
||||||
|
|
||||||
if graph:
|
plot_pie(
|
||||||
plot_pie(
|
grade_dist,
|
||||||
grade_dist,
|
["Prvý ročník", "Druhý ročník", "Tretí ročník", "Štvrtý ročník", "Piaty ročník"],
|
||||||
["Prvý ročník", "Druhý ročník", "Tretí ročník", "Štvrtý ročník", "Piaty ročník"],
|
"Distribúcia ročníkov",
|
||||||
"Distribúcia ročníkov",
|
)
|
||||||
)
|
|
||||||
|
|
||||||
sex = dataset[:, 1]
|
sex = dataset[:, 1]
|
||||||
sex_dist = [
|
sex_dist = [
|
||||||
@ -79,15 +97,13 @@ print(f"Female: {percent(sex_dist[0])}")
|
|||||||
print(f"Male: {percent(sex_dist[1])}")
|
print(f"Male: {percent(sex_dist[1])}")
|
||||||
print("")
|
print("")
|
||||||
|
|
||||||
if graph:
|
plot_pie(sex_dist, ["Ženy", "Muži"], "Distribúcia pohlavia")
|
||||||
plot_pie(sex_dist, ["Ženy", "Muži"], "Distribúcia pohlavia")
|
|
||||||
|
|
||||||
print("--- GPA ---")
|
print("--- GPA ---")
|
||||||
print("n/a")
|
print("n/a")
|
||||||
print("")
|
print("")
|
||||||
|
|
||||||
if graph:
|
plot_hist(dataset[:, 2], "Distribúcia piemernu známok", "Piemerná známka", "Počet študentov/tiek")
|
||||||
plot_hist(dataset[:, 2], "Distribúcia piemernu známok", "Piemerná známka", "Počet študentov/tiek")
|
|
||||||
|
|
||||||
math = dataset[:, 3]
|
math = dataset[:, 3]
|
||||||
math_dist = [
|
math_dist = [
|
||||||
@ -105,8 +121,7 @@ print(f"4: {percent(math_dist[3])}")
|
|||||||
print(f"5: {percent(math_dist[4])}")
|
print(f"5: {percent(math_dist[4])}")
|
||||||
print("")
|
print("")
|
||||||
|
|
||||||
if graph:
|
plot_pie(math_dist, ["1", "2", "3", "4", "5"], "Distribúcia známok z matematiky")
|
||||||
plot_pie(math_dist, ["1", "2", "3", "4", "5"], "Distribúcia známok z matematiky")
|
|
||||||
|
|
||||||
slovak = dataset[:, 4]
|
slovak = dataset[:, 4]
|
||||||
slovak_dist = [
|
slovak_dist = [
|
||||||
@ -124,8 +139,7 @@ print(f"4: {percent(slovak_dist[3])}")
|
|||||||
print(f"5: {percent(slovak_dist[4])}")
|
print(f"5: {percent(slovak_dist[4])}")
|
||||||
print("")
|
print("")
|
||||||
|
|
||||||
if graph:
|
plot_pie(slovak_dist, ["1", "2", "3", "4", "5"], "Distribúcia známok zo slovenčiny", (0, 0, 0, 0.25, 0.5))
|
||||||
plot_pie(slovak_dist, ["1", "2", "3", "4", "5"], "Distribúcia známok zo slovenčiny", (0, 0, 0, 0.25, 0.5))
|
|
||||||
|
|
||||||
english = dataset[:, 5]
|
english = dataset[:, 5]
|
||||||
english_dist = [
|
english_dist = [
|
||||||
@ -143,8 +157,7 @@ print(f"4: {percent(english_dist[3])}")
|
|||||||
print(f"5: {percent(english_dist[4])}")
|
print(f"5: {percent(english_dist[4])}")
|
||||||
print("")
|
print("")
|
||||||
|
|
||||||
if graph:
|
plot_pie(english_dist, ["1", "2", "3", "4", "5"], "Distribúcia známok z angličtiny")
|
||||||
plot_pie(english_dist, ["1", "2", "3", "4", "5"], "Distribúcia známok z angličtiny")
|
|
||||||
|
|
||||||
ses = dataset[:, 6]
|
ses = dataset[:, 6]
|
||||||
ses_dist = [
|
ses_dist = [
|
||||||
@ -158,8 +171,7 @@ print(f"Middle: {percent(ses_dist[1])}")
|
|||||||
print(f"Upper: {percent(ses_dist[2])}")
|
print(f"Upper: {percent(ses_dist[2])}")
|
||||||
print("")
|
print("")
|
||||||
|
|
||||||
if graph:
|
plot_pie(ses_dist, ["Nižšia trieda", "Stredná trieda", "Vyššia trieda"], "Distribúcia socio-ekonomických tried")
|
||||||
plot_pie(ses_dist, ["Nižšia trieda", "Stredná trieda", "Vyššia trieda"], "Distribúcia socio-ekonomických tried")
|
|
||||||
|
|
||||||
occupation = dataset[:, 7]
|
occupation = dataset[:, 7]
|
||||||
occupation_dist = [
|
occupation_dist = [
|
||||||
@ -179,10 +191,9 @@ print(f"other : {percent(occupation_dist[4])}")
|
|||||||
print(f"none : {percent(occupation_dist[5])}")
|
print(f"none : {percent(occupation_dist[5])}")
|
||||||
print("")
|
print("")
|
||||||
|
|
||||||
if graph:
|
plot_pie(occupation_dist,
|
||||||
plot_pie(occupation_dist,
|
["Práca 10 a viac hodín týždenne", "Práca menej ako 10 hodín týždenne", "Šport", "Hudba", "Niečo iné",
|
||||||
["Práca 10 a viac hodín týždenne", "Práca menej ako 10 hodín týždenne", "Šport", "Hudba", "Niečo iné",
|
"Žiadne"], "Distribúcia práce a aktivít")
|
||||||
"Žiadne"], "Distribúcia práce a aktivít")
|
|
||||||
|
|
||||||
living = dataset[:, 8]
|
living = dataset[:, 8]
|
||||||
living_dist = [
|
living_dist = [
|
||||||
@ -200,10 +211,9 @@ print(f"dorms : {percent(living_dist[3])}")
|
|||||||
print(f"other : {percent(living_dist[4])}")
|
print(f"other : {percent(living_dist[4])}")
|
||||||
print("")
|
print("")
|
||||||
|
|
||||||
if graph:
|
plot_pie(living_dist,
|
||||||
plot_pie(living_dist,
|
["S rodinou", "S rodinným príslušníkom/ou", "Sám/a alebo so spolubývajúcim/ou", "Intrák", "Iné"],
|
||||||
["S rodinou", "S rodinným príslušníkom/ou", "Sám/a alebo so spolubývajúcim/ou", "Intrák", "Iné"],
|
"Distribúcia životných situácií")
|
||||||
"Distribúcia životných situácií")
|
|
||||||
|
|
||||||
commute = dataset[:, 9]
|
commute = dataset[:, 9]
|
||||||
commute_dist = [
|
commute_dist = [
|
||||||
@ -221,10 +231,9 @@ print(f"<= 1h : {percent(commute_dist[3])}")
|
|||||||
print(f"> 1h : {percent(commute_dist[4])}")
|
print(f"> 1h : {percent(commute_dist[4])}")
|
||||||
print("")
|
print("")
|
||||||
|
|
||||||
if graph:
|
plot_pie(commute_dist,
|
||||||
plot_pie(commute_dist,
|
["Intrák", "Menej ako 15 minút", "Menej ako 30 minút", "Menej ako hodinu", "Viac ako hodinu"],
|
||||||
["Intrák", "Menej ako 15 minút", "Menej ako 30 minút", "Menej ako hodinu", "Viac ako hodinu"],
|
"Distribúcia dochádzania")
|
||||||
"Distribúcia dochádzania")
|
|
||||||
|
|
||||||
sleep = dataset[:, 10]
|
sleep = dataset[:, 10]
|
||||||
sleep_dist = [
|
sleep_dist = [
|
||||||
@ -238,12 +247,10 @@ print(f"medium sleepers: {percent(sleep_dist[1])}")
|
|||||||
print(f"long sleepers : {percent(sleep_dist[2])}")
|
print(f"long sleepers : {percent(sleep_dist[2])}")
|
||||||
print("")
|
print("")
|
||||||
|
|
||||||
if graph:
|
plot_pie(sleep_dist, ["6 hodín a menej", "7 až 8 hodín", "9 a viac hodín"], "Distribúcia spánku")
|
||||||
plot_pie(sleep_dist, ["6 hodín a menej", "7 až 8 hodín", "9 a viac hodín"], "Distribúcia spánku")
|
|
||||||
|
|
||||||
print("--- ABSENCE ---")
|
print("--- ABSENCE ---")
|
||||||
print("n/a")
|
print("n/a")
|
||||||
print("")
|
print("")
|
||||||
|
|
||||||
if graph:
|
plot_hist(dataset[:, 11], "Distribúcia absencií", "Počet neprítomných hodín", "Počet študentov/tiek")
|
||||||
plot_hist(dataset[:, 11], "Distribúcia absencií", "Počet neprítomných hodín", "Počet študentov/tiek")
|
|
||||||
|
@ -25,16 +25,23 @@ nvidia-nvjitlink-cu12==12.4.127
|
|||||||
nvidia-nvtx-cu12==12.4.127
|
nvidia-nvtx-cu12==12.4.127
|
||||||
packaging==24.2
|
packaging==24.2
|
||||||
pandas==2.2.3
|
pandas==2.2.3
|
||||||
|
pandas-flavor==0.6.0
|
||||||
|
patsy==1.0.1
|
||||||
pillow==11.0.0
|
pillow==11.0.0
|
||||||
pyparsing==3.2.0
|
pyparsing==3.2.0
|
||||||
python-dateutil==2.9.0.post0
|
python-dateutil==2.9.0.post0
|
||||||
pytz==2024.2
|
pytz==2024.2
|
||||||
scikit-learn==1.6.0
|
scikit-learn==1.6.0
|
||||||
|
scikit-posthocs==0.11.2
|
||||||
scipy==1.14.1
|
scipy==1.14.1
|
||||||
|
seaborn==0.13.2
|
||||||
setuptools==75.6.0
|
setuptools==75.6.0
|
||||||
six==1.17.0
|
six==1.17.0
|
||||||
|
statsmodels==0.14.4
|
||||||
sympy==1.13.1
|
sympy==1.13.1
|
||||||
|
tabulate==0.9.0
|
||||||
threadpoolctl==3.5.0
|
threadpoolctl==3.5.0
|
||||||
torch==2.5.1
|
torch==2.5.1
|
||||||
typing_extensions==4.12.2
|
typing_extensions==4.12.2
|
||||||
tzdata==2024.2
|
tzdata==2024.2
|
||||||
|
xarray==2024.11.0
|
||||||
|
@ -119,7 +119,7 @@ for epoch in range(epochs):
|
|||||||
pred = model(X)
|
pred = model(X)
|
||||||
loss = loss_fn(pred, y)
|
loss = loss_fn(pred, y)
|
||||||
|
|
||||||
test_loss = loss.item() * X.size(0)
|
test_loss += loss.item() * X.size(0)
|
||||||
|
|
||||||
test_loss /= len(test_dataset)
|
test_loss /= len(test_dataset)
|
||||||
test_losses.append(test_loss)
|
test_losses.append(test_loss)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user