🚧 Adds (non-working) boxplot
This commit is contained in:
parent
2b0829c78e
commit
9b90fb2db3
@ -15,7 +15,7 @@ The cleaned dataset will have the following structure:
|
|||||||
| 8 | Living | enum | [0-4] |
|
| 8 | Living | enum | [0-4] |
|
||||||
| 9 | Commute | enum | [0-4] |
|
| 9 | Commute | enum | [0-4] |
|
||||||
| 10 | Sleep | enum | [0-2] |
|
| 10 | Sleep | enum | [0-2] |
|
||||||
| 11 | Absence | int | - |
|
| 11 | Absence | int | [0-∞] |
|
||||||
|
|
||||||
It will be saved in a `.npy` file (numpy format)
|
It will be saved in a `.npy` file (numpy format)
|
||||||
|
|
||||||
|
38
analyze.py
38
analyze.py
@ -1,7 +1,14 @@
|
|||||||
from typing import List
|
from typing import List
|
||||||
|
import argparse
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import scipy.stats as stats
|
import scipy.stats as stats
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("-g", "--graph", action="store_true", default=False, help="Plot graph")
|
||||||
|
args = parser.parse_args()
|
||||||
|
graph = args.graph
|
||||||
|
|
||||||
|
|
||||||
def analyze(name: str, data: List[np.ndarray]):
|
def analyze(name: str, data: List[np.ndarray]):
|
||||||
@ -11,8 +18,37 @@ def analyze(name: str, data: List[np.ndarray]):
|
|||||||
|
|
||||||
if p > 0.05:
|
if p > 0.05:
|
||||||
print("statistically insignificant\n")
|
print("statistically insignificant\n")
|
||||||
return
|
return F, p
|
||||||
|
|
||||||
print("statistically significant")
|
print("statistically significant")
|
||||||
tukey_results = stats.tukey_hsd(*data)
|
tukey_results = stats.tukey_hsd(*data)
|
||||||
print(tukey_results)
|
print(tukey_results)
|
||||||
|
|
||||||
|
return F, p
|
||||||
|
|
||||||
|
|
||||||
|
def plot_box(data, labels, Fs, ps, title, titles):
    """Draw a 2x2 grid of box plots, one panel per analysed variable.

    Does nothing unless the module-level ``graph`` flag is truthy.

    Args:
        data: Sequence of panels; each panel is a sequence of arrays
            (one array per group) passed straight to ``Axes.boxplot``.
        labels: Tick labels for the groups, shared by every panel.
        Fs: One F statistic per panel, shown rounded to 2 decimals.
        ps: One p-value per panel, shown rounded to 4 decimals.
        title: Figure-level title.
        titles: One title per panel.

    Panels are filled in row-major order. At most four panels are drawn;
    if any of the per-panel sequences is shorter than four, the remaining
    axes are left empty instead of raising ``IndexError``.
    """
    if not graph:
        return

    fig, axs = plt.subplots(2, 2, sharex=True)
    fig.suptitle(title)
    fig.set_size_inches(12, 9)

    # axs.flat walks the grid row by row, so position k is axs[k // 2, k % 2].
    panels = zip(axs.flat, data, titles, Fs, ps)
    for k, (ax, groups, panel_title, f_stat, p_val) in enumerate(panels):
        i, j = divmod(k, 2)
        print(f"{i}x{j} giving {k}")
        ax.boxplot(groups, labels=labels)
        ax.set_title(panel_title)

        F = round(f_stat, 2)
        p = round(p_val, 4)
        # Axes-relative coordinates pin the annotation to the top-left corner
        # regardless of the data range.
        ax.text(0.01, 0.99, f"F-stat: {F}\np-val: {p}", ha="left", va="top",
                transform=ax.transAxes, fontweight="bold")

        avgs = np.array([a.mean() for a in groups])
        print(avgs)

    fig.tight_layout()
    # plt.show() alone displays the figure and runs the event loop; the
    # extra fig.show() was redundant and warns on non-GUI backends.
    plt.show()
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from analyze import analyze
|
from analyze import analyze, plot_box
|
||||||
|
|
||||||
dataset = np.load("clean.npy")
|
dataset = np.load("clean.npy")
|
||||||
print(f"dataset shape: {dataset.shape}; analyzing column 1 (sex)")
|
print(f"dataset shape: {dataset.shape}; analyzing column 1 (sex)")
|
||||||
@ -11,13 +11,19 @@ print("")
|
|||||||
|
|
||||||
def analyze_sex(name: str, col: np.ndarray):
    """Split ``col`` by the sex column of ``dataset`` and run ``analyze`` on it.

    Args:
        name: Label forwarded to ``analyze``.
        col: Values to split; rows are matched against ``dataset[:, 1]``.

    Returns:
        A ``(data, F, p)`` tuple: the two-element list of per-group arrays
        (rows where the sex column equals 0, then rows where it equals 1)
        followed by the two values returned by ``analyze``.
    """
    sex = dataset[:, 1]
    # Group order is code 0 then code 1 (presumably female / male, judging
    # by the labels passed to plot_box in this script -- confirm).
    groups = [col[sex == code] for code in (0, 1)]
    F, p = analyze(name, groups)
    return groups, F, p
|
||||||
|
|
||||||
|
|
||||||
analyze_sex("gpa", dataset[:, 2])
|
data_gpa, F_gpa, p_gpa = analyze_sex("gpa", dataset[:, 2])
|
||||||
analyze_sex("math", dataset[:, 3])
|
data_math, F_math, p_math = analyze_sex("math", dataset[:, 3])
|
||||||
analyze_sex("slovak", dataset[:, 4])
|
data_slovak, F_slovak, p_slovak = analyze_sex("slovak", dataset[:, 4])
|
||||||
analyze_sex("english", dataset[:, 5])
|
data_english, F_english, p_english = analyze_sex("english", dataset[:, 5])
|
||||||
|
|
||||||
|
plot_box([data_gpa, data_math, data_slovak, data_english], ["Female", "Male"],
|
||||||
|
[F_gpa, F_math, F_slovak, F_english], [p_gpa, p_math, p_slovak, p_english],
|
||||||
|
"Pohlavie", ["Priemer", "Matematika", "Slovenčina", "Angličtina"])
|
||||||
|
Loading…
x
Reference in New Issue
Block a user