🔥 Fixes numpy not loading dataset

This commit is contained in:
Daniel Svitan 2025-05-03 18:02:45 +02:00
parent 85ba8d9651
commit ef07162c05
3 changed files with 58 additions and 13 deletions

55
analysis.py Normal file
View File

@ -0,0 +1,55 @@
from typing import List
import scipy.stats as stats
import numpy as np
import matplotlib.pyplot as plt
# Two-letter county codes; a code's position in this list is its numeric id.
counties_k = ["BA", "TN", "TT", "NR", "BB", "ZA", "PO", "KE"]

# Lookup table from county code to numeric id (same order as counties_k).
counties = {code: index for index, code in enumerate(counties_k)}
def map_counties(arr: List[str]) -> List[int]:
    """Translate county codes into their numeric ids, preserving order.

    Each code is looked up in the module-level ``counties`` table; a code
    that is not in the table raises ``KeyError``.
    """
    return [counties[code] for code in arr]
# Load the results file. Each non-empty line has the form
# "<year> <category> <code>,<code>,...", e.g. "2024 15 BA,KE,ZA,PO,ZA".
raw_data = []
with open("parsed.txt", encoding="utf-8") as stream:
    for line in stream:
        # Strip before the emptiness check: lines read from a file keep their
        # trailing newline, so an unstripped blank line is never falsy and
        # would crash the int() conversion below.
        line = line.strip()
        if not line:
            continue

        split = line.split(" ")
        year = int(split[0])
        category = int(split[1])
        wins_raw = split[2].split(",")
        raw_data.append([year, category, *map_counties(wins_raw)])

# Column layout of `data`:
# 0 - year
# 1 - category (Abteilung) id
# 2 onward - county ids from first to last place
#            (five places per row in the sample data, i.e. columns 2-6)
# NOTE: np.array needs every row to have the same number of places,
# otherwise the rows cannot be stacked into a rectangular array.
data = np.array(raw_data)
# scipy.stats.normaltest builds on skewtest, which needs at least 8 observations.
MIN_NORMALTEST_SAMPLES = 8

print("Testing place distribution for normality by county")
for county_id, county_code in enumerate(counties_k):
    # Collect the 0-based finishing position of every appearance of this
    # county in the place columns (2..6) of each row.
    places = [
        place
        for sample in data
        for place, winner in enumerate(sample[2:7])
        if winner == county_id
    ]

    # Too few observations make the test undefined; report that instead of
    # crashing or printing a verdict derived from a NaN p-value.
    if len(places) < MIN_NORMALTEST_SAMPLES:
        print(f"{county_code}: not enough samples ({len(places)}) to test for normality")
        continue

    # null hypothesis is that the sample comes from a normal distribution
    _, p = stats.normaltest(places)

    # Build the verdict outside the f-string: reusing double quotes inside a
    # double-quoted f-string is a SyntaxError before Python 3.12.
    verdict = "not " if p < 0.05 else ""
    print(f"{county_code}: {p:.4f} - {verdict}normally distributed")

View File

@ -134,16 +134,3 @@
2024 15 BA,KE,ZA,PO,ZA
2024 16 PO,ZA,NR,BB,KE
2024 17 BB,PO,ZA,PO,TT
BA - Bratislavsky
TN - Trenciansky
TT - Trnavsky
NR - Nitriansky
BB - Banskobystricky
ZA - Zilinsky
KE - Kosicky
PO - Presovsky

View File

@ -1,2 +1,5 @@
requests
beautifulsoup4
numpy
matplotlib
scipy