56 lines
1.2 KiB
Python
56 lines
1.2 KiB
Python
from typing import List
|
|
import scipy.stats as stats
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
|
|
# County code -> integer id. The ids are consecutive and follow insertion
# order, so counties_k[i] is the code whose id is i.
counties = {
    "BA": 0,
    "TN": 1,
    "TT": 2,
    "NR": 3,
    "BB": 4,
    "ZA": 5,
    "PO": 6,
    "KE": 7,
}
# County codes indexed by id (the inverse lookup of `counties`).
counties_k = list(counties)
|
|
|
|
|
|
def map_counties(arr: List[str]) -> List[int]:
    """Translate county codes into their integer ids.

    Args:
        arr: County codes; each must be a key of the module-level
            ``counties`` mapping.

    Returns:
        The ids of the given codes, in the same order as ``arr``.

    Raises:
        KeyError: If a code is not present in ``counties``.
    """
    return [counties[county] for county in arr]
|
|
|
|
|
|
# Parse "parsed.txt" into rows of integers, one row per non-blank line.
# Each line is whitespace-separated: "<year> <category> <code>,<code>,...".
raw_data = []
with open("parsed.txt") as stream:
    for line in stream:
        # Lines read from a file keep their trailing newline, so a blank line
        # is "\n" (truthy). Split first and test the result, otherwise blank
        # lines would reach int() below and raise.
        fields = line.split()
        if not fields:
            continue

        year = int(fields[0])
        category = int(fields[1])
        wins_raw = fields[2].split(",")

        raw_data.append([year, category, *map_counties(wins_raw)])

# Column layout of each row:
# 0 - year
# 1 - category (division) id
# 2 onward - county ids in finishing order, first place to last place
data = np.array(raw_data)
|
|
|
|
print("Testing place distribution for normality by county")

# scipy's normaltest builds on skewtest, which needs at least 8 observations.
MIN_NORMALTEST_SAMPLES = 8

for county_id, county_code in enumerate(counties_k):
    # Collect the 0-based finishing position of this county in every row.
    # Place columns start at index 2 and run to the end of the row, so slice
    # open-ended rather than with a fixed width that could drop the last place.
    places = [
        place
        for sample in data
        for place, county in enumerate(sample[2:])
        if county == county_id
    ]

    if len(places) < MIN_NORMALTEST_SAMPLES:
        print(f"{county_code}: not enough samples ({len(places)}) for a normality test")
        continue

    # null hypothesis is that the sample comes from a normal distribution
    _statistic, p = stats.normaltest(places)
    # Build the qualifier outside the f-string: reusing double quotes inside a
    # double-quoted f-string only parses on Python 3.12+.
    qualifier = "not " if p < 0.05 else ""
    print(f"{county_code}: {p:.4f} - {qualifier}normally distributed")
|