from typing import List

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats

# Two-letter county code -> numeric id used in the data matrix.
counties = {
    "BA": 0,
    "TN": 1,
    "TT": 2,
    "NR": 3,
    "BB": 4,
    "ZA": 5,
    "PO": 6,
    "KE": 7,
}
# County codes in id order, so counties_k[county_id] recovers the code.
counties_k = list(counties.keys())

# Significance level below which the normality hypothesis is rejected.
ALPHA = 0.05
# scipy.stats.normaltest builds on skewtest, which needs at least 8 samples.
MIN_NORMALTEST_SAMPLES = 8


def map_counties(arr: List[str]) -> List[int]:
    """Translate a list of county codes into their numeric ids.

    Args:
        arr: County codes; each must be a key of ``counties``.

    Returns:
        The matching ids, in the same order as ``arr``.

    Raises:
        KeyError: If a code is not present in ``counties``.
    """
    return [counties[county] for county in arr]


# Each non-blank line of parsed.txt is "<year> <category> <code,code,...>".
raw_data = []
with open("parsed.txt") as stream:
    for line in stream:
        # Lines keep their trailing newline, so a blank line is never falsy
        # on its own; split on whitespace first and skip when nothing is left.
        fields = line.split()
        if not fields:
            continue
        year = int(fields[0])
        category = int(fields[1])
        wins_raw = fields[2].split(",")
        raw_data.append([year, category, *map_counties(wins_raw)])

# Row layout:
# 0 - year
# 1 - category (Abteilung) id
# 2.. - county ids ordered from first place onward
data = np.array(raw_data)

print("Testing place distribution for normality by county")
for county_id, county_code in enumerate(counties_k):
    # Collect the 0-based place index of every appearance of this county.
    # NOTE(review): the original column comment says places occupy columns
    # 2-7, but the slice 2:7 only covers columns 2..6 (five places) -
    # confirm which one is intended.
    places = [
        place
        for sample in data
        for place, winner in enumerate(sample[2:7])
        if winner == county_id
    ]

    # normaltest cannot be evaluated on very small samples (it raises or
    # yields NaN depending on the SciPy version), so report and move on.
    if len(places) < MIN_NORMALTEST_SAMPLES:
        print(
            f"{county_code}: not enough samples ({len(places)}) "
            f"for the normality test"
        )
        continue

    # null hypothesis is that the sample comes from a normal distribution
    _, p = stats.normaltest(places)
    # Built outside the f-string: reusing the same quote type inside a
    # replacement field is a syntax error before Python 3.12.
    verdict = "not " if p < ALPHA else ""
    print(f"{county_code}: {p:.4f} - {verdict}normally distributed")