🧪 Adds goodness of fit test
This commit is contained in:
parent
bd60a9aa3f
commit
c3651fb62e
57
analysis.py
57
analysis.py
@ -36,6 +36,9 @@ categories = [
|
|||||||
]
|
]
|
||||||
categories_c = 17 # how many categories
|
categories_c = 17 # how many categories
|
||||||
|
|
||||||
|
# number of years for which we have data
|
||||||
|
years = 9
|
||||||
|
|
||||||
|
|
||||||
def map_counties(arr: List[str]) -> List[int]:
|
def map_counties(arr: List[str]) -> List[int]:
|
||||||
ret = []
|
ret = []
|
||||||
@ -57,7 +60,6 @@ with open("dataset.txt") as stream:
|
|||||||
|
|
||||||
raw_data.append([year, category, *map_counties(wins_raw)])
|
raw_data.append([year, category, *map_counties(wins_raw)])
|
||||||
|
|
||||||
|
|
||||||
# 0 - year
|
# 0 - year
|
||||||
# 1 - abteilung (category) idx (starts at 1)
|
# 1 - abteilung (category) idx (starts at 1)
|
||||||
# 2-7 - first to last place county idxs
|
# 2-7 - first to last place county idxs
|
||||||
@ -69,28 +71,53 @@ data_original = np.array(raw_data)
|
|||||||
# ZA | 9 | 8 | ...
|
# ZA | 9 | 8 | ...
|
||||||
# KE | 4 | 6 | ...
|
# KE | 4 | 6 | ...
|
||||||
# as a row-first 2d numpy array (first dimension will represent counties, second counts of placements)
|
# as a row-first 2d numpy array (first dimension will represent counties, second counts of placements)
|
||||||
data = np.zeros((counties_c, 5)) # 5 because top five
|
# data = np.zeros((counties_c, 5)) # 5 because top five
|
||||||
|
# for sample in data_original:
|
||||||
|
# results = sample[2:7]
|
||||||
|
# for placement_idx, county_idx in enumerate(results):
|
||||||
|
# data[county_idx, placement_idx] += 1
|
||||||
|
|
||||||
|
# data is a table where rows represent placements, columns represent samples (one per year and category), and each cell holds a county index
|
||||||
|
# 1st | 5 | 1 | 2 | ...
|
||||||
|
# 2nd | 3 | 0 | 7 | ...
|
||||||
|
# 3rd ...
|
||||||
|
# data = np.zeros((5, years * categories_c)) # same as (5, len(data_original))
|
||||||
|
# for i, sample in enumerate(data_original):
|
||||||
|
# results = sample[2:7]
|
||||||
|
# for j in range(5):
|
||||||
|
# data[j][i] = results[j]
|
||||||
|
|
||||||
|
# placements per county (every entry of sample[2:7] counts, not only first place)
|
||||||
|
# goodness-of-fit problem using Chi Square
|
||||||
|
# based on observed vs expected frequency
|
||||||
|
observed = np.zeros(counties_c)
|
||||||
for sample in data_original:
|
for sample in data_original:
|
||||||
results = sample[2:7]
|
results = sample[2:7]
|
||||||
for placement_idx, county_idx in enumerate(results):
|
for i in results:
|
||||||
data[county_idx, placement_idx] += 1
|
observed[i] += 1
|
||||||
|
|
||||||
|
expected = np.ones_like(observed) * (sum(observed) / len(observed))
|
||||||
|
|
||||||
print("Data:")
|
print("Data:")
|
||||||
print(data)
|
print(observed)
|
||||||
|
print(expected)
|
||||||
|
|
||||||
|
chi2, p = stats.chisquare(f_obs=observed, f_exp=expected)
|
||||||
|
print(f"Chi-square = {chi2:.2f}, p-value = {p:.4f}")
|
||||||
|
|
||||||
# H0: county and placement are independent
|
# H0: county and placement are independent
|
||||||
# H1: county and placement are not independent
|
# H1: county and placement are not independent
|
||||||
print("\nAttempting Chi-Square test")
|
|
||||||
chi2, p, dof, expected = stats.chi2_contingency(data)
|
|
||||||
|
|
||||||
print(f"Chi-Square Statistic: {chi2}")
|
# print("\nAttempting Chi-Square test")
|
||||||
print(f"p-value: {p}")
|
# chi2, p, dof, expected = stats.chi2_contingency(data)
|
||||||
print(f"Degrees of Freedom: {dof}")
|
|
||||||
#print("Expected Frequencies:\n", expected)
|
|
||||||
|
|
||||||
print("\nAttempting Fisher's Exact test")
|
# print(f"Chi-Square Statistic: {chi2}")
|
||||||
oddsratio, p_value = stats.fisher_exact(data)
|
# print(f"p-value: {p}")
|
||||||
|
# print(f"Degrees of Freedom: {dof}")
|
||||||
|
# print("Expected Frequencies:\n", expected)
|
||||||
|
|
||||||
print(f"Odds Ratio: {oddsratio}")
|
# print("\nAttempting Fisher's Exact test")
|
||||||
print(f"p-value: {p_value}")
|
# oddsratio, p_value = stats.fisher_exact(data)
|
||||||
|
|
||||||
|
# print(f"Odds Ratio: {oddsratio}")
|
||||||
|
# print(f"p-value: {p_value}")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user