⚡ Adds chi2 and fisher exact tests
This commit is contained in:
parent
feed2387c8
commit
465f56146a
44
analysis.py
44
analysis.py
@ -59,26 +59,38 @@ with open("dataset.txt") as stream:
|
|||||||
|
|
||||||
|
|
||||||
# 0 - year
|
# 0 - year
|
||||||
# 1 - abteilung (category) id (starts at 1)
|
# 1 - abteilung (category) idx (starts at 1)
|
||||||
# 2-7 - first to last place county ids
|
# 2-7 - first to last place county idxs
|
||||||
data_original = np.array(raw_data)
|
data_original = np.array(raw_data)
|
||||||
|
|
||||||
# table where counties are rows and category-scores are columns
|
# table where counties are rows and counts of placements are columns
|
||||||
# 01 | 02 | 03 | ...
|
# #1 | #2 | ...
|
||||||
# BA | 5 | 2 | 1 | ...
|
# BA | 5 | 4 | ...
|
||||||
# TT | 0 | 3 | 4 | ...
|
# ZA | 9 | 8 | ...
|
||||||
# KE | 4 | 1 | 5 | ...
|
# KE | 4 | 6 | ...
|
||||||
# ...
|
# as a row-first 2d numpy array (first dimension will represent counties, second counts of placements)
|
||||||
# as a row-first 2d numpy array (first dimension will represent counties, second category-scores)
|
data = np.zeros((counties_c, 5)) # 5 because top five
|
||||||
data = np.zeros((counties_c, categories_c))
|
|
||||||
for sample in data_original:
|
for sample in data_original:
|
||||||
category_id = sample[1] - 1 # because they start at 1
|
|
||||||
results = sample[2:7]
|
results = sample[2:7]
|
||||||
for i, county_id in enumerate(results):
|
for placement_idx, county_idx in enumerate(results):
|
||||||
# first -> 5
|
data[county_idx, placement_idx] += 1
|
||||||
# second -> 4
|
|
||||||
# ... (formula is 6 - i)
|
|
||||||
data[county_id, category_id] += 6 - i
|
|
||||||
|
|
||||||
|
|
||||||
|
print("Data:")
|
||||||
print(data)
|
print(data)
|
||||||
|
|
||||||
|
# H0: county and placement are independent
|
||||||
|
# H1: county and placement are not independent
|
||||||
|
print("\nAttempting Chi-Square test")
|
||||||
|
chi2, p, dof, expected = stats.chi2_contingency(data)
|
||||||
|
|
||||||
|
print(f"Chi-Square Statistic: {chi2}")
|
||||||
|
print(f"p-value: {p}")
|
||||||
|
print(f"Degrees of Freedom: {dof}")
|
||||||
|
#print("Expected Frequencies:\n", expected)
|
||||||
|
|
||||||
|
print("\nAttempting Fisher's Exact test")
|
||||||
|
oddsratio, p_value = stats.fisher_exact(data)
|
||||||
|
|
||||||
|
print(f"Odds Ratio: {oddsratio}")
|
||||||
|
print(f"p-value: {p_value}")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user