Adds parsing of 2022 PDF results

This commit is contained in:
Daniel Svitan 2025-04-18 19:33:26 +02:00
parent 96a6810fcd
commit 8e4cac0789

View File

@ -44,8 +44,10 @@ if ok.lower() == "n":
# Two-letter county codes. The name tables below are index-aligned with this
# list, so a matched name is translated to its code by position.
counties = ["BA", "TT", "TN", "NR", "ZA", "BB", "PO", "KE"]

# County names as matched for the 2023 results (full "... kraj" form).
# NOTE(review): "Trenčianský kraj" is spelled differently from the 2022 table
# ("Trenčiansky"); presumably each mirrors the text of its source PDFs -
# confirm before normalizing either spelling.
counties2023 = ["Bratislavský kraj", "Trnavský kraj", "Trenčianský kraj", "Nitriansky kraj",
                "Žilinský kraj", "Banskobystrický kraj", "Prešovský kraj", "Košický kraj"]

# Former name of the 2023 table, kept as an alias for any remaining references.
semifull_counties = counties2023

# County names as matched for the 2022 results (adjective only, no "kraj").
counties2022 = ["Bratislavský", "Trnavský", "Trenčiansky", "Nitriansky",
                "Žilinský", "Banskobystrický", "Prešovský", "Košický"]
def extract_2024(cat_str: str, lines: List[str]):
@ -71,16 +73,26 @@ def extract_2024(cat_str: str, lines: List[str]):
def extract_2023(lines: List[str]):
    """Collect county codes from the text lines of a 2023 results PDF.

    A line counts only when it is exactly equal to one of the names in
    ``counties2023``; every other line is ignored.

    Args:
        lines: Text lines of the document, in reading order.

    Returns:
        The matching codes from ``counties`` in order of appearance, at most
        five. Fewer (possibly none) are returned when fewer lines match.
    """
    results = []
    for line in lines:
        if line not in counties2023:
            continue

        # The name tables are index-aligned with `counties`.
        results.append(counties[counties2023.index(line)])

        # Only the first five matches are wanted; stop scanning early.
        if len(results) == 5:
            break

    return results
def extract_2022(lines: List[str]):
    """Collect county codes from the text lines of a 2022 results PDF.

    Only the last space-separated token of each line is examined; the line
    counts when that token is one of the names in ``counties2022``.

    Args:
        lines: Text lines of the document, in reading order.

    Returns:
        The matching codes from ``counties`` in order of appearance, at most
        five. Fewer (possibly none) are returned when fewer lines match, so
        the result is always a list, as with ``extract_2023``.
    """
    results = []
    for line in lines:
        # Last token of the line; an empty line yields "" and never matches.
        word = line.strip().split(" ")[-1].strip()
        if word not in counties2022:
            continue

        # The name tables are index-aligned with `counties`.
        results.append(counties[counties2022.index(word)])

        # Only the first five matches are wanted; stop scanning early.
        if len(results) == 5:
            break

    return results
@ -108,7 +120,7 @@ for pdf in pdfs:
cat = int(max(matches[0]))
cat_str = f"{cat:>02}"
if year != 2023:
if year != 2022:
continue
id = f"{year}-{cat_str}"
@ -130,6 +142,8 @@ for pdf in pdfs:
results = extract_2024(cat_str, lines)
case 2023:
results = extract_2023(lines)
case 2022:
results = extract_2022(lines)
print("got results: ", results)
exit(0)