diff --git a/get_data.py b/get_data.py
index af22714..e371c65 100644
--- a/get_data.py
+++ b/get_data.py
@@ -3,6 +3,7 @@ import requests
 import urllib.request
 from bs4 import BeautifulSoup
 from pypdf import PdfReader
+from typing import List
 
 r = requests.get("https://siov.sk/sutaze/stredoskolska-odborna-cinnost/")
 soup = BeautifulSoup(r.content, "html.parser")
@@ -32,10 +33,10 @@ for link in links:
     if not href.startswith("https://siov.sk") and not href.startswith("http://siov.sk"):
         href = "https://siov.sk" + href
 
-    print(href)
     pdfs.append(href)
 
 
+print(f"Found {len(pdfs)} pdfs")
 ok = input("Continue? [Y/n] ")
 if ok.lower() == "n":
     print("okay, bye")
@@ -43,10 +44,46 @@ if ok.lower() == "n":
 
 
 counties = ["BA", "TT", "TN", "NR", "ZA", "BB", "PO", "KE"]
+semifull_counties = ["Bratislavský kraj", "Trnavský kraj", "Trenčianský kraj", "Nitriansky kraj",
+                     "Žilinský kraj", "Banskobystrický kraj", "Prešovský kraj", "Košický kraj"]
 
 
-def extract_2024(cat_str: str, text: str):
-    pass
+def extract_2024(cat_str: str, lines: List[str]):
+    results = []
+
+    for i in range(len(lines)):
+        line = lines[i]
+        if not line.startswith(cat_str):
+            continue
+
+        county = lines[i-1][-2:]
+        if county not in counties:
+            continue
+
+        results.append(county)
+        if len(results) == 5:
+            return results
+
+    return results
+
+
+def extract_2023(lines: List[str]):
+    results = []
+
+    for i in range(len(lines)):
+        line = lines[i]
+        if line[0:2] not in ["2.", "3.", "4.", "5.", "6."]:
+            continue
+
+        if lines[i-1] not in semifull_counties:
+            print("County not recognized: ", lines[i-1])
+            continue
+
+        results.append(counties[semifull_counties.index(lines[i-1])])
+        if len(results) == 5:
+            return results
+
+    return results
 
 
 for pdf in pdfs:
@@ -68,8 +105,12 @@ for pdf in pdfs:
         print("Coudln't get category, skipping")
         continue
     cat = int(max(matches[0]))
+    cat_str = f"{cat:>02}"
 
-    id = f"{year}-{cat:>02}"
+    if year != 2023:
+        continue
+
+    id = f"{year}-{cat_str}"
     path = f"data/r{id}.pdf"
     print(f"Downloading {id} - '{pdf}' -> '{path}'")
     urllib.request.urlretrieve(pdf, path)
@@ -79,7 +120,16 @@ for pdf in pdfs:
         print("Coudln't find pages, skipping")
         continue
     page = reader.pages[0]
-    print(page.extract_text())
+
+    text = page.extract_text()
+    lines = text.split("\n")
+    results = []
+    match year:
+        case 2024:
+            results = extract_2024(cat_str, lines)
+        case 2023:
+            results = extract_2023(lines)
+    print("got results: ", results)
 
 
     exit(0)