🐛 Fixes clean script

2024-12-15 12:13:30 +01:00
parent 25ae210f1c
commit d3d9ebfbe2
2 changed files with 24 additions and 4 deletions
--- a/CLEAN.md
+++ b/CLEAN.md
@@ -10,10 +10,12 @@ The cleaned dataset will have the following structure:
 | 3     | Math       | int   | [1-5] |
 | 4     | Slovak     | int   | [1-5] |
 | 5     | English    | int   | [1-5] |
-| 6     | Occupation | enum  | [0-5] |
+| 6     | SES        | enum  | [0-2] |
-| 7     | Living     | enum  | [0-4] |
+| 7     | Occupation | enum  | [0-5] |
-| 8     | Commute    | enum  | [0-4] |
+| 8     | Living     | enum  | [0-4] |
-| 9     | Absence    | int   | -     |
+| 9     | Commute    | enum  | [0-4] |
+| 10    | Sleep      | enum  | [0-2] |
+| 11    | Absence    | int   | -     |
 ### Sex
@@ -22,6 +24,14 @@ The cleaned dataset will have the following structure:
 1 - muz
 ```
+### SES
+```
+0 - lower class
+1 - middle class
+2 - upper class
+```
 ### Occupation
 ```
@@ -52,3 +62,11 @@ The cleaned dataset will have the following structure:
 3 - <= 1h
 4 - > 1h
 ```
+### Sleep
+```
+0 - long
+1 - medium
+2 - short
+```
--- a/clean.py
+++ b/clean.py
@@ -155,9 +155,11 @@ for i in range(1, len(df)):
    current.append(math)
    current.append(slovak)
    current.append(english)
    current.append(parse_ses(ses))
    current.append(parse_occupation(occupation))
    current.append(parse_living(living))
    current.append(parse_commute(commute))
    current.append(parse_sleep(sleep))
    current.append(parse_absence(absence))
    clean.append(np.array(current))