🐛 Fixes clean script

This commit is contained in:
Daniel Svitan 2024-12-15 12:13:30 +01:00
parent 25ae210f1c
commit d3d9ebfbe2
2 changed files with 24 additions and 4 deletions

View File

@@ -10,10 +10,12 @@ The cleaned dataset will have the following structure:
| 3 | Math | int | [1-5] | | 3 | Math | int | [1-5] |
| 4 | Slovak | int | [1-5] | | 4 | Slovak | int | [1-5] |
| 5 | English | int | [1-5] | | 5 | English | int | [1-5] |
| 6 | Occupation | enum | [0-5] | | 6 | SES | enum | [0-2] |
| 7 | Living | enum | [0-4] | | 7 | Occupation | enum | [0-5] |
| 8 | Commute | enum | [0-4] | | 8 | Living | enum | [0-4] |
| 9 | Absence | int | - | | 9 | Commute | enum | [0-4] |
| 10 | Sleep | enum | [0-2] |
| 11 | Absence | int | - |
### Sex ### Sex
@@ -22,6 +24,14 @@ The cleaned dataset will have the following structure:
1 - muz 1 - muz
``` ```
### SES
```
0 - lower class
1 - middle class
2 - upper class
```
### Occupation ### Occupation
``` ```
@@ -52,3 +62,11 @@ The cleaned dataset will have the following structure:
3 - <= 1h 3 - <= 1h
4 - > 1h 4 - > 1h
``` ```
### Sleep
```
0 - long
1 - medium
2 - short
```

View File

@@ -155,9 +155,11 @@ for i in range(1, len(df)):
current.append(math) current.append(math)
current.append(slovak) current.append(slovak)
current.append(english) current.append(english)
current.append(parse_ses(ses))
current.append(parse_occupation(occupation)) current.append(parse_occupation(occupation))
current.append(parse_living(living)) current.append(parse_living(living))
current.append(parse_commute(commute)) current.append(parse_commute(commute))
current.append(parse_sleep(sleep))
current.append(parse_absence(absence)) current.append(parse_absence(absence))
clean.append(np.array(current)) clean.append(np.array(current))