diff --git a/CLEAN.md b/CLEAN.md index 30b442d..a56febf 100644 --- a/CLEAN.md +++ b/CLEAN.md @@ -10,10 +10,12 @@ The cleaned dataset will have the following structure: | 3 | Math | int | [1-5] | | 4 | Slovak | int | [1-5] | | 5 | English | int | [1-5] | -| 6 | Occupation | enum | [0-5] | -| 7 | Living | enum | [0-4] | -| 8 | Commute | enum | [0-4] | -| 9 | Absence | int | - | +| 6 | SES | enum | [0-2] | +| 7 | Occupation | enum | [0-5] | +| 8 | Living | enum | [0-4] | +| 9 | Commute | enum | [0-4] | +| 10 | Sleep | enum | [0-2] | +| 11 | Absence | int | - | ### Sex @@ -22,6 +24,14 @@ The cleaned dataset will have the following structure: 1 - muz ``` +### SES + +``` +0 - lower class +1 - middle class +2 - upper class +``` + ### Occupation ``` @@ -52,3 +62,11 @@ The cleaned dataset will have the following structure: 3 - <= 1h 4 - > 1h ``` + +### Sleep + +``` +0 - long +1 - medium +2 - short +``` diff --git a/clean.py b/clean.py index ef374f8..4cbdc95 100644 --- a/clean.py +++ b/clean.py @@ -155,9 +155,11 @@ for i in range(1, len(df)): current.append(math) current.append(slovak) current.append(english) + current.append(parse_ses(ses)) current.append(parse_occupation(occupation)) current.append(parse_living(living)) current.append(parse_commute(commute)) + current.append(parse_sleep(sleep)) current.append(parse_absence(absence)) clean.append(np.array(current))