Schuhgröße: MAE “zu Fuß”

Schuhgröße: MAE “zu Fuß”

import pandas as pd
# Path of the CSV file holding the data set used throughout this notebook.
datei = "synthetische_daten_3bis40_mit_BMI_Kategorien.csv"
# Manual alternative (disabled): read the raw text and split it into lines.
#with open(datei) as f:
#    mein_text = f.read()
#mein_text.split("\n")
# Parse the CSV into a DataFrame and preview its first rows.
df = pd.read_csv(datei)
df.head()
ID Alter_Jahre Geschlecht Körpergröße_cm Gewicht_kg BMI BMI_Kategorie Schuhgröße_EU
0 1 13 w 160.2 51.1 19.9 Normalgewicht 38
1 2 7 m 128.1 23.6 14.4 Untergewichtig 31
2 3 4 w 100.7 16.5 16.3 Normalgewicht 25
3 4 11 w 148.9 39.5 17.8 Normalgewicht 36
4 5 9 w 129.7 30.1 17.9 Normalgewicht 31
from sklearn.model_selection import train_test_split
# Target variable: the EU shoe size column.
y = df["Schuhgröße_EU"]
# Inspect the container type of the selected column.
type(y)
pandas.core.series.Series
# List all column names to decide which ones serve as features.
df.columns
Index(['ID', 'Alter_Jahre', 'Geschlecht', 'Körpergröße_cm', 'Gewicht_kg',
       'BMI', 'BMI_Kategorie', 'Schuhgröße_EU'],
      dtype='object')
# Feature matrix: every column except the target ("Schuhgröße_EU") and the
# two text-valued columns ("Geschlecht", "BMI_Kategorie").
# NOTE(review): "ID" stays in as a feature here; it looks like a plain row
# counter - confirm whether it should be excluded from the model inputs.
nicht_verwendet = ["Geschlecht", "Schuhgröße_EU", "BMI_Kategorie"]
X = df.drop(columns=nicht_verwendet)
X
ID Alter_Jahre Körpergröße_cm Gewicht_kg BMI
0 1 13 160.2 51.1 19.9
1 2 7 128.1 23.6 14.4
2 3 4 100.7 16.5 16.3
3 4 11 148.9 39.5 17.8
4 5 9 129.7 30.1 17.9
... ... ... ... ... ...
1495 1496 25 170.6 57.0 19.6
1496 1497 30 164.3 54.3 20.1
1497 1498 35 167.0 69.4 24.9
1498 1499 36 168.8 62.1 21.8
1499 1500 33 179.3 81.7 25.4

1500 rows × 5 columns

# Summary statistics (count, mean, std, quartiles) for each feature column.
X.describe()
ID Alter_Jahre Körpergröße_cm Gewicht_kg BMI
count 1500.000000 1500.000000 1500.000000 1500.000000 1500.000000
mean 750.500000 18.227333 153.753067 52.699200 20.692067
std 433.157015 10.560587 25.734906 23.107748 4.288830
min 1.000000 3.000000 76.200000 7.800000 8.500000
25% 375.750000 9.000000 135.700000 30.900000 17.100000
50% 750.500000 17.000000 162.950000 57.700000 20.900000
75% 1125.250000 27.000000 173.000000 71.200000 24.000000
max 1500.000000 40.000000 197.300000 111.500000 33.100000
from sklearn.model_selection import train_test_split

# Split features and target into a training and a validation part; the fixed
# random_state keeps the split identical across runs.
train_X, val_X, train_y, val_y = train_test_split(X, y, random_state = 0)
# Size of the training part as (rows, columns).
train_X.shape
(1125, 5)
# Size of the validation part as (rows, columns).
val_X.shape
(375, 5)
from sklearn.linear_model import LinearRegression
# Create a linear regression model and fit it on the training split only.
reg = LinearRegression()
reg.fit(train_X, train_y)
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Display the validation features that the model will be evaluated on.
val_X
ID Alter_Jahre Körpergröße_cm Gewicht_kg BMI
471 472 16 162.0 55.6 21.2
9 10 13 156.3 46.9 19.2
1499 1500 33 179.3 81.7 25.4
54 55 10 147.6 31.2 14.3
1411 1412 27 184.0 75.2 22.2
... ... ... ... ... ...
75 76 5 105.4 16.3 14.7
481 482 13 159.5 56.2 22.1
1350 1351 38 169.7 68.8 23.9
748 749 10 136.8 34.2 18.3
1362 1363 35 167.0 77.3 27.7

375 rows × 5 columns

# Predict shoe sizes for the validation features.
predict_y = reg.predict( val_X ) 
# Sanity check: there must be one prediction per true validation value.
len(predict_y), len(val_y)
(375, 375)
# Number of validation samples compared by hand. Only this leading slice is
# inspected, so the resulting error describes a sample, not the whole split.
anzahl_vergleiche = 10

# Materialise both sequences once (the loop previously rebuilt the complete
# lists on every iteration) and keep only the leading samples. Slicing also
# copes with fewer rows than requested instead of raising IndexError.
wahre_werte = list(val_y)[:anzahl_vergleiche]
vorhersagen = list(predict_y)[:anzahl_vergleiche]

# Absolute error per sample: |true value - prediction|, paired by position.
differenz = [abs(v - p) for v, p in zip(wahre_werte, vorhersagen)]
differenz
[np.float64(0.2780887017681408),
 np.float64(0.44371682153970227),
 np.float64(0.4054127305193447),
 np.float64(0.5014711798481528),
 np.float64(0.31780106007500564),
 np.float64(0.010436596802485099),
 np.float64(0.2955949140213576),
 np.float64(0.06754289132536684),
 np.float64(0.36750307699773543),
 np.float64(0.09606540023445831)]
# Mean absolute error computed by hand: the average of the collected absolute
# differences. NOTE: `differenz` only holds the first 10 validation samples,
# so this is the MAE of that sample rather than of the full validation set.
MAE = sum( differenz ) / len( differenz )
MAE
np.float64(0.278363337313175)