Schuhgröße: MAE “zu Fuß”

Schuhgröße: MAE “zu Fuß”

import pandas as pd

# Path of the CSV file that holds the data set used by the rest of this notebook.
datei = "synthetische_daten_3bis40_mit_BMI_Kategorien.csv"

# Disabled alternative: read the file as raw text and split it into lines by hand.
#with open(datei) as f:
#    mein_text = f.read()
#mein_text.split("\n")

# Parse the CSV into a DataFrame.
df = pd.read_csv(datei)

# Bare expression: in a notebook this displays the first rows of df.
df.head()

	ID	Alter_Jahre	Geschlecht	Körpergröße_cm	Gewicht_kg	BMI	BMI_Kategorie	Schuhgröße_EU
0	1	13	w	160.2	51.1	19.9	Normalgewicht	38
1	2	7	m	128.1	23.6	14.4	Untergewichtig	31
2	3	4	w	100.7	16.5	16.3	Normalgewicht	25
3	4	11	w	148.9	39.5	17.8	Normalgewicht	36
4	5	9	w	129.7	30.1	17.9	Normalgewicht	31

from sklearn.model_selection import train_test_split

# Target variable: the EU shoe size column, selected as a single column of df.
y = df["Schuhgröße_EU"]

# Bare expression: shows which type the single-column selection returns.
type(y)

pandas.core.series.Series

# Bare expression: lists all column names of df (used to pick the features).
df.columns

Index(['ID', 'Alter_Jahre', 'Geschlecht', 'Körpergröße_cm', 'Gewicht_kg',
       'BMI', 'BMI_Kategorie', 'Schuhgröße_EU'],
      dtype='object')

# Feature matrix: all columns of df except the target ("Schuhgröße_EU") and the
# two text-valued columns ("Geschlecht", "BMI_Kategorie").
# NOTE(review): "ID" is not dropped and therefore stays in X as a feature;
# it looks like a running row number - confirm that this is intended.
X = df.drop(columns=["Geschlecht", "Schuhgröße_EU", "BMI_Kategorie"])

	ID	Alter_Jahre	Körpergröße_cm	Gewicht_kg	BMI
0	1	13	160.2	51.1	19.9
1	2	7	128.1	23.6	14.4
2	3	4	100.7	16.5	16.3
3	4	11	148.9	39.5	17.8
4	5	9	129.7	30.1	17.9
...	...	...	...	...	...
1495	1496	25	170.6	57.0	19.6
1496	1497	30	164.3	54.3	20.1
1497	1498	35	167.0	69.4	24.9
1498	1499	36	168.8	62.1	21.8
1499	1500	33	179.3	81.7	25.4

1500 rows × 5 columns

# Bare expression: summary statistics (count, mean, std, quartiles) per feature column.
X.describe()

	ID	Alter_Jahre	Körpergröße_cm	Gewicht_kg	BMI
count	1500.000000	1500.000000	1500.000000	1500.000000	1500.000000
mean	750.500000	18.227333	153.753067	52.699200	20.692067
std	433.157015	10.560587	25.734906	23.107748	4.288830
min	1.000000	3.000000	76.200000	7.800000	8.500000
25%	375.750000	9.000000	135.700000	30.900000	17.100000
50%	750.500000	17.000000	162.950000	57.700000	20.900000
75%	1125.250000	27.000000	173.000000	71.200000	24.000000
max	1500.000000	40.000000	197.300000	111.500000	33.100000

from sklearn.model_selection import train_test_split

# Split features and target into a training part and a validation part.
# random_state is fixed so that the shuffle, and thus the split, is reproducible.
# No test_size is passed; the shapes printed below show a 1125 / 375 row split.
train_X, val_X, train_y, val_y = train_test_split(X, y, random_state = 0)

# Bare expression: (rows, columns) of the training features.
train_X.shape

(1125, 5)

# Bare expression: (rows, columns) of the validation features.
val_X.shape

(375, 5)

from sklearn.linear_model import LinearRegression

# Create the linear regression estimator with its default parameters.
reg = LinearRegression()

# Fit on the training split only; the validation split is not passed here.
reg.fit(train_X, train_y)

LinearRegression()

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

# Bare expression: displays the validation features that are predicted next.
val_X

	ID	Alter_Jahre	Körpergröße_cm	Gewicht_kg	BMI
471	472	16	162.0	55.6	21.2
9	10	13	156.3	46.9	19.2
1499	1500	33	179.3	81.7	25.4
54	55	10	147.6	31.2	14.3
1411	1412	27	184.0	75.2	22.2
...	...	...	...	...	...
75	76	5	105.4	16.3	14.7
481	482	13	159.5	56.2	22.1
1350	1351	38	169.7	68.8	23.9
748	749	10	136.8	34.2	18.3
1362	1363	35	167.0	77.3	27.7

375 rows × 5 columns

# Predict the target for every row of the validation features.
predict_y = reg.predict( val_X )

# Sanity check: number of predictions next to number of true validation values.
len(predict_y), len(val_y)

(375, 375)

#predict_y[:10]

#val_y[:10]

# Absolute errors |true value - prediction|, computed "by hand" for a small
# sample taken from the start of the validation data.
# NOTE: only the first `n_stichprobe` rows are used, so anything derived from
# `differenz` describes this sample, not the whole validation set.
n_stichprobe = 10

# Convert each sequence to a list once (instead of once per loop iteration) and
# pair the values positionally. Slicing also copes with fewer than
# `n_stichprobe` rows instead of raising an IndexError.
differenz = [
    abs(v - p)
    for v, p in zip(list(val_y)[:n_stichprobe], list(predict_y)[:n_stichprobe])
]
differenz

[np.float64(0.2780887017681408),
 np.float64(0.44371682153970227),
 np.float64(0.4054127305193447),
 np.float64(0.5014711798481528),
 np.float64(0.31780106007500564),
 np.float64(0.010436596802485099),
 np.float64(0.2955949140213576),
 np.float64(0.06754289132536684),
 np.float64(0.36750307699773543),
 np.float64(0.09606540023445831)]

# Mean absolute error "by hand": the average of the absolute differences in
# `differenz`. It covers only the entries of that list; the code that builds it
# takes just the first 10 validation rows, so this is not the MAE over all of
# the validation data.
MAE = sum( differenz ) / len( differenz )
# Bare expression: displays the result in the notebook.
MAE

np.float64(0.278363337313175)

	fit_intercept	True
	copy_X	True
	tol	1e-06
	n_jobs	None
	positive	False