Skizze Wollknäuel

Contents

Skizze Wollknäuel#

Mathilda Musterfrau
s-mmuster@haw…
MatNr: 12 34 567

Problemstellungen#

Beschrieben unter http://jbusse.de/dsci-ml_ws2022/Studienarbeit-SS-2023.html:

abschätzen Körpergröße
abschätzen Geschlecht

# Relative path to the CSV data set that is loaded into the DataFrame below.
meine_Datei = "../data/MaennerFrauenKnaeuel.csv"

EDA Explorative Datenanalyse#

import pandas as pd

# The file is semicolon-separated. Its first column shows up as "Unnamed: 0"
# and simply repeats the row number (0, 1, 2, ...).
df = pd.read_csv(meine_Datei, sep=";")

# Show the first five rows as a quick sanity check of the parsed columns.
df.head()

	Unnamed: 0	age	height	spezies
0	0	0.0	60	b
1	1	5.5	88	b
2	2	13.8	0	b
3	3	4.1	91	b
4	4	13.8	165	b

# Summary statistics of the numeric columns.
# NOTE(review): a minimum height below zero (and heights of 0) look like
# invalid or placeholder values — verify before modelling.
df.describe()

	Unnamed: 0	age	height
count	1260.000000	1260.000000	1260.000000
mean	629.500000	25.519444	105.180952
std	363.874979	23.836270	70.534624
min	0.000000	0.000000	-6.000000
25%	314.750000	2.900000	30.000000
50%	629.500000	17.800000	127.000000
75%	944.250000	44.600000	169.000000
max	1259.000000	80.000000	208.000000

# (number of rows, number of columns) of the loaded frame.
df.shape

(1260, 4)

# Column labels of the loaded frame.
df.columns

Index(['Unnamed: 0', 'age', 'height', 'spezies'], dtype='object')

# Distinct labels that occur in the "spezies" column.
# NOTE(review): the labels mix upper and lower case (e.g. 'm' and 'M') —
# presumably these are different groups; confirm their meaning.
df.spezies.unique()

array(['b', 'g', 'm', 'M', 'w', 'F', 'K'], dtype=object)

Problem 1: Abschätzen Körpergröße#

# Split off the regression target. pop() removes "height" from df in place
# and returns it, so df no longer contains that column afterwards and running
# this cell a second time raises a KeyError.
y = df.pop("height")
y

      60
      88
       0
      91
     165
       ... 
  104
  208
  112
    0
  140
Name: height, Length: 1260, dtype: int64

# Feature matrix: everything left in df once "height" has been popped.
# X is only another name for df (no copy is made), so a change to one is
# visible through the other.
X = df
X

	Unnamed: 0	age	spezies
0	0	0.0	b
1	1	5.5	b
2	2	13.8	b
3	3	4.1	b
4	4	13.8	b
...	...	...	...
1255	1255	28.1	K
1256	1256	3.2	K
1257	1257	0.0	K
1258	1258	29.9	K
1259	1259	12.9	K

1260 rows × 3 columns

from sklearn.tree import DecisionTreeRegressor

# DecisionTreeRegressor only accepts numeric features. Passing the raw frame
# fails with "ValueError: could not convert string to float: 'b'" because
# "spezies" holds string labels, so that column is one-hot encoded first.
# "Unnamed: 0" is only the row counter stored in the CSV and says nothing
# about a person, so it is dropped (errors="ignore" keeps the cell working
# if the column is already gone).
X_encoded = pd.get_dummies(
    X.drop(columns=["Unnamed: 0"], errors="ignore"),
    columns=["spezies"],
)

# Define model. Specify a number for random_state to ensure same results each run
melbourne_model = DecisionTreeRegressor(random_state=1)

# Fit model on the purely numeric feature matrix
melbourne_model.fit(X_encoded, y)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/tmp/ipykernel_19813/839580878.py in ?()
# Define model. Specify a number for random_state to ensure same results each run
melbourne_model = DecisionTreeRegressor(random_state=1)

# Fit model
----> 7 melbourne_model.fit(X, y)

~/miniconda3/lib/python3.13/site-packages/sklearn/base.py in ?(estimator, *args, **kwargs)
               skip_parameter_validation=(
                   prefer_skip_nested_validation or global_skip_validation
               )
           ):
-> 1365                 return fit_method(estimator, *args, **kwargs)

~/miniconda3/lib/python3.13/site-packages/sklearn/tree/_classes.py in ?(self, X, y, sample_weight, check_input)
       self : DecisionTreeRegressor
           Fitted estimator.
       """

-> 1404         super()._fit(
           X,
           y,
           sample_weight=sample_weight,

~/miniconda3/lib/python3.13/site-packages/sklearn/tree/_classes.py in ?(self, X, y, sample_weight, check_input, missing_values_in_feature_mask)
           check_X_params = dict(
               dtype=DTYPE, accept_sparse="csc", ensure_all_finite=False
           )
           check_y_params = dict(ensure_2d=False, dtype=None)
--> 252             X, y = validate_data(
               self, X, y, validate_separately=(check_X_params, check_y_params)
           )


~/miniconda3/lib/python3.13/site-packages/sklearn/utils/validation.py in ?(_estimator, X, y, reset, validate_separately, skip_check_array, **check_params)
           # :(
           check_X_params, check_y_params = validate_separately
           if "estimator" not in check_X_params:
               check_X_params = {**default_check_params, **check_X_params}
-> 2966             X = check_array(X, input_name="X", **check_X_params)
           if "estimator" not in check_y_params:
               check_y_params = {**default_check_params, **check_y_params}
           y = check_array(y, input_name="y", **check_y_params)

~/miniconda3/lib/python3.13/site-packages/sklearn/utils/validation.py in ?(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_all_finite, ensure_non_negative, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
                       )
                   array = xp.astype(array, dtype, copy=False)
               else:
                   array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)
-> 1054             except ComplexWarning as complex_warning:
               raise ValueError(
                   "Complex data not supported\n{}\n".format(array)
               ) from complex_warning

~/miniconda3/lib/python3.13/site-packages/sklearn/utils/_array_api.py in ?(array, dtype, order, copy, xp, device)
       # Use NumPy API to support order
       if copy is True:
           array = numpy.array(array, order=order, dtype=dtype)
       else:
--> 757             array = numpy.asarray(array, order=order, dtype=dtype)

       # At this point array is a NumPy ndarray. We convert it to an array
       # container that is consistent with the input's namespace.

~/miniconda3/lib/python3.13/site-packages/pandas/core/generic.py in ?(self, dtype, copy)
           )
       values = self._values
       if copy is None:
           # Note: branch avoids `copy=None` for NumPy 1.x support
-> 2168             arr = np.asarray(values, dtype=dtype)
       else:
           arr = np.array(values, dtype=dtype, copy=copy)


ValueError: could not convert string to float: 'b'

Problem 2: Abschätzen Geschlecht#