Jensen-holm's picture
trying to move forwards with the iris dataset
6377159
raw
history blame
895 Bytes
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
def iris() -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Load the iris dataset and prepare it for model training.

    The samples are split 70/30 into training and testing sets using a
    fixed random seed, the features are standardized, and the integer
    class labels are one-hot encoded.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` of numpy arrays.
        The ``X`` arrays hold the standardized features; the ``y`` arrays
        hold the one-hot encoded labels as dense arrays.
    """
    dataset = load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data,
        dataset.target,
        test_size=0.3,
        random_state=8675309,
    )

    # Fit the scaler on the training split only and reuse its statistics
    # for the test split. Re-fitting on the test data would put the two
    # splits on different scales and leak test-set statistics.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Use a single encoder fitted on the full label vector so both splits
    # share the same column layout, regardless of which classes happen to
    # land in each split.
    encoder = OneHotEncoder()
    encoder.fit(dataset.target.reshape(-1, 1))
    y_train = encoder.transform(y_train.reshape(-1, 1)).toarray()
    y_test = encoder.transform(y_test.reshape(-1, 1)).toarray()

    return X_train, X_test, y_train, y_test