from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np


def iris() -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Load the iris dataset as standardized, one-hot encoded train/test splits.

    The data is split 70/30 with a fixed random seed, so repeated calls
    return the same partition. Features are standardized using statistics
    computed on the training split only; the test split is transformed
    with those same statistics so that no test-set information leaks into
    preprocessing. Labels are one-hot encoded with a single encoder so the
    column order is identical for both splits.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` where the ``X``
        arrays hold the standardized features and the ``y`` arrays hold
        the dense one-hot encoded class labels.
    """
    dataset = load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data,
        dataset.target,
        test_size=0.3,
        random_state=8675309,
    )

    # Fit the scaler on the training data only and reuse its mean/variance
    # for the test data; re-fitting on the test split would scale the two
    # splits inconsistently and leak test statistics.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Fit one encoder on the complete label set so both splits share the
    # same categories (and therefore the same one-hot column layout), even
    # if a class happened to be absent from one of the splits.
    encoder = OneHotEncoder().fit(dataset.target.reshape(-1, 1))
    y_train = encoder.transform(y_train.reshape(-1, 1)).toarray()
    y_test = encoder.transform(y_test.reshape(-1, 1)).toarray()
    return X_train, X_test, y_train, y_test