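"""Streamlit app that walks a CSV dataset through basic preprocessing (imputation,
label encoding, scaling, optional SMOTE balancing, feature selection), trains the
selected scikit-learn model, and generates a standalone training script to download."""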

import os
import importlib
import textwrap

import streamlit as st
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectKBest, f_classif, f_regression
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    mean_absolute_error, mean_squared_error, r2_score,
)
from imblearn.over_sampling import SMOTE

# Streamlit UI
st.title("AI Code Generator")
st.markdown("Generate & Train ML Models with Preprocessing and Feature Selection")

# Sidebar UI
st.sidebar.title("Choose Options")
model_options = ["KNN", "SVM", "Random Forest", "Decision Tree", "Perceptron"]
model = st.sidebar.selectbox("Choose a Model:", model_options)
task_options = ["Classification", "Regression"]
task = st.sidebar.selectbox("Choose a Task:", task_options)

# Load Dataset
st.markdown("### Upload your Dataset (CSV)")
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file:
    data = pd.read_csv(uploaded_file)
    st.write("Preview of Dataset:", data.head())

    # Preprocessing Steps
    st.markdown("### Data Preprocessing Steps")

    # Handling Missing Values: mean-impute numeric columns only
    st.write("Handling missing values using `SimpleImputer`")
    numeric_cols = data.select_dtypes(include=[np.number]).columns
    imputer = SimpleImputer(strategy="mean")
    data[numeric_cols] = imputer.fit_transform(data[numeric_cols])

    # Encoding Categorical Variables (cast to str so missing values do not break the encoder)
    st.write("Encoding categorical variables")
    for col in data.select_dtypes(include=["object"]).columns:
        data[col] = LabelEncoder().fit_transform(data[col].astype(str))
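
    # Note: LabelEncoder assigns arbitrary integer codes to each category, which keeps this
    # script short; one-hot encoding is usually a better fit for nominal feature columns.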

    # Splitting Data (the last column is assumed to be the target)
    X = data.iloc[:, :-1]  # Features
    y = data.iloc[:, -1]   # Target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Feature Scaling
    st.write("Applying StandardScaler")
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
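
    # Note: the scaler is fit on the training split only, so the test split is transformed
    # with training statistics and no information from the test data leaks into training.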

    # Handle Imbalanced Dataset using SMOTE
    if task == "Classification":
        st.write("Handling Imbalanced Dataset using SMOTE")
        smote = SMOTE()
        X_train, y_train = smote.fit_resample(X_train, y_train)

    # Feature Selection
    st.write("Selecting Best Features")
    selector = SelectKBest(f_classif if task == "Classification" else f_regression, k=min(5, X.shape[1]))
    X_train = selector.fit_transform(X_train, y_train)
    X_test = selector.transform(X_test)
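
    # Note: SelectKBest keeps the k features with the highest univariate score
    # (f_classif for classification, f_regression for regression); k is capped at 5 here.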

    # Model Selection: map each UI option to its scikit-learn module and per-task class
    model_mapping = {
        "KNN": ("neighbors", "KNeighborsClassifier", "KNeighborsRegressor"),
        "SVM": ("svm", "SVC", "SVR"),
        "Random Forest": ("ensemble", "RandomForestClassifier", "RandomForestRegressor"),
        "Decision Tree": ("tree", "DecisionTreeClassifier", "DecisionTreeRegressor"),
        # scikit-learn's Perceptron is classification-only; it is kept for both tasks, as in the original option list
        "Perceptron": ("linear_model", "Perceptron", "Perceptron"),
    }
    module_name, clf_name, reg_name = model_mapping[model]
    model_class = clf_name if task == "Classification" else reg_name

    # Generated standalone training script (dedented so the downloaded file runs as-is)
    template = textwrap.dedent(f"""\
        import os

        import numpy as np
        import pandas as pd
        import joblib
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import StandardScaler, LabelEncoder
        from sklearn.impute import SimpleImputer
        from sklearn.feature_selection import SelectKBest, f_classif, f_regression
        from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error, r2_score
        from imblearn.over_sampling import SMOTE
        from sklearn.{module_name} import {model_class}

        TASK = "{task}"  # set from the sidebar selection in the generator app

        # Load Dataset
        data = pd.read_csv('dataset.csv')

        # Handling Missing Values: mean-impute numeric columns only
        numeric_cols = data.select_dtypes(include=[np.number]).columns
        imputer = SimpleImputer(strategy="mean")
        data[numeric_cols] = imputer.fit_transform(data[numeric_cols])

        # Encoding Categorical Variables
        for col in data.select_dtypes(include=["object"]).columns:
            data[col] = LabelEncoder().fit_transform(data[col].astype(str))

        # Splitting Data (last column is the target)
        X = data.iloc[:, :-1]
        y = data.iloc[:, -1]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Feature Scaling
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Handle Imbalanced Data (SMOTE)
        if TASK == "Classification":
            smote = SMOTE()
            X_train, y_train = smote.fit_resample(X_train, y_train)

        # Feature Selection
        selector = SelectKBest(f_classif if TASK == "Classification" else f_regression, k=min(5, X.shape[1]))
        X_train = selector.fit_transform(X_train, y_train)
        X_test = selector.transform(X_test)

        # Model Training
        model = {model_class}()
        model.fit(X_train, y_train)

        # Save Trained Model
        os.makedirs('models', exist_ok=True)
        joblib.dump(model, 'models/trained_model.pkl')

        # Evaluation Metrics
        y_pred = model.predict(X_test)
        if TASK == "Classification":
            print("Accuracy:", accuracy_score(y_test, y_pred))
            print("Precision:", precision_score(y_test, y_pred, average='weighted'))
            print("Recall:", recall_score(y_test, y_pred, average='weighted'))
            print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
        else:
            print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
            print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
            print("R2 Score:", r2_score(y_test, y_pred))
        """)

    st.code(template, language="python")
    st.download_button("Download AI Model Code", template, "ai_model.py")

    # Train and save the selected model (resolve the class from its module instead of eval)
    estimator_cls = getattr(importlib.import_module(f"sklearn.{module_name}"), model_class)
    model_instance = estimator_cls()
    model_instance.fit(X_train, y_train)
    os.makedirs("models", exist_ok=True)
    joblib.dump(model_instance, "models/trained_model.pkl")
    st.success("Model trained and saved as `models/trained_model.pkl`")
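
# Requirements (inferred from the imports above): streamlit, pandas, numpy, scikit-learn,
# imbalanced-learn, and joblib. Start the app with `streamlit run <script name>`
# (substitute the actual filename).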