# Author: Surbhi
# Commit b2fd176: "Feature extraction and model training"
# (Hugging Face file view: raw | history | blame, 5.61 kB)
import importlib
import textwrap
from pathlib import Path

import streamlit as st
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectKBest, f_classif, f_regression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error, r2_score
from imblearn.over_sampling import SMOTE
# Streamlit UI: page header.
# The title emoji was mojibake (UTF-8 bytes decoded with a legacy codepage);
# it is restored to the intended rocket character.
st.title("🚀 AI Code Generator")
st.markdown("Generate & Train ML Models with Preprocessing and Feature Selection")

# Sidebar UI: the user picks which estimator family to train and whether the
# last column of the dataset is a class label or a continuous value.
st.sidebar.title("Choose Options")
model_options = ["KNN", "SVM", "Random Forest", "Decision Tree", "Perceptron"]
model = st.sidebar.selectbox("Choose a Model:", model_options)
task_options = ["Classification", "Regression"]
task = st.sidebar.selectbox("Choose a Task:", task_options)

# Load Dataset: `uploaded_file` stays None until the user provides a CSV,
# which gates the whole training pipeline below.
st.markdown("### Upload your Dataset (CSV)")
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
# Sidebar model name -> (scikit-learn submodule, classifier class, regressor class).
# Perceptron is a classifier only, so it has no regressor entry.
ESTIMATORS = {
    "KNN": ("neighbors", "KNeighborsClassifier", "KNeighborsRegressor"),
    "SVM": ("svm", "SVC", "SVR"),
    "Random Forest": ("ensemble", "RandomForestClassifier", "RandomForestRegressor"),
    "Decision Tree": ("tree", "DecisionTreeClassifier", "DecisionTreeRegressor"),
    "Perceptron": ("linear_model", "Perceptron", None),
}

# Location where the model trained inside the app is persisted.
MODEL_PATH = Path("models") / "trained_model.pkl"


def _render_training_script(module_name, class_name, task):
    """Return the source code of a standalone training script.

    The script mirrors the pipeline this app runs in-process (mean-fill,
    label encoding, split, scaling, optional SMOTE, SelectKBest, fit, save,
    evaluate) and contains only the code relevant to ``task``.

    Args:
        module_name: scikit-learn submodule that holds the estimator,
            e.g. ``"neighbors"``.
        class_name: estimator class to import and instantiate.
        task: ``"Classification"`` or anything else (treated as regression).

    Returns:
        The generated Python source as a single string.
    """
    is_classification = task == "Classification"
    score_func = "f_classif" if is_classification else "f_regression"

    preparation = textwrap.dedent(f"""\
        from pathlib import Path

        import numpy as np
        import pandas as pd
        import joblib
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import StandardScaler, LabelEncoder
        from sklearn.feature_selection import SelectKBest, f_classif, f_regression
        from sklearn.metrics import (
            accuracy_score,
            precision_score,
            recall_score,
            f1_score,
            mean_absolute_error,
            mean_squared_error,
            r2_score,
        )
        from imblearn.over_sampling import SMOTE
        from sklearn.{module_name} import {class_name}

        # Load Dataset
        data = pd.read_csv('dataset.csv')

        # Handling Missing Values (numeric columns are filled with their mean)
        data = data.fillna(data.mean(numeric_only=True))

        # Encoding Categorical Variables
        for col in data.select_dtypes(include=["object"]).columns:
            data[col] = LabelEncoder().fit_transform(data[col])

        # Splitting Data
        X = data.iloc[:, :-1]
        y = data.iloc[:, -1]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Feature Scaling
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        """)

    # Oversampling only makes sense when the target is a class label.
    if is_classification:
        resampling = textwrap.dedent("""\
            # Handle Imbalanced Data (SMOTE)
            smote = SMOTE()
            X_train, y_train = smote.fit_resample(X_train, y_train)

            """)
    else:
        resampling = ""

    training = textwrap.dedent(f"""\
        # Feature Selection
        selector = SelectKBest({score_func}, k=min(5, X.shape[1]))
        X_train = selector.fit_transform(X_train, y_train)
        X_test = selector.transform(X_test)

        # Model Training
        model = {class_name}()
        model.fit(X_train, y_train)

        # Save Trained Model
        Path('models').mkdir(parents=True, exist_ok=True)
        joblib.dump(model, 'models/trained_model.pkl')

        # Evaluation Metrics
        y_pred = model.predict(X_test)
        """)

    if is_classification:
        metrics = textwrap.dedent("""\
            print("Accuracy:", accuracy_score(y_test, y_pred))
            print("Precision:", precision_score(y_test, y_pred, average='weighted'))
            print("Recall:", recall_score(y_test, y_pred, average='weighted'))
            print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
            """)
    else:
        metrics = textwrap.dedent("""\
            print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
            print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
            print("R2 Score:", r2_score(y_test, y_pred))
            """)

    return preparation + resampling + training + metrics


if uploaded_file:
    data = pd.read_csv(uploaded_file)
    st.write("Preview of Dataset:", data.head())

    # Resolve the estimator up front so an unsupported combination is reported
    # before any preprocessing work is done.
    module_name, classifier_name, regressor_name = ESTIMATORS[model]
    is_classification = task == "Classification"
    model_class = classifier_name if is_classification else regressor_name
    if model_class is None:
        st.error(f"{model} supports classification only; choose another model for regression.")
        st.stop()

    # Preprocessing Steps
    st.markdown("### Data Preprocessing Steps")

    # Handling Missing Values.
    # numeric_only=True is required: on pandas >= 2.0 a plain data.mean()
    # raises TypeError as soon as the frame contains a non-numeric column.
    st.write("✅ Handling missing values (numeric columns filled with their mean)")
    data = data.fillna(data.mean(numeric_only=True))

    # Encoding Categorical Variables
    st.write("✅ Encoding categorical variables")
    for col in data.select_dtypes(include=["object"]).columns:
        data[col] = LabelEncoder().fit_transform(data[col])

    # Splitting Data: the last column is treated as the target.
    X = data.iloc[:, :-1]  # Features
    y = data.iloc[:, -1]  # Target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Feature Scaling (fitted on the training split only)
    st.write("✅ Applying StandardScaler")
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Handle Imbalanced Dataset using SMOTE (classification targets only)
    if is_classification:
        st.write("✅ Handling Imbalanced Dataset using SMOTE")
        smote = SMOTE()
        X_train, y_train = smote.fit_resample(X_train, y_train)

    # Feature Selection: keep at most five features.
    st.write("✅ Selecting Best Features")
    score_func = f_classif if is_classification else f_regression
    selector = SelectKBest(score_func, k=min(5, X.shape[1]))
    X_train = selector.fit_transform(X_train, y_train)
    X_test = selector.transform(X_test)

    # Show the equivalent standalone script and offer it for download.
    template = _render_training_script(module_name, model_class, task)
    st.code(template, language="python")
    st.download_button("📥 Download AI Model Code", template, "ai_model.py")

    # Train and save the model.
    # The class is looked up by name in its scikit-learn submodule instead of
    # eval(): the estimator classes are not imported at file level, so eval()
    # raised NameError.
    estimator_cls = getattr(importlib.import_module(f"sklearn.{module_name}"), model_class)
    model_instance = estimator_cls()
    model_instance.fit(X_train, y_train)

    # joblib.dump does not create missing parent directories.
    MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(model_instance, MODEL_PATH)
    st.success("✅ Model trained and saved as `trained_model.pkl`")