"""Streamlit app: train a scikit-learn model on an uploaded CSV.

The app preprocesses the uploaded dataset (imputation, label encoding,
scaling, optional SMOTE, univariate feature selection), shows a standalone
Python script that reproduces the same pipeline, trains the selected model
and saves it to ``models/trained_model.pkl``.

The last column of the CSV is always treated as the target.
"""

import os
import textwrap

import joblib
import numpy as np
import pandas as pd
import streamlit as st
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.feature_selection import SelectKBest, f_classif, f_regression
from sklearn.impute import SimpleImputer
from sklearn.linear_model import Perceptron
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
    precision_score,
    r2_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

# Sidebar label -> (public sklearn module, {task: estimator class}).
# An explicit registry replaces eval() on a class name and gives the
# generated script a real import path (there is no ``sklearn.knn``).
# Perceptron is a classifier only, so it has no "Regression" entry.
ESTIMATORS = {
    "KNN": (
        "sklearn.neighbors",
        {"Classification": KNeighborsClassifier, "Regression": KNeighborsRegressor},
    ),
    "SVM": (
        "sklearn.svm",
        {"Classification": SVC, "Regression": SVR},
    ),
    "Random Forest": (
        "sklearn.ensemble",
        {"Classification": RandomForestClassifier, "Regression": RandomForestRegressor},
    ),
    "Decision Tree": (
        "sklearn.tree",
        {"Classification": DecisionTreeClassifier, "Regression": DecisionTreeRegressor},
    ),
    "Perceptron": (
        "sklearn.linear_model",
        {"Classification": Perceptron},
    ),
}

# Streamlit UI
st.title("🚀 AI Code Generator")
st.markdown("Generate & Train ML Models with Preprocessing and Feature Selection")

# Sidebar UI
st.sidebar.title("Choose Options")
model_options = list(ESTIMATORS)
model = st.sidebar.selectbox("Choose a Model:", model_options)
task_options = ["Classification", "Regression"]
task = st.sidebar.selectbox("Choose a Task:", task_options)

# Load Dataset
st.markdown("### Upload your Dataset (CSV)")
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file:
    data = pd.read_csv(uploaded_file)
    st.write("Preview of Dataset:", data.head())

    # Resolve the estimator up front so an unsupported combination is
    # reported before any work is done.
    module_name, estimators_by_task = ESTIMATORS[model]
    estimator_cls = estimators_by_task.get(task)
    if estimator_cls is None:
        st.error(f"{model} does not support {task}. Please choose a different model or task.")
        st.stop()
    model_class = estimator_cls.__name__

    if data.shape[1] < 2:
        st.error("The dataset needs at least one feature column and a target column.")
        st.stop()

    # Preprocessing Steps
    st.markdown("### Data Preprocessing Steps")

    # Rows without a target cannot be used for training; imputing the
    # target with a mean would invent labels.
    data = data[data.iloc[:, -1].notna()]
    # Columns with no values at all carry no information and would be
    # dropped by SimpleImputer anyway, breaking the column assignment below.
    X = data.iloc[:, :-1].dropna(axis=1, how="all").copy()  # Features
    y = data.iloc[:, -1]  # Target
    if X.empty:
        st.error("No usable rows or feature columns remain after removing missing data.")
        st.stop()

    # Handling Missing Values
    st.write("✅ Handling missing values using `SimpleImputer`")
    numeric_cols = X.select_dtypes(include="number").columns
    categorical_cols = X.select_dtypes(include=["object"]).columns
    if len(numeric_cols) > 0:
        X[numeric_cols] = SimpleImputer(strategy="mean").fit_transform(X[numeric_cols])
    if len(categorical_cols) > 0:
        X[categorical_cols] = SimpleImputer(strategy="most_frequent").fit_transform(
            X[categorical_cols]
        )

    # Encoding Categorical Variables
    st.write("✅ Encoding categorical variables")
    for col in categorical_cols:
        # astype(str) keeps LabelEncoder from failing on mixed-type columns.
        X[col] = LabelEncoder().fit_transform(X[col].astype(str))
    if y.dtype == object:
        y = pd.Series(LabelEncoder().fit_transform(y.astype(str)), index=y.index, name=y.name)

    # Splitting Data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Feature Scaling
    st.write("✅ Applying StandardScaler")
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Handle Imbalanced Dataset using SMOTE
    if task == "Classification":
        st.write("✅ Handling Imbalanced Dataset using SMOTE")
        # SMOTE needs k_neighbors < size of the smallest class.
        smallest_class = int(pd.Series(y_train).value_counts().min())
        if smallest_class > 1:
            smote = SMOTE(k_neighbors=min(5, smallest_class - 1), random_state=42)
            X_train, y_train = smote.fit_resample(X_train, y_train)
        else:
            st.warning("Skipping SMOTE: at least one class has a single training sample.")

    # Feature Selection
    st.write("✅ Selecting Best Features")
    score_func = f_classif if task == "Classification" else f_regression
    selector = SelectKBest(score_func, k=min(5, X.shape[1]))
    X_train = selector.fit_transform(X_train, y_train)
    X_test = selector.transform(X_test)

    # Standalone script mirroring the pipeline above. dedent() strips the
    # indentation this literal inherits from the enclosing ``if`` block.
    template = textwrap.dedent(f"""\
        import os

        import joblib
        import numpy as np
        import pandas as pd
        from imblearn.over_sampling import SMOTE
        from sklearn.feature_selection import SelectKBest, f_classif, f_regression
        from sklearn.impute import SimpleImputer
        from sklearn.metrics import (
            accuracy_score,
            f1_score,
            mean_absolute_error,
            mean_squared_error,
            precision_score,
            r2_score,
            recall_score,
        )
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import LabelEncoder, StandardScaler
        from {module_name} import {model_class}

        TASK = "{task}"

        # Load Dataset
        data = pd.read_csv('dataset.csv')

        # Splitting features and target (last column is the target)
        data = data[data.iloc[:, -1].notna()]
        X = data.iloc[:, :-1].dropna(axis=1, how="all").copy()
        y = data.iloc[:, -1]

        # Handling Missing Values
        numeric_cols = X.select_dtypes(include="number").columns
        categorical_cols = X.select_dtypes(include=["object"]).columns
        if len(numeric_cols) > 0:
            X[numeric_cols] = SimpleImputer(strategy="mean").fit_transform(X[numeric_cols])
        if len(categorical_cols) > 0:
            X[categorical_cols] = SimpleImputer(strategy="most_frequent").fit_transform(
                X[categorical_cols]
            )

        # Encoding Categorical Variables
        for col in categorical_cols:
            X[col] = LabelEncoder().fit_transform(X[col].astype(str))
        if y.dtype == object:
            y = pd.Series(LabelEncoder().fit_transform(y.astype(str)), index=y.index, name=y.name)

        # Splitting Data
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Feature Scaling
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Handle Imbalanced Data (SMOTE)
        if TASK == "Classification":
            smallest_class = int(pd.Series(y_train).value_counts().min())
            if smallest_class > 1:
                smote = SMOTE(k_neighbors=min(5, smallest_class - 1), random_state=42)
                X_train, y_train = smote.fit_resample(X_train, y_train)

        # Feature Selection
        score_func = f_classif if TASK == "Classification" else f_regression
        selector = SelectKBest(score_func, k=min(5, X.shape[1]))
        X_train = selector.fit_transform(X_train, y_train)
        X_test = selector.transform(X_test)

        # Model Training
        model = {model_class}()
        model.fit(X_train, y_train)

        # Save Trained Model
        os.makedirs('models', exist_ok=True)
        joblib.dump(model, 'models/trained_model.pkl')

        # Evaluation Metrics
        y_pred = model.predict(X_test)
        if TASK == "Classification":
            print("Accuracy:", accuracy_score(y_test, y_pred))
            print("Precision:", precision_score(y_test, y_pred, average='weighted'))
            print("Recall:", recall_score(y_test, y_pred, average='weighted'))
            print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
        else:
            print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
            print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
            print("R2 Score:", r2_score(y_test, y_pred))
        """)

    st.code(template, language="python")
    st.download_button("📥 Download AI Model Code", template, "ai_model.py")

    # Train and save the model; create the output directory first so
    # joblib.dump does not fail on a fresh checkout.
    model_instance = estimator_cls()
    model_instance.fit(X_train, y_train)
    os.makedirs("models", exist_ok=True)
    joblib.dump(model_instance, "models/trained_model.pkl")
    st.success("✅ Model trained and saved as `trained_model.pkl`")