Spaces:
Sleeping
Sleeping
File size: 5,613 Bytes
a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 a73d60f b2fd176 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import importlib
import textwrap
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import streamlit as st
from imblearn.over_sampling import SMOTE
from sklearn.feature_selection import SelectKBest, f_classif, f_regression
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
# Streamlit UI: page title and a one-line description of the app.
# NOTE(review): the title's leading emoji was mis-encoded in the source; it is
# restored here as a best guess -- confirm the intended glyph.
st.title("🚀 AI Code Generator")
st.markdown("Generate & Train ML Models with Preprocessing and Feature Selection")

# Sidebar UI: the user picks the estimator family and the learning task.
# `model` and `task` hold the selected option strings and are read by the
# training pipeline further down the script.
st.sidebar.title("Choose Options")
model_options = ["KNN", "SVM", "Random Forest", "Decision Tree", "Perceptron"]
model = st.sidebar.selectbox("Choose a Model:", model_options)
task_options = ["Classification", "Regression"]
task = st.sidebar.selectbox("Choose a Task:", task_options)

# Load Dataset: `uploaded_file` stays falsy until the user provides a CSV.
st.markdown("### Upload your Dataset (CSV)")
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
# Maps each sidebar model choice to the scikit-learn submodule and estimator
# class for every task it supports. "Perceptron" has no "Regression" entry
# because scikit-learn's Perceptron is a classifier; that combination is
# rejected with an explicit message instead of failing inside fit().
MODEL_REGISTRY = {
    "KNN": {
        "Classification": ("neighbors", "KNeighborsClassifier"),
        "Regression": ("neighbors", "KNeighborsRegressor"),
    },
    "SVM": {
        "Classification": ("svm", "SVC"),
        "Regression": ("svm", "SVR"),
    },
    "Random Forest": {
        "Classification": ("ensemble", "RandomForestClassifier"),
        "Regression": ("ensemble", "RandomForestRegressor"),
    },
    "Decision Tree": {
        "Classification": ("tree", "DecisionTreeClassifier"),
        "Regression": ("tree", "DecisionTreeRegressor"),
    },
    "Perceptron": {
        "Classification": ("linear_model", "Perceptron"),
    },
}

# Everything below needs the uploaded CSV: preprocess it, show the equivalent
# stand-alone training script, then train and save the selected model.
if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    st.write("Preview of Dataset:", data.head())

    # The last column is the target, so at least one more column is needed.
    if data.shape[1] < 2:
        st.error("The dataset needs at least one feature column and one target column.")
        st.stop()

    # Resolve the estimator up front so an unsupported model/task pair is
    # reported before any preprocessing work is done.
    if task not in MODEL_REGISTRY[model]:
        st.error(f"{model} does not support {task}. Please choose another model or task.")
        st.stop()
    module_name, model_class = MODEL_REGISTRY[model][task]

    # Preprocessing Steps
    st.markdown("### Data Preprocessing Steps")

    # Handling Missing Values
    # numeric_only=True keeps text columns out of the mean computation, which
    # would otherwise raise on recent pandas versions.
    st.write("✅ Handling missing values (numeric columns filled with the column mean)")
    data = data.fillna(data.mean(numeric_only=True))

    # Encoding Categorical Variables
    # astype(str) lets mixed-type or partially missing text columns be encoded
    # without comparison errors; missing entries become their own category.
    st.write("✅ Encoding categorical variables")
    for col in data.select_dtypes(include=["object"]).columns:
        data[col] = LabelEncoder().fit_transform(data[col].astype(str))

    # Splitting Data
    X = data.iloc[:, :-1]  # Features
    y = data.iloc[:, -1]  # Target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Feature Scaling (fit on the training split only, then applied to both)
    st.write("✅ Applying StandardScaler")
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Handle Imbalanced Dataset using SMOTE
    if task == "Classification":
        try:
            X_train, y_train = SMOTE().fit_resample(X_train, y_train)
        except ValueError as exc:
            # Best effort: SMOTE rejects some inputs (for example a class with
            # too few samples); continue with the unbalanced training data.
            st.warning(f"Skipping SMOTE: {exc}")
        else:
            st.write("✅ Handling Imbalanced Dataset using SMOTE")

    # Feature Selection
    st.write("✅ Selecting Best Features")
    score_func = f_classif if task == "Classification" else f_regression
    selector = SelectKBest(score_func, k=min(5, X.shape[1]))
    X_train = selector.fit_transform(X_train, y_train)
    X_test = selector.transform(X_test)

    # Stand-alone script mirroring the pipeline above, offered for download.
    template = textwrap.dedent(
        f"""\
        import os

        import numpy as np
        import pandas as pd
        import joblib
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import StandardScaler, LabelEncoder
        from sklearn.feature_selection import SelectKBest, f_classif, f_regression
        from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error, r2_score
        from imblearn.over_sampling import SMOTE
        from sklearn.{module_name} import {model_class}

        # Load Dataset
        data = pd.read_csv('dataset.csv')

        # Handling Missing Values
        data = data.fillna(data.mean(numeric_only=True))

        # Encoding Categorical Variables
        for col in data.select_dtypes(include=["object"]).columns:
            data[col] = LabelEncoder().fit_transform(data[col].astype(str))

        # Splitting Data
        X = data.iloc[:, :-1]
        y = data.iloc[:, -1]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Feature Scaling
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Handle Imbalanced Data (SMOTE)
        if "{task}" == "Classification":
            smote = SMOTE()
            X_train, y_train = smote.fit_resample(X_train, y_train)

        # Feature Selection
        selector = SelectKBest(f_classif if "{task}" == "Classification" else f_regression, k=min(5, X.shape[1]))
        X_train = selector.fit_transform(X_train, y_train)
        X_test = selector.transform(X_test)

        # Model Training
        model = {model_class}()
        model.fit(X_train, y_train)

        # Save Trained Model
        os.makedirs('models', exist_ok=True)
        joblib.dump(model, 'models/trained_model.pkl')

        # Evaluation Metrics
        if "{task}" == "Classification":
            y_pred = model.predict(X_test)
            print("Accuracy:", accuracy_score(y_test, y_pred))
            print("Precision:", precision_score(y_test, y_pred, average='weighted'))
            print("Recall:", recall_score(y_test, y_pred, average='weighted'))
            print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
        else:
            y_pred = model.predict(X_test)
            print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
            print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
            print("R2 Score:", r2_score(y_test, y_pred))
        """
    )
    st.code(template, language="python")
    st.download_button("📥 Download AI Model Code", template, "ai_model.py")

    # Save Model
    # Look the estimator class up by name in its scikit-learn submodule; this
    # replaces eval() on a class name that was never imported.
    estimator_cls = getattr(importlib.import_module(f"sklearn.{module_name}"), model_class)
    model_instance = estimator_cls()
    model_instance.fit(X_train, y_train)

    # joblib.dump does not create parent directories, so make sure it exists.
    Path("models").mkdir(parents=True, exist_ok=True)
    joblib.dump(model_instance, "models/trained_model.pkl")
    st.success("✅ Model trained and saved as `trained_model.pkl`")
|