import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif, f_regression
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE, RandomOverSampler
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error, mean_absolute_error, r2_score
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC, SVR
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.linear_model import Perceptron, SGDRegressor
# Sidebar UI
st.sidebar.title("AI Code Generator 🧠")
st.sidebar.markdown("Generate AI models instantly!")

# Model Selection
model_options = ["KNN", "SVM", "Random Forest", "Decision Tree", "Perceptron"]
model = st.sidebar.selectbox("Choose a Model:", model_options)

# Task Selection
task_options = ["Classification", "Regression"]
task = st.sidebar.selectbox("Choose a Task:", task_options)
# Problem Selection based on Task and Model
problems = {
    "Classification": {
        "KNN": ["Spam Detection", "Disease Prediction"],
        "SVM": ["Image Recognition", "Text Classification"],
        "Random Forest": ["Fraud Detection", "Customer Segmentation"],
        "Decision Tree": ["Loan Approval", "Churn Prediction"],
        "Perceptron": ["Handwritten Digit Recognition", "Sentiment Analysis"]
    },
    "Regression": {
        "KNN": ["House Price Prediction", "Stock Prediction"],
        "SVM": ["Sales Forecasting", "Stock Market Trends"],
        "Random Forest": ["Energy Consumption", "Patient Survival Prediction"],
        "Decision Tree": ["House Price Estimation", "Revenue Prediction"],
        "Perceptron": ["Weather Forecasting", "Traffic Flow Prediction"]
    }
}
problem = st.sidebar.selectbox("Choose a Problem:", problems[task][model])
# Dataset Mapping (Dynamic)
dataset_mapping = {name: f"datasets/{name.lower().replace(' ', '_')}.csv"
                   for task_problems in problems.values()
                   for model_name in task_problems
                   for name in task_problems[model_name]}
dataset_path = dataset_mapping.get(problem, "datasets/spam_detection.csv")
df = pd.read_csv(dataset_path)
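# Each problem is expected to have a matching CSV under datasets/ (e.g.
# datasets/spam_detection.csv); unknown problems fall back to that same file.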
# Display dataset
st.subheader("Sample Dataset")
st.write(df.head())
# Preprocessing Steps
st.subheader("Preprocessing Steps")
st.markdown("""
- ✅ Handle Missing Values
- ✅ Encode Categorical Variables
- ✅ Scale Features
- ✅ Select Features
- ✅ Handle Imbalanced Data using **SMOTE**
""")
# Handle missing values (mean imputation on the numeric columns)
imputer = SimpleImputer(strategy='mean')
float_cols = df.select_dtypes(include=['float64']).columns
if len(float_cols) > 0:
    df[float_cols] = imputer.fit_transform(df[float_cols])
# Encoding categorical variables
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])
# Split Data
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Feature Selection (f_classif for classification targets, f_regression for continuous targets)
score_func = f_classif if task == "Classification" else f_regression
selector = SelectKBest(score_func=score_func, k=min(X.shape[1], 5))
X_train = selector.fit_transform(X_train, y_train)
X_test = selector.transform(X_test)
# Handle imbalanced data
if task == "Classification":
    class_counts = Counter(y_train)
    min_class_samples = min(class_counts.values())
    if min_class_samples > 5:
        smote = SMOTE()
        X_train, y_train = smote.fit_resample(X_train, y_train)
    else:
        ros = RandomOverSampler()
        X_train, y_train = ros.fit_resample(X_train, y_train)
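# (SMOTE's default k_neighbors is 5, so it needs more than 5 samples in the
# smallest class; with fewer, plain random oversampling is used instead.)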
# Model Initialization
n_neighbors = min(5, len(y_train))
model_mapping = {
    "KNN": KNeighborsClassifier(n_neighbors=n_neighbors) if task == "Classification" else KNeighborsRegressor(n_neighbors=n_neighbors),
    "SVM": SVC() if task == "Classification" else SVR(),
    "Random Forest": RandomForestClassifier() if task == "Classification" else RandomForestRegressor(),
    "Decision Tree": DecisionTreeClassifier() if task == "Classification" else DecisionTreeRegressor(),
    # Perceptron is classification-only; use a linear SGD regressor for regression problems
    "Perceptron": Perceptron() if task == "Classification" else SGDRegressor()
}
model_instance = model_mapping[model]

# Train Model
model_instance.fit(X_train, y_train)
y_pred = model_instance.predict(X_test)
# Model Evaluation
st.subheader("Model Evaluation")
if task == "Classification":
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    st.write(f"**Accuracy:** {accuracy:.2f}")
    st.json(report)
else:
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    st.write(f"**MSE:** {mse:.4f}, **MAE:** {mae:.4f}, **R² Score:** {r2:.4f}")
# Data Visualization
st.subheader("Data Visualization")
st.write("### Feature Correlation")
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(df.corr(), annot=True, cmap="coolwarm", ax=ax)
st.pyplot(fig)

if model in ["Random Forest", "Decision Tree"] and hasattr(model_instance, "feature_importances_"):
    # Importances correspond to the k features kept by SelectKBest, not all of X's columns
    selected_features = X.columns[selector.get_support()]
    importance_df = pd.DataFrame({"Feature": selected_features, "Importance": model_instance.feature_importances_}).sort_values(by="Importance", ascending=False)
    st.write("### Feature Importance")
    fig, ax = plt.subplots()
    sns.barplot(x=importance_df["Importance"], y=importance_df["Feature"], ax=ax)
    st.pyplot(fig)

st.success("Code generated! Download & run!")
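# To run locally (assuming this script is saved as app.py with the datasets/
# folder next to it): streamlit run app.py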