import json

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif, f_regression
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    mean_squared_error,
    mean_absolute_error,
    r2_score,
)

# ML models
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC, SVR
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.linear_model import Perceptron, SGDRegressor

# Sidebar UI
st.sidebar.title("AI Code Generator 🧠")
st.sidebar.markdown("Generate AI models instantly!")

# Model selection
model_options = ["KNN", "SVM", "Random Forest", "Decision Tree", "Perceptron"]
model = st.sidebar.selectbox("Choose a Model:", model_options)

# Task selection
task_options = ["Classification", "Regression"]
task = st.sidebar.selectbox("Choose a Task:", task_options)

# Problem selection based on task and model
problems = {
    "Classification": {
        "KNN": ["Spam Detection", "Disease Prediction"],
        "SVM": ["Image Recognition", "Text Classification"],
        "Random Forest": ["Fraud Detection", "Customer Segmentation"],
        "Decision Tree": ["Loan Approval", "Churn Prediction"],
        "Perceptron": ["Handwritten Digit Recognition", "Sentiment Analysis"],
    },
    "Regression": {
        "KNN": ["House Price Prediction", "Stock Prediction"],
        "SVM": ["Sales Forecasting", "Stock Market Trends"],
        "Random Forest": ["Energy Consumption", "Patient Survival Prediction"],
        "Decision Tree": ["House Price Estimation", "Revenue Prediction"],
        "Perceptron": ["Weather Forecasting", "Traffic Flow Prediction"],
    },
}
problem = st.sidebar.selectbox("Choose a Problem:", problems[task][model], key="problem_selection")

# Dataset selection (simulated dataset paths)
dataset_mapping = {
    "Spam Detection": "datasets/spam_detection.csv",
    "Disease Prediction": "datasets/disease_prediction.csv",
    "Image Recognition": "datasets/image_recognition.csv",
    "Text Classification": "datasets/text_classification.csv",
    "Fraud Detection": "datasets/fraud_detection.csv",
    "Customer Segmentation": "datasets/customer_segmentation.csv",
    "Loan Approval": "datasets/loan_approval.csv",
    "Churn Prediction": "datasets/churn_prediction.csv",
    "Handwritten Digit Recognition": "datasets/handwritten_digit_recognition.csv",
    "Sentiment Analysis": "datasets/sentiment_analysis.csv",
    "House Price Prediction": "datasets/house_price_prediction.csv",
    "Stock Prediction": "datasets/stock_prediction.csv",
    "Sales Forecasting": "datasets/sales_forecasting.csv",
    "Stock Market Trends": "datasets/stock_market_trends.csv",
    "Energy Consumption": "datasets/energy_consumption.csv",
    "Patient Survival Prediction": "datasets/patient_survival_prediction.csv",
    "House Price Estimation": "datasets/house_price_estimation.csv",
    "Revenue Prediction": "datasets/revenue_prediction.csv",
    "Weather Forecasting": "datasets/weather_forecasting.csv",
    "Traffic Flow Prediction": "datasets/traffic_flow_prediction.csv",
}
dataset_path = dataset_mapping.get(problem, "datasets/spam_detection.csv")

# Load the dataset; fail with a readable message instead of a traceback if
# the CSV is missing.
try:
    df = pd.read_csv(dataset_path)
except FileNotFoundError:
    st.error(f"Dataset not found: {dataset_path}")
    st.stop()
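# Optional sketch (not wired into the flow above): Streamlit reruns this whole
# script on every widget interaction, so caching the CSV read avoids hitting
# disk each time. st.cache_data is Streamlit's caching decorator (1.18+); the
# load_dataset helper name is illustrative, not part of the original app.
@st.cache_data
def load_dataset(path: str) -> pd.DataFrame:
    """Read a CSV once and serve cached copies on subsequent reruns."""
    return pd.read_csv(path)
# Usage would be: df = load_dataset(dataset_path)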
# Model initialization. Note: sklearn's Perceptron is a classifier only and
# raises on continuous targets, so for regression tasks we fall back to
# SGDRegressor, its closest linear analogue.
model_mapping = {
    "KNN": KNeighborsClassifier(n_neighbors=5) if task == "Classification" else KNeighborsRegressor(),
    "SVM": SVC() if task == "Classification" else SVR(),
    "Random Forest": RandomForestClassifier() if task == "Classification" else RandomForestRegressor(),
    "Decision Tree": DecisionTreeClassifier() if task == "Classification" else DecisionTreeRegressor(),
    "Perceptron": Perceptron() if task == "Classification" else SGDRegressor(),
}

# Generated AI code
generated_code = f"""
# AI Model Code
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from {model_mapping[model].__class__.__module__} import {model_mapping[model].__class__.__name__}

# Load data
df = pd.read_csv('{dataset_path}')
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train model
model = {model_mapping[model].__class__.__name__}()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)
print(y_pred)
"""

# Display the generated code
st.subheader("📜 Generated AI Model Code")
st.code(generated_code, language="python")

# Download buttons (top of UI)
st.download_button(
    "📥 Download Python Script (.py)",
    generated_code,
    file_name="ai_model.py",
    mime="text/x-python",
)

# Build a minimal nbformat-4 notebook by hand; code cells also need the
# "outputs" and "execution_count" fields to be valid.
notebook = {
    "cells": [
        {
            "cell_type": "code",
            "source": generated_code.splitlines(keepends=True),
            "metadata": {},
            "outputs": [],
            "execution_count": None,
        }
    ],
    "metadata": {},
    "nbformat": 4,
    "nbformat_minor": 2,
}
st.download_button(
    "📥 Download Jupyter Notebook (.ipynb)",
    json.dumps(notebook),
    file_name="ai_model.ipynb",
    mime="application/json",
)
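# Alternative sketch: the nbformat package (installed alongside Jupyter) can
# build and serialize the same notebook instead of hand-rolling the JSON.
# Left commented out so the app carries no hard dependency on nbformat.
# import nbformat
# nb = nbformat.v4.new_notebook()
# nb.cells.append(nbformat.v4.new_code_cell(generated_code))
# notebook_json = nbformat.writes(nb)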
# Display dataset
st.subheader("📊 Sample Dataset")
st.write(df.head())

# Preprocessing steps
st.subheader("📌 Preprocessing Steps")
st.markdown("""
- ✅ Handle Missing Values
- ✅ Encode Categorical Variables
- ✅ Feature Scaling
- ✅ Feature Selection
- ✅ Handle Imbalanced Data using **SMOTE**
""")

# Handle missing values (mean imputation on numeric columns)
imputer = SimpleImputer(strategy="mean")
numeric_cols = df.select_dtypes(include=["float64"]).columns
if len(numeric_cols) > 0:
    df[numeric_cols] = imputer.fit_transform(df[numeric_cols])

# Encode categorical variables (cast to str so stray NaNs don't break the encoder)
label_encoders = {}
for col in df.select_dtypes(include=["object"]).columns:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col].astype(str))

# Split data
X = df.iloc[:, :-1]  # features
y = df.iloc[:, -1]   # target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Feature selection. f_classif is an ANOVA F-test for classification targets;
# for regression targets the matching score function is f_regression.
score_func = f_classif if task == "Classification" else f_regression
selector = SelectKBest(score_func=score_func, k=min(5, X.shape[1]))  # k must not exceed the feature count
X_train = selector.fit_transform(X_train, y_train)
X_test = selector.transform(X_test)
selected_features = X.columns[selector.get_support()]

# Handle imbalanced data (classification only). SMOTE's default k_neighbors=5
# needs at least 6 samples in the smallest class, so check class counts first.
if task == "Classification":
    class_counts = y_train.value_counts()
    if len(class_counts) > 1 and class_counts.min() > 5:
        smote = SMOTE()
        X_train, y_train = smote.fit_resample(X_train, y_train)

model_instance = model_mapping[model]

# Train model
model_instance.fit(X_train, y_train)
y_pred = model_instance.predict(X_test)

# Model evaluation
st.subheader("📊 Model Evaluation")
if task == "Classification":
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    st.write(f"**Accuracy:** {accuracy:.2f}")
    st.json(report)
elif task == "Regression":
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    st.write(f"**Mean Squared Error (MSE):** {mse:.4f}")
    st.write(f"**Mean Absolute Error (MAE):** {mae:.4f}")
    st.write(f"**R² Score:** {r2:.4f}")

# Data visualization
st.subheader("📈 Data Visualization")

# Correlation heatmap
st.write("### 🔥 Feature Correlation")
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap="coolwarm", ax=ax)
st.pyplot(fig)

# Feature importance (tree-based models only). The model was fitted on the
# SelectKBest-reduced matrix, so importances must be paired with the selected
# column names, not all of X.columns.
if model in ["Random Forest", "Decision Tree"]:
    importance_df = pd.DataFrame(
        {"Feature": selected_features, "Importance": model_instance.feature_importances_}
    ).sort_values(by="Importance", ascending=False)
    st.write("### 🌟 Feature Importance")
    fig, ax = plt.subplots()
    sns.barplot(x=importance_df["Importance"], y=importance_df["Feature"], ax=ax)
    st.pyplot(fig)
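# To run the app locally (assuming this script is saved as app.py and the
# datasets/ directory sits next to it):
#
#   streamlit run app.py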