File size: 6,920 Bytes
a73d60f
 
b2fd176
1960a99
 
b2fd176
 
1960a99
b2fd176
 
df0e756
b2fd176
1960a99
 
 
 
 
 
a73d60f
b2fd176
1960a99
 
 
 
a73d60f
 
 
1960a99
a73d60f
 
 
1960a99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a73d60f
1960a99
 
 
 
df0e756
 
 
 
 
 
 
 
 
 
 
 
 
 
1960a99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a73d60f
 
b2fd176
a73d60f
 
 
 
b2fd176
df0e756
b2fd176
 
a73d60f
1960a99
 
 
 
 
 
df0e756
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1960a99
 
a73d60f
1960a99
 
 
b2fd176
df0e756
1960a99
df0e756
1960a99
 
df0e756
a73d60f
df0e756
 
 
 
 
 
 
 
 
 
 
 
 
1960a99
df0e756
 
 
1960a99
 
 
a73d60f
df0e756
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1960a99
 
 
 
a73d60f
1960a99
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import json

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif, f_regression
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error, mean_absolute_error, r2_score

# Import ML Models
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC, SVR
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.linear_model import Perceptron, SGDRegressor

# Sidebar UI
sidebar = st.sidebar
sidebar.title("AI Code Generator 🧠")
sidebar.markdown("Generate AI models instantly!")

# Example problems offered for each task, keyed by model name.
# The key order of this mapping also drives the order of the sidebar options.
problems = {
    "Classification": {
        "KNN": ["Spam Detection", "Disease Prediction"],
        "SVM": ["Image Recognition", "Text Classification"],
        "Random Forest": ["Fraud Detection", "Customer Segmentation"],
        "Decision Tree": ["Loan Approval", "Churn Prediction"],
        "Perceptron": ["Handwritten Digit Recognition", "Sentiment Analysis"],
    },
    "Regression": {
        "KNN": ["House Price Prediction", "Stock Prediction"],
        "SVM": ["Sales Forecasting", "Stock Market Trends"],
        "Random Forest": ["Energy Consumption", "Patient Survival Prediction"],
        "Decision Tree": ["House Price Estimation", "Revenue Prediction"],
        "Perceptron": ["Weather Forecasting", "Traffic Flow Prediction"],
    },
}

# Model Selection (both tasks list the same models, in the same order)
model_options = list(problems["Classification"])
model = sidebar.selectbox("Choose a Model:", model_options)

# Task Selection
task_options = list(problems)
task = sidebar.selectbox("Choose a Task:", task_options)

# Problem Selection: the choices depend on the task and model picked above
problem = sidebar.selectbox("Choose a Problem:", problems[task][model])

# Dataset Selection: every problem name maps to a CSV under datasets/,
# e.g. "Spam Detection" -> "datasets/spam_detection.csv".
# The loop variables are named so they cannot be confused with the
# module-level `model` / `problem` selections.
dataset_mapping = {
    problem_name: f"datasets/{problem_name.lower().replace(' ', '_')}.csv"
    for models_for_task in problems.values()
    for problem_names in models_for_task.values()
    for problem_name in problem_names
}

dataset_path = dataset_mapping.get(problem, "datasets/spam_detection.csv")

# Show a readable message and halt this run instead of surfacing a raw
# traceback when the CSV for the selected problem is missing or empty.
try:
    df = pd.read_csv(dataset_path)
except (FileNotFoundError, pd.errors.EmptyDataError) as exc:
    st.error(f"Could not load dataset '{dataset_path}': {exc}")
    st.stop()

# The rest of the script treats the last column as the target and all other
# columns as features, so at least one row and two columns are required.
if df.empty or df.shape[1] < 2:
    st.error(f"Dataset '{dataset_path}' needs at least one row and two columns (features + target).")
    st.stop()

# Display dataset: show the first rows of the loaded data
st.subheader("Sample Dataset")
st.write(df.head())

# Preprocessing Steps: static checklist of the steps performed below.
# The trailing double spaces are Markdown hard line breaks.
st.subheader("📌 Preprocessing Steps")
st.markdown("""
- ✅ Handle Missing Values  
- ✅ Encoding Categorical Variables  
- ✅ Feature Scaling  
- ✅ Feature Selection  
- ✅ Handling Imbalanced Data using **SMOTE**
""")

# Handle missing values
# Feature columns without a single value cannot be imputed, so drop them.
# The last column (the target) is never dropped here, so the
# "last column is the target" convention used later stays intact.
empty_features = [col for col in df.columns[:-1] if df[col].isna().all()]
if empty_features:
    df = df.drop(columns=empty_features)

# Mean-impute every numeric column that has gaps (not only float64 ones).
# Columns that are entirely NaN are skipped because SimpleImputer would
# discard them and the result could not be assigned back by column.
imputer = SimpleImputer(strategy='mean')
numeric_gaps = [
    col for col in df.select_dtypes(include='number').columns
    if df[col].isna().any() and df[col].notna().any()
]
if numeric_gaps:
    df[numeric_gaps] = imputer.fit_transform(df[numeric_gaps])

# Encoding categorical variables
# One fitted encoder is kept per column in `label_encoders`.
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    # Fill categorical gaps with the most frequent value first, so NaN is not
    # handed to LabelEncoder alongside strings.
    if df[col].isna().any() and df[col].notna().any():
        df[col] = df[col].fillna(df[col].mode().iloc[0])
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# Split Data: the last column is the target, everything before it is a feature
X = df.iloc[:, :-1]  # Features
y = df.iloc[:, -1]   # Target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling: fit on the training split only, then reuse for the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Feature Selection
# f_classif scores features against class labels; continuous regression
# targets need f_regression instead.
score_func = f_classif if task == "Classification" else f_regression
selector = SelectKBest(score_func=score_func, k=min(5, X.shape[1]))  # Ensure k does not exceed available features
X_train = selector.fit_transform(X_train, y_train)
X_test = selector.transform(X_test)

# Handle imbalanced data
if task == "Classification":
    # SMOTE interpolates between a sample and its k nearest same-class
    # neighbours, so k must be smaller than the smallest class size.
    smallest_class = int(y_train.value_counts().min())
    if y_train.nunique() > 1 and smallest_class > 1:
        smote = SMOTE(k_neighbors=min(5, smallest_class - 1), random_state=42)
        X_train, y_train = smote.fit_resample(X_train, y_train)
    else:
        st.warning("Skipping SMOTE: it needs at least two classes with two or more training samples each.")

# Model Initialization
# KNN cannot use more neighbours than there are training samples.
n_neighbors = min(5, len(y_train))  # Ensure k is valid

if task == "Classification":
    model_mapping = {
        "KNN": KNeighborsClassifier(n_neighbors=n_neighbors),
        "SVM": SVC(),
        "Random Forest": RandomForestClassifier(),
        "Decision Tree": DecisionTreeClassifier(),
        "Perceptron": Perceptron()
    }
else:
    model_mapping = {
        "KNN": KNeighborsRegressor(n_neighbors=n_neighbors),
        "SVM": SVR(),
        "Random Forest": RandomForestRegressor(),
        "Decision Tree": DecisionTreeRegressor(),
        # Perceptron is a classifier and rejects continuous targets, so the
        # regression task uses the SGD-trained linear regressor instead.
        "Perceptron": SGDRegressor()
    }

# Pick the estimator matching the sidebar's model choice
model_instance = model_mapping[model]

# Train Model, then predict on the held-out split for evaluation
model_instance.fit(X_train, y_train)
y_pred = model_instance.predict(X_test)

# Model Evaluation: report metrics for the held-out test split
st.subheader("📊 Model Evaluation")

if task == "Classification":
    accuracy = accuracy_score(y_test, y_pred)
    # zero_division=0 reports 0 for classes that were never predicted instead
    # of emitting an undefined-metric warning on every rerun.
    report = classification_report(y_test, y_pred, output_dict=True, zero_division=0)

    st.write(f"**Accuracy:** {accuracy:.2f}")
    st.json(report)  # Shows detailed structured metrics

elif task == "Regression":
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    st.write(f"**Mean Squared Error (MSE):** {mse:.4f}")
    st.write(f"**Mean Absolute Error (MAE):** {mae:.4f}")
    st.write(f"**R² Score:** {r2:.4f}")

# Data Visualization
st.subheader("📈 Data Visualization")

# Heatmap of pairwise correlations between all (already encoded) columns.
# An explicit figure is passed to st.pyplot rather than the pyplot module,
# and closed afterwards so figures do not accumulate across reruns.
st.write("### 🔥 Feature Correlation")
heatmap_fig, heatmap_ax = plt.subplots(figsize=(8, 5))
sns.heatmap(df.corr(), annot=True, cmap="coolwarm", ax=heatmap_ax)
st.pyplot(heatmap_fig)
plt.close(heatmap_fig)

# Pair Plot: seaborn builds its own figure, exposed on the returned grid
st.write("### 📊 Pair Plot of Features")
pair_grid = sns.pairplot(df, diag_kind='kde')
st.pyplot(pair_grid.figure)
plt.close(pair_grid.figure)

# Feature Importance (for tree-based models)
if model in ["Random Forest", "Decision Tree"]:
    feature_importances = model_instance.feature_importances_
    # The model was trained on the columns kept by SelectKBest, so pair the
    # importances with those column names only; using all of X.columns fails
    # with a length mismatch whenever the selector dropped a feature.
    feature_names = X.columns[selector.get_support()]
    importance_df = pd.DataFrame({"Feature": feature_names, "Importance": feature_importances})
    importance_df = importance_df.sort_values(by="Importance", ascending=False)

    st.write("### 🌟 Feature Importance")
    fig, ax = plt.subplots()
    sns.barplot(x=importance_df["Importance"], y=importance_df["Feature"], ax=ax)
    st.pyplot(fig)
    plt.close(fig)  # release the figure once Streamlit has rendered it

# Download Code
# st.download_button takes the file *content* as its second argument, so a
# minimal standalone training script is generated for the current selection
# and the file names are passed separately via file_name.
_model_cls = type(model_instance)
# Keep only the public package part of the estimator's module path
# (presumably e.g. "sklearn.svm._classes" -> "sklearn.svm"; verify if the
# estimator comes from a differently laid out package).
_model_pkg = ".".join(_model_cls.__module__.split(".")[:2])

generated_code = "\n".join([
    f"# {task}: {problem} ({model})",
    "import pandas as pd",
    "from sklearn.model_selection import train_test_split",
    "from sklearn.preprocessing import LabelEncoder, StandardScaler",
    f"from {_model_pkg} import {_model_cls.__name__}",
    "",
    f"df = pd.read_csv({dataset_path!r})",
    'for col in df.select_dtypes(include=["object"]).columns:',
    "    df[col] = LabelEncoder().fit_transform(df[col])",
    "",
    "X = df.iloc[:, :-1]",
    "y = df.iloc[:, -1]",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)",
    "",
    "scaler = StandardScaler()",
    "X_train = scaler.fit_transform(X_train)",
    "X_test = scaler.transform(X_test)",
    "",
    f"model = {_model_cls.__name__}()",
    "model.fit(X_train, y_train)",
    'print("Test score:", model.score(X_test, y_test))',
]) + "\n"

# Wrap the same script in a single-cell notebook (nbformat 4.4 layout).
generated_notebook = json.dumps(
    {
        "cells": [
            {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {},
                "outputs": [],
                "source": generated_code.splitlines(keepends=True),
            }
        ],
        "metadata": {},
        "nbformat": 4,
        "nbformat_minor": 4,
    },
    indent=1,
)

st.download_button(
    "🐍 Download Python Code (.py)",
    data=generated_code,
    file_name="ai_model.py",
    mime="text/x-python",
)
st.download_button(
    "📓 Download Notebook (.ipynb)",
    data=generated_notebook,
    file_name="ai_model.ipynb",
    mime="application/x-ipynb+json",
)
st.markdown("[🚀 Open in Colab](https://colab.research.google.com/)")

st.success("Code generated! Download and do magic! ✨")