Surbhi committed on
Commit
b2fd176
·
1 Parent(s): e002b05

Feature extraction and model training

Browse files
Files changed (3) hide show
  1. app.py +116 -48
  2. models/trained_model.pkl +0 -0
  3. requirements.txt +3 -1
app.py CHANGED
@@ -1,41 +1,74 @@
1
  import streamlit as st
2
  import pandas as pd
 
 
3
  import textwrap
4
 
5
- # Sidebar UI
6
- st.sidebar.title("AI Code Generator 🧠")
7
- st.sidebar.markdown("Generate AI models instantly!")
 
 
 
 
 
 
 
8
 
9
- # Model Selection
 
10
  model_options = ["KNN", "SVM", "Random Forest", "Decision Tree", "Perceptron"]
11
  model = st.sidebar.selectbox("Choose a Model:", model_options)
12
 
13
- # Task Selection
14
  task_options = ["Classification", "Regression"]
15
  task = st.sidebar.selectbox("Choose a Task:", task_options)
16
 
17
- # Problem Selection based on Task and Model
18
- problems = {
19
- "Classification": {
20
- "KNN": ["Disease Prediction", "Spam Detection"],
21
- "SVM": ["Image Recognition", "Text Classification"],
22
- "Random Forest": ["Fraud Detection", "Customer Segmentation"],
23
- "Decision Tree": ["Loan Approval", "Churn Prediction"],
24
- "Perceptron": ["Handwritten Digit Recognition", "Sentiment Analysis"]
25
- },
26
- "Regression": {
27
- "KNN": ["House Price Prediction", "Stock Prediction"],
28
- "SVM": ["Sales Forecasting", "Stock Market Trends"],
29
- "Random Forest": ["Energy Consumption", "Patient Survival Prediction"],
30
- "Decision Tree": ["House Price Estimation", "Revenue Prediction"],
31
- "Perceptron": ["Weather Forecasting", "Traffic Flow Prediction"]
32
- }
33
- }
34
 
35
- problem = st.sidebar.selectbox("Choose a Problem:", problems[task][model])
 
 
36
 
37
- # Generate AI Model Code
38
- def generate_code(model, task, problem):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  model_mapping = {
40
  "KNN": "KNeighborsClassifier" if task == "Classification" else "KNeighborsRegressor",
41
  "SVM": "SVC" if task == "Classification" else "SVR",
@@ -43,46 +76,81 @@ def generate_code(model, task, problem):
43
  "Decision Tree": "DecisionTreeClassifier" if task == "Classification" else "DecisionTreeRegressor",
44
  "Perceptron": "Perceptron" if task == "Classification" else "Perceptron"
45
  }
46
-
47
- selected_model = model_mapping[model]
48
 
49
  template = f"""
50
  import numpy as np
51
  import pandas as pd
52
- from sklearn.model_selection import train_test_split
53
- from sklearn.preprocessing import StandardScaler
54
- from sklearn.{model.lower()} import {selected_model}
55
 
56
- # Load Dataset (Replace with your own dataset)
 
 
 
 
 
 
 
 
57
  data = pd.read_csv('dataset.csv')
58
- X = data.iloc[:, :-1] # Features
59
- y = data.iloc[:, -1] # Target
60
 
61
- # Train-Test Split
 
 
 
 
 
 
 
 
 
 
 
62
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
63
 
64
- # Standardize Features (if needed)
65
  scaler = StandardScaler()
66
  X_train = scaler.fit_transform(X_train)
67
  X_test = scaler.transform(X_test)
68
 
69
- # Model Initialization
70
- model = {selected_model}()
 
 
 
 
 
 
 
71
 
72
- # Training the model
 
73
  model.fit(X_train, y_train)
74
 
75
- # Evaluate Model
76
- accuracy = model.score(X_test, y_test)
77
- print("Model Accuracy:", accuracy)
 
 
 
 
 
 
 
 
 
 
 
 
78
  """
79
- return textwrap.dedent(template)
80
 
81
- code = generate_code(model, task, problem)
82
- st.code(code, language="python")
83
 
84
- # Download Buttons
85
- st.download_button("🐍 Download (.py)", code, "ai_model.py")
86
- st.download_button("📓 Download (.ipynb)", code, "ai_model.ipynb")
 
87
 
88
- st.success("Code generated! Download and do magic! ✨")
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
  import textwrap
6
 
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
9
+ from sklearn.impute import SimpleImputer
10
+ from sklearn.feature_selection import SelectKBest, f_classif, f_regression
11
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error, r2_score
12
+ from imblearn.over_sampling import SMOTE
13
+
14
+ # Streamlit UI
15
+ st.title("🚀 AI Code Generator")
16
+ st.markdown("Generate & Train ML Models with Preprocessing and Feature Selection")
17
 
18
+ # Sidebar UI
19
+ st.sidebar.title("Choose Options")
20
  model_options = ["KNN", "SVM", "Random Forest", "Decision Tree", "Perceptron"]
21
  model = st.sidebar.selectbox("Choose a Model:", model_options)
22
 
 
23
  task_options = ["Classification", "Regression"]
24
  task = st.sidebar.selectbox("Choose a Task:", task_options)
25
 
26
+ # Load Dataset
27
+ st.markdown("### Upload your Dataset (CSV)")
28
+ uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
+ if uploaded_file:
31
+ data = pd.read_csv(uploaded_file)
32
+ st.write("Preview of Dataset:", data.head())
33
 
34
+ # Preprocessing Steps
35
+ st.markdown("### Data Preprocessing Steps")
36
+
37
+ # Handling Missing Values
38
+ st.write("✅ Handling missing values using `SimpleImputer`")
39
+ imputer = SimpleImputer(strategy="mean")
40
+ data.fillna(data.mean(), inplace=True)
41
+
42
+ # Encoding Categorical Variables
43
+ st.write("✅ Encoding categorical variables")
44
+ for col in data.select_dtypes(include=["object"]).columns:
45
+ data[col] = LabelEncoder().fit_transform(data[col])
46
+
47
+ # Splitting Data
48
+ X = data.iloc[:, :-1] # Features
49
+ y = data.iloc[:, -1] # Target
50
+
51
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
52
+
53
+ # Feature Scaling
54
+ st.write("✅ Applying StandardScaler")
55
+ scaler = StandardScaler()
56
+ X_train = scaler.fit_transform(X_train)
57
+ X_test = scaler.transform(X_test)
58
+
59
+ # Handle Imbalanced Dataset using SMOTE
60
+ if task == "Classification":
61
+ st.write("✅ Handling Imbalanced Dataset using SMOTE")
62
+ smote = SMOTE()
63
+ X_train, y_train = smote.fit_resample(X_train, y_train)
64
+
65
+ # Feature Selection
66
+ st.write("✅ Selecting Best Features")
67
+ selector = SelectKBest(f_classif if task == "Classification" else f_regression, k=min(5, X.shape[1]))
68
+ X_train = selector.fit_transform(X_train, y_train)
69
+ X_test = selector.transform(X_test)
70
+
71
+ # Model Training
72
  model_mapping = {
73
  "KNN": "KNeighborsClassifier" if task == "Classification" else "KNeighborsRegressor",
74
  "SVM": "SVC" if task == "Classification" else "SVR",
 
76
  "Decision Tree": "DecisionTreeClassifier" if task == "Classification" else "DecisionTreeRegressor",
77
  "Perceptron": "Perceptron" if task == "Classification" else "Perceptron"
78
  }
79
+
80
+ model_class = model_mapping[model]
81
 
82
  template = f"""
83
  import numpy as np
84
  import pandas as pd
85
+ import joblib
 
 
86
 
87
+ from sklearn.model_selection import train_test_split
88
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
89
+ from sklearn.impute import SimpleImputer
90
+ from sklearn.feature_selection import SelectKBest, f_classif, f_regression
91
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error, r2_score
92
+ from imblearn.over_sampling import SMOTE
93
+ from sklearn.{model.lower()} import {model_class}
94
+
95
+ # Load Dataset
96
  data = pd.read_csv('dataset.csv')
 
 
97
 
98
+ # Handling Missing Values
99
+ imputer = SimpleImputer(strategy="mean")
100
+ data.fillna(data.mean(), inplace=True)
101
+
102
+ # Encoding Categorical Variables
103
+ for col in data.select_dtypes(include=["object"]).columns:
104
+ data[col] = LabelEncoder().fit_transform(data[col])
105
+
106
+ # Splitting Data
107
+ X = data.iloc[:, :-1]
108
+ y = data.iloc[:, -1]
109
+
110
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
111
 
112
+ # Feature Scaling
113
  scaler = StandardScaler()
114
  X_train = scaler.fit_transform(X_train)
115
  X_test = scaler.transform(X_test)
116
 
117
+ # Handle Imbalanced Data (SMOTE)
118
+ if "{task}" == "Classification":
119
+ smote = SMOTE()
120
+ X_train, y_train = smote.fit_resample(X_train, y_train)
121
+
122
+ # Feature Selection
123
+ selector = SelectKBest(f_classif if "{task}" == "Classification" else f_regression, k=min(5, X.shape[1]))
124
+ X_train = selector.fit_transform(X_train, y_train)
125
+ X_test = selector.transform(X_test)
126
 
127
+ # Model Training
128
+ model = {model_class}()
129
  model.fit(X_train, y_train)
130
 
131
+ # Save Trained Model
132
+ joblib.dump(model, 'models/trained_model.pkl')
133
+
134
+ # Evaluation Metrics
135
+ if "{task}" == "Classification":
136
+ y_pred = model.predict(X_test)
137
+ print("Accuracy:", accuracy_score(y_test, y_pred))
138
+ print("Precision:", precision_score(y_test, y_pred, average='weighted'))
139
+ print("Recall:", recall_score(y_test, y_pred, average='weighted'))
140
+ print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
141
+ else:
142
+ y_pred = model.predict(X_test)
143
+ print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
144
+ print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
145
+ print("R2 Score:", r2_score(y_test, y_pred))
146
  """
 
147
 
148
+ st.code(template, language="python")
149
+ st.download_button("📥 Download AI Model Code", template, "ai_model.py")
150
 
151
+ # Save Model
152
+ model_instance = eval(model_class)()
153
+ model_instance.fit(X_train, y_train)
154
+ joblib.dump(model_instance, "models/trained_model.pkl")
155
 
156
+ st.success("✅ Model trained and saved as `trained_model.pkl`")
models/trained_model.pkl ADDED
File without changes
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  streamlit
2
- scikit-learn
3
  pandas
4
  numpy
 
 
 
 
1
  streamlit
 
2
  pandas
3
  numpy
4
+ scikit-learn
5
+ joblib
6
+ imbalanced-learn