Spaces:

SurbhiB
/

ml-code-generator

Sleeping

App Files Files Community

Surbhi committed on Mar 14

Commit

b2fd176

1 Parent(s): e002b05

Feature extraction and model training

Browse files

Files changed (3) hide show

app.py +116 -48
models/trained_model.pkl +0 -0
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -1,41 +1,74 @@
 import streamlit as st
 import pandas as pd
 import textwrap
-# Sidebar UI
-st.sidebar.title("AI Code Generator 🧠")
-st.sidebar.markdown("Generate AI models instantly!")
-# Model Selection
 model_options = ["KNN", "SVM", "Random Forest", "Decision Tree", "Perceptron"]
 model = st.sidebar.selectbox("Choose a Model:", model_options)
-# Task Selection
 task_options = ["Classification", "Regression"]
 task = st.sidebar.selectbox("Choose a Task:", task_options)
-# Problem Selection based on Task and Model
-problems = {
-    "Classification": {
-        "KNN": ["Disease Prediction", "Spam Detection"],
-        "SVM": ["Image Recognition", "Text Classification"],
-        "Random Forest": ["Fraud Detection", "Customer Segmentation"],
-        "Decision Tree": ["Loan Approval", "Churn Prediction"],
-        "Perceptron": ["Handwritten Digit Recognition", "Sentiment Analysis"]
-    },
-    "Regression": {
-        "KNN": ["House Price Prediction", "Stock Prediction"],
-        "SVM": ["Sales Forecasting", "Stock Market Trends"],
-        "Random Forest": ["Energy Consumption", "Patient Survival Prediction"],
-        "Decision Tree": ["House Price Estimation", "Revenue Prediction"],
-        "Perceptron": ["Weather Forecasting", "Traffic Flow Prediction"]
-    }
-}
-problem = st.sidebar.selectbox("Choose a Problem:", problems[task][model])
-# Generate AI Model Code
-def generate_code(model, task, problem):
     model_mapping = {
         "KNN": "KNeighborsClassifier" if task == "Classification" else "KNeighborsRegressor",
         "SVM": "SVC" if task == "Classification" else "SVR",
@@ -43,46 +76,81 @@ def generate_code(model, task, problem):
         "Decision Tree": "DecisionTreeClassifier" if task == "Classification" else "DecisionTreeRegressor",
         "Perceptron": "Perceptron" if task == "Classification" else "Perceptron"
     }
-    selected_model = model_mapping[model]
     template = f"""
 import numpy as np
 import pandas as pd
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import StandardScaler
-from sklearn.{model.lower()} import {selected_model}
-# Load Dataset (Replace with your own dataset)
 data = pd.read_csv('dataset.csv')
-X = data.iloc[:, :-1]  # Features
-y = data.iloc[:, -1]   # Target
-# Train-Test Split
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-# Standardize Features (if needed)
 scaler = StandardScaler()
 X_train = scaler.fit_transform(X_train)
 X_test = scaler.transform(X_test)
-# Model Initialization
-model = {selected_model}()
-# Training the model
 model.fit(X_train, y_train)
-# Evaluate Model
-accuracy = model.score(X_test, y_test)
-print("Model Accuracy:", accuracy)
 """
-    return textwrap.dedent(template)
-code = generate_code(model, task, problem)
-st.code(code, language="python")
-# Download Buttons
-st.download_button("🐍 Download (.py)", code, "ai_model.py")
-st.download_button("📓 Download (.ipynb)", code, "ai_model.ipynb")
-st.success("Code generated! Download and do magic! ✨")

 import streamlit as st
 import pandas as pd
+import numpy as np
+import joblib
 import textwrap
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+from sklearn.impute import SimpleImputer
+from sklearn.feature_selection import SelectKBest, f_classif, f_regression
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error, r2_score
+from imblearn.over_sampling import SMOTE
+# Streamlit UI
+st.title("🚀 AI Code Generator")
+st.markdown("Generate & Train ML Models with Preprocessing and Feature Selection")
+# Sidebar UI
+st.sidebar.title("Choose Options")
 model_options = ["KNN", "SVM", "Random Forest", "Decision Tree", "Perceptron"]
 model = st.sidebar.selectbox("Choose a Model:", model_options)
 task_options = ["Classification", "Regression"]
 task = st.sidebar.selectbox("Choose a Task:", task_options)
+# Load Dataset
+st.markdown("### Upload your Dataset (CSV)")
+uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+if uploaded_file:
+    data = pd.read_csv(uploaded_file)
+    st.write("Preview of Dataset:", data.head())
+    # Preprocessing Steps
+    st.markdown("### Data Preprocessing Steps")
+    # Handling Missing Values
+    st.write("✅ Handling missing values using `SimpleImputer`")
+    imputer = SimpleImputer(strategy="mean")
+    data.fillna(data.mean(), inplace=True)
+    # Encoding Categorical Variables
+    st.write("✅ Encoding categorical variables")
+    for col in data.select_dtypes(include=["object"]).columns:
+        data[col] = LabelEncoder().fit_transform(data[col])
+    # Splitting Data
+    X = data.iloc[:, :-1]  # Features
+    y = data.iloc[:, -1]   # Target
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    # Feature Scaling
+    st.write("✅ Applying StandardScaler")
+    scaler = StandardScaler()
+    X_train = scaler.fit_transform(X_train)
+    X_test = scaler.transform(X_test)
+    # Handle Imbalanced Dataset using SMOTE
+    if task == "Classification":
+        st.write("✅ Handling Imbalanced Dataset using SMOTE")
+        smote = SMOTE()
+        X_train, y_train = smote.fit_resample(X_train, y_train)
+    # Feature Selection
+    st.write("✅ Selecting Best Features")
+    selector = SelectKBest(f_classif if task == "Classification" else f_regression, k=min(5, X.shape[1]))
+    X_train = selector.fit_transform(X_train, y_train)
+    X_test = selector.transform(X_test)
+    # Model Training
     model_mapping = {
         "KNN": "KNeighborsClassifier" if task == "Classification" else "KNeighborsRegressor",
         "SVM": "SVC" if task == "Classification" else "SVR",
         "Decision Tree": "DecisionTreeClassifier" if task == "Classification" else "DecisionTreeRegressor",
         "Perceptron": "Perceptron" if task == "Classification" else "Perceptron"
     }
+    model_class = model_mapping[model]
     template = f"""
 import numpy as np
 import pandas as pd
+import joblib
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+from sklearn.impute import SimpleImputer
+from sklearn.feature_selection import SelectKBest, f_classif, f_regression
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error, r2_score
+from imblearn.over_sampling import SMOTE
+from sklearn.{model.lower()} import {model_class}
+# Load Dataset
 data = pd.read_csv('dataset.csv')
+# Handling Missing Values
+imputer = SimpleImputer(strategy="mean")
+data.fillna(data.mean(), inplace=True)
+# Encoding Categorical Variables
+for col in data.select_dtypes(include=["object"]).columns:
+    data[col] = LabelEncoder().fit_transform(data[col])
+# Splitting Data
+X = data.iloc[:, :-1]
+y = data.iloc[:, -1]
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+# Feature Scaling
 scaler = StandardScaler()
 X_train = scaler.fit_transform(X_train)
 X_test = scaler.transform(X_test)
+# Handle Imbalanced Data (SMOTE)
+if "{task}" == "Classification":
+    smote = SMOTE()
+    X_train, y_train = smote.fit_resample(X_train, y_train)
+# Feature Selection
+selector = SelectKBest(f_classif if "{task}" == "Classification" else f_regression, k=min(5, X.shape[1]))
+X_train = selector.fit_transform(X_train, y_train)
+X_test = selector.transform(X_test)
+# Model Training
+model = {model_class}()
 model.fit(X_train, y_train)
+# Save Trained Model
+joblib.dump(model, 'models/trained_model.pkl')
+# Evaluation Metrics
+if "{task}" == "Classification":
+    y_pred = model.predict(X_test)
+    print("Accuracy:", accuracy_score(y_test, y_pred))
+    print("Precision:", precision_score(y_test, y_pred, average='weighted'))
+    print("Recall:", recall_score(y_test, y_pred, average='weighted'))
+    print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))
+else:
+    y_pred = model.predict(X_test)
+    print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
+    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
+    print("R2 Score:", r2_score(y_test, y_pred))
 """
+    st.code(template, language="python")
+    st.download_button("📥 Download AI Model Code", template, "ai_model.py")
+    # Save Model
+    model_instance = eval(model_class)()
+    model_instance.fit(X_train, y_train)
+    joblib.dump(model_instance, "models/trained_model.pkl")
+    st.success("✅ Model trained and saved as `trained_model.pkl`")

models/trained_model.pkl ADDED Viewed

File without changes

requirements.txt CHANGED Viewed

@@ -1,4 +1,6 @@
 streamlit
-scikit-learn
 pandas
 numpy

 streamlit
 pandas
 numpy
+scikit-learn
+joblib
+imbalanced-learn