Spaces:
Sleeping
Sleeping
hackerbyhobby
committed on
app update
Browse files
app.py
CHANGED
|
@@ -2,17 +2,16 @@ import gradio as gr
|
|
| 2 |
import pandas as pd
|
| 3 |
import json
|
| 4 |
from sklearn.compose import ColumnTransformer
|
| 5 |
-
from sklearn.preprocessing import
|
| 6 |
-
from io import StringIO
|
| 7 |
|
| 8 |
# Load selected features from JSON file
|
| 9 |
with open("selected_features.json", "r") as file:
|
| 10 |
selected_features = json.load(file)
|
| 11 |
|
| 12 |
-
def preprocess_data(
|
| 13 |
# Identify numerical and categorical columns
|
| 14 |
-
numerical_cols =
|
| 15 |
-
categorical_cols = [col for col in
|
| 16 |
|
| 17 |
# Preprocessing pipeline
|
| 18 |
preprocessor = ColumnTransformer(
|
|
@@ -23,31 +22,40 @@ def preprocess_data(dataframe):
|
|
| 23 |
)
|
| 24 |
|
| 25 |
# Apply preprocessing
|
| 26 |
-
processed_data = preprocessor.fit_transform(
|
| 27 |
-
feature_names = numerical_cols
|
| 28 |
return pd.DataFrame(processed_data, columns=feature_names)
|
| 29 |
|
| 30 |
-
def
|
| 31 |
-
#
|
| 32 |
-
data = pd.read_csv(file.name)
|
| 33 |
-
|
| 34 |
-
# Check for missing selected features
|
| 35 |
-
missing_features = [feature for feature in selected_features if feature not in data.columns]
|
| 36 |
-
if missing_features:
|
| 37 |
-
return f"Missing features: {', '.join(missing_features)}. Please upload a valid dataset."
|
| 38 |
-
|
| 39 |
-
# Preprocess data
|
| 40 |
-
data = data[selected_features]
|
| 41 |
-
processed_data = preprocess_data(data)
|
| 42 |
-
return processed_data.head(10).to_csv(index=False)
|
| 43 |
-
|
| 44 |
-
def process_manual_data(**inputs):
|
| 45 |
-
# Construct dataframe from manual inputs
|
| 46 |
input_data = pd.DataFrame([inputs])
|
| 47 |
|
| 48 |
-
# Preprocess data
|
| 49 |
processed_data = preprocess_data(input_data)
|
| 50 |
-
return processed_data.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
-
#
|
| 53 |
-
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import json
|
| 4 |
from sklearn.compose import ColumnTransformer
|
| 5 |
+
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
|
|
|
| 6 |
|
| 7 |
# Load selected features from JSON file
|
| 8 |
with open("selected_features.json", "r") as file:
|
| 9 |
selected_features = json.load(file)
|
| 10 |
|
| 11 |
+
def preprocess_data(data):
|
| 12 |
# Identify numerical and categorical columns
|
| 13 |
+
numerical_cols = [col for col in data.columns if data[col].dtype in ['int64', 'float64']]
|
| 14 |
+
categorical_cols = [col for col in data.columns if col not in numerical_cols]
|
| 15 |
|
| 16 |
# Preprocessing pipeline
|
| 17 |
preprocessor = ColumnTransformer(
|
|
|
|
| 22 |
)
|
| 23 |
|
| 24 |
# Apply preprocessing
|
| 25 |
+
processed_data = preprocessor.fit_transform(data)
|
| 26 |
+
feature_names = numerical_cols + list(preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_cols))
|
| 27 |
return pd.DataFrame(processed_data, columns=feature_names)
|
| 28 |
|
| 29 |
+
def process_manual_input(*values, **inputs):
    """Preprocess the feature values of a single patient and return them as CSV.

    Gradio invokes an Interface callback with one positional argument per
    input component, so positional values are accepted and paired with
    ``selected_features`` in order (the same order used to build the input
    components). Calling with keyword arguments keyed by feature name is
    still supported; a keyword value overrides a positional one for the
    same feature.

    Args:
        *values: Feature values in the order of ``selected_features``.
        **inputs: Feature values keyed by feature name.

    Returns:
        The preprocessed single-row frame serialized as CSV text (no index).

    Raises:
        ValueError: If more positional values are supplied than there are
            selected features.
    """
    if len(values) > len(selected_features):
        raise ValueError(
            f"Expected at most {len(selected_features)} values, got {len(values)}."
        )

    # Name the positional values after their features; explicit keyword
    # arguments take precedence.
    record = dict(zip(selected_features, values))
    record.update(inputs)

    # Create a DataFrame for a single patient from inputs
    input_data = pd.DataFrame([record])

    # Preprocess the data
    processed_data = preprocess_data(input_data)
    return processed_data.to_csv(index=False)
|
| 36 |
+
|
| 37 |
+
# Build one Gradio input component per selected feature.
# Slider settings for the numerical features that have a dedicated range.
slider_settings = {
    "BMI": dict(minimum=0, maximum=300, step=0.1, value=25),
    "WeightInKilograms": dict(minimum=0, maximum=300, step=0.1, value=25),
    "HeightInMeters": dict(minimum=0, maximum=300, step=0.1, value=25),
    "PhysicalHealthDays": dict(minimum=0, maximum=30, step=1, value=5),
    "SleepHours": dict(minimum=0, maximum=24, step=0.5, value=8),
}
# Fallback slider settings for any remaining numerical feature.
default_slider_settings = dict(minimum=0, maximum=100, step=1, value=50)

gui_inputs = []
for feature in selected_features:
    if feature.startswith("Had"):
        # Binary categorical features are answered with a Yes/No radio group.
        component = gr.Radio(label=feature, choices=["Yes", "No"], value="No")
    else:
        settings = slider_settings.get(feature, default_slider_settings)
        component = gr.Slider(label=feature, **settings)
    gui_inputs.append(component)
|
| 50 |
+
|
| 51 |
+
# Assemble the Gradio interface: the feature inputs feed
# process_manual_input and the resulting CSV text is shown in a textbox.
csv_output = gr.Textbox(label="Processed Data (CSV Format)")
interface = gr.Interface(
    fn=process_manual_input,
    inputs=gui_inputs,
    outputs=csv_output,
    title="Single Patient Data Preprocessor",
    description="Input data for a single patient using sliders and radio buttons. The data will be preprocessed and displayed as CSV.",
)

# Start serving the app.
interface.launch()
|