heartfailure / app.py
hackerbyhobby
corrected app.py
6dfb9dc unverified
raw
history blame
1.91 kB
import gradio as gr
import pandas as pd
import json
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from io import StringIO
# Load the list of model feature names from JSON; the processing
# functions below validate incoming data against this list.
# encoding is pinned to UTF-8 so behavior doesn't depend on the
# platform's default locale encoding.
with open("selected_features.json", "r", encoding="utf-8") as file:
    selected_features = json.load(file)
def preprocess_data(dataframe):
    """Standard-scale numeric columns and one-hot encode all others.

    A fresh ColumnTransformer is fitted on *dataframe*: numeric columns
    go through StandardScaler, every remaining column through a dense
    OneHotEncoder with the first category dropped.

    Returns a new DataFrame whose columns are the numeric names followed
    by the encoder-generated dummy names, in transformer order.
    """
    num_cols = dataframe.select_dtypes(include=["number"]).columns
    cat_cols = [c for c in dataframe.columns if c not in num_cols]

    transformer = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), num_cols),
            ('cat', OneHotEncoder(sparse_output=False, drop='first'), cat_cols),
        ]
    )

    values = transformer.fit_transform(dataframe)
    # Output column order mirrors the transformer order above:
    # numeric names first, then the fitted encoder's dummy names.
    encoder = transformer.named_transformers_['cat']
    columns = num_cols.tolist() + list(encoder.get_feature_names_out(cat_cols))
    return pd.DataFrame(values, columns=columns)
def process_uploaded_data(file):
    """Validate and preprocess an uploaded CSV dataset.

    Parameters
    ----------
    file : Gradio file payload — either a tempfile-style object exposing
        a ``.name`` path attribute (older Gradio) or a plain filepath
        string (newer Gradio).

    Returns
    -------
    str
        An error message listing missing required features, or the
        first 10 preprocessed rows rendered as CSV text.
    """
    # Newer Gradio versions pass a plain filepath string; older ones
    # wrap it in an object with a ``.name`` attribute. Accept both
    # instead of crashing with AttributeError on strings.
    path = getattr(file, "name", file)
    data = pd.read_csv(path)

    # Every model feature must be present in the upload.
    missing_features = [feature for feature in selected_features if feature not in data.columns]
    if missing_features:
        return f"Missing features: {', '.join(missing_features)}. Please upload a valid dataset."

    # Restrict to the model's features before preprocessing.
    data = data[selected_features]
    processed_data = preprocess_data(data)
    return processed_data.head(10).to_csv(index=False)
def process_manual_data(**inputs):
    """Preprocess a single record supplied as keyword arguments.

    Builds a one-row DataFrame from *inputs*, runs it through the same
    preprocessing pipeline as uploaded files, and returns the result as
    CSV text (at most 10 rows — effectively one here).
    """
    # NOTE(review): the pipeline is fitted on this single row, so
    # scaling/encoding statistics come from one sample — confirm this
    # is the intended behavior rather than reusing a fitted pipeline.
    record = pd.DataFrame([inputs])
    return preprocess_data(record).head(10).to_csv(index=False)
# GUI for manual input
manual