Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
import json | |
from sklearn.compose import ColumnTransformer | |
from sklearn.preprocessing import OneHotEncoder, StandardScaler | |
from io import StringIO | |
# Load selected features from the JSON file once, at import time.
# JSON text is UTF-8 by specification, so the encoding is given explicitly
# instead of relying on the platform's default text encoding.
with open("selected_features.json", "r", encoding="utf-8") as file:
    selected_features = json.load(file)
def preprocess_data(dataframe):
    """Scale numeric columns and one-hot encode all remaining columns.

    Columns are split by dtype: those matched by
    ``select_dtypes(include=["number"])`` go through ``StandardScaler``;
    every other column goes through ``OneHotEncoder(drop='first')``.
    Both transformers are fitted on ``dataframe`` itself on every call, so
    the output depends only on the rows passed in.

    Args:
        dataframe: ``pandas.DataFrame`` holding the raw feature columns.

    Returns:
        A new ``pandas.DataFrame`` whose columns are the scaled numeric
        columns followed by the one-hot columns (named by the encoder).
    """
    # Identify numerical and categorical columns
    numerical_cols = dataframe.select_dtypes(include=["number"]).columns
    categorical_cols = [col for col in dataframe.columns if col not in numerical_cols]

    # Preprocessing pipeline
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_cols),
            ('cat', OneHotEncoder(sparse_output=False, drop='first'), categorical_cols),
        ]
    )

    # Apply preprocessing
    processed_data = preprocessor.fit_transform(dataframe)

    feature_names = numerical_cols.tolist()
    # ColumnTransformer skips a transformer whose column selection is empty,
    # leaving the encoder unfitted; asking it for feature names would then
    # raise NotFittedError.  Only query it when it actually saw columns.
    if categorical_cols:
        encoder = preprocessor.named_transformers_['cat']
        feature_names.extend(encoder.get_feature_names_out(categorical_cols))

    return pd.DataFrame(processed_data, columns=feature_names)
def process_uploaded_data(file):
    """Read an uploaded CSV, validate its columns, and preview the result.

    Args:
        file: The uploaded file, given either as an object exposing the
            path in ``.name`` or as a plain path string.  ``None`` (nothing
            uploaded) is reported as a message instead of raising.

    Returns:
        A string: either a human-readable message (no file, or required
        features missing) or CSV text of the first 10 preprocessed rows.
    """
    if file is None:
        return "No file uploaded. Please upload a valid dataset."

    # Accept both an object carrying the path in ``.name`` and a bare path.
    csv_path = getattr(file, "name", file)

    # Load dataset from uploaded file
    data = pd.read_csv(csv_path)

    # Check for missing selected features
    missing_features = [feature for feature in selected_features if feature not in data.columns]
    if missing_features:
        return f"Missing features: {', '.join(missing_features)}. Please upload a valid dataset."

    # Preprocess only the selected features
    data = data[selected_features]
    processed_data = preprocess_data(data)
    return processed_data.head(10).to_csv(index=False)
def process_manual_data(**inputs):
    """Preprocess one manually entered record and return it as CSV text.

    Args:
        **inputs: Feature values keyed by column name; together they form
            a single-row DataFrame.

    Returns:
        CSV text (without the index column) of at most the first 10 rows
        of the preprocessed frame.
    """
    # One dict in a list -> one row whose columns are the keyword names.
    single_row = pd.DataFrame([inputs])
    preview = preprocess_data(single_row).head(10)
    return preview.to_csv(index=False)
# GUI for manual input | |
manual |