Spaces:
Sleeping
Sleeping
File size: 1,913 Bytes
55f664e 6dfb9dc 55f664e 6dfb9dc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import gradio as gr
import pandas as pd
import json
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from io import StringIO
# Load selected features from JSON file. The loaded value is used below as the
# collection of column labels an uploaded dataset must contain.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default locale encoding.
with open("selected_features.json", "r", encoding="utf-8") as file:
    selected_features = json.load(file)
def preprocess_data(dataframe):
    """Standardize numeric columns and one-hot encode all other columns.

    The transformers are fitted on ``dataframe`` itself on every call, so the
    result depends only on the rows passed in.

    Args:
        dataframe: Input ``pandas.DataFrame``. Columns selected by
            ``select_dtypes(include=["number"])`` are scaled; every other
            column is one-hot encoded with the first category dropped.

    Returns:
        A new ``pandas.DataFrame`` with a default index whose columns are the
        scaled numeric columns followed by the one-hot encoded columns.
    """
    # Identify numerical and categorical columns
    numerical_cols = dataframe.select_dtypes(include=["number"]).columns
    categorical_cols = [col for col in dataframe.columns if col not in numerical_cols]

    # Preprocessing pipeline
    preprocessor = ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), numerical_cols),
            ("cat", OneHotEncoder(sparse_output=False, drop="first"), categorical_cols),
        ]
    )

    # Apply preprocessing
    processed_data = preprocessor.fit_transform(dataframe)

    # With no categorical columns the 'cat' encoder is never fitted, and asking
    # an unfitted encoder for its feature names raises NotFittedError. Only
    # query it when it actually received columns.
    categorical_names = []
    if categorical_cols:
        encoder = preprocessor.named_transformers_["cat"]
        categorical_names = list(encoder.get_feature_names_out(categorical_cols))

    feature_names = numerical_cols.tolist() + categorical_names
    return pd.DataFrame(processed_data, columns=feature_names)
def process_uploaded_data(file):
    """Preprocess an uploaded CSV and return a preview of the result.

    Args:
        file: The uploaded file. Either an object exposing the file path as
            ``.name`` or a plain path string is accepted; ``None`` means
            nothing was uploaded.

    Returns:
        A CSV-formatted string holding the first 10 preprocessed rows, or a
        human-readable message when no file was supplied or required
        features are missing from the dataset.
    """
    # Nothing uploaded: report it instead of failing on ``file.name``.
    if file is None:
        return "No file uploaded. Please upload a valid dataset."

    # Load dataset from uploaded file. Fall back to the value itself when it
    # has no ``.name`` (i.e. it already is a path string).
    csv_path = getattr(file, "name", file)
    data = pd.read_csv(csv_path)

    # Check for missing selected features
    missing_features = [feature for feature in selected_features if feature not in data.columns]
    if missing_features:
        return f"Missing features: {', '.join(missing_features)}. Please upload a valid dataset."

    # Preprocess data, restricted to the selected feature columns
    data = data[selected_features]
    processed_data = preprocess_data(data)
    return processed_data.head(10).to_csv(index=False)
def process_manual_data(**inputs):
    """Preprocess one manually entered record and return it as CSV text.

    Args:
        **inputs: Feature values keyed by feature name; together they form a
            single-row dataframe.

    Returns:
        A CSV-formatted string of the preprocessed row (at most 10 rows are
        emitted, without the index column).
    """
    # One dict in a list -> a one-row frame whose columns are the keyword names.
    single_row_frame = pd.DataFrame([inputs])
    # Run the shared preprocessing, then serialize the preview.
    preview = preprocess_data(single_row_frame).head(10)
    return preview.to_csv(index=False)
# GUI for manual input
manual |