import gradio as gr
import pandas as pd
import json
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from io import StringIO

# Load the whitelist of feature names produced by an earlier
# feature-selection step; uploads are validated against this list.
with open("selected_features.json", "r") as file:
    selected_features = json.load(file)


def preprocess_data(dataframe):
    """Standard-scale numeric columns and one-hot encode categorical ones.

    The scaler/encoder are fit on *dataframe* itself (this is a
    demo-style transform, not a persisted training pipeline).

    Parameters
    ----------
    dataframe : pd.DataFrame
        Input data; dtypes decide which transform each column gets.

    Returns
    -------
    pd.DataFrame
        Transformed values with scaled numeric columns first, then the
        one-hot columns named by ``get_feature_names_out``.
    """
    numerical_cols = dataframe.select_dtypes(include=["number"]).columns
    categorical_cols = [col for col in dataframe.columns if col not in numerical_cols]

    # Build the transformer list conditionally: ColumnTransformer leaves a
    # transformer unfitted when its column selection is empty, so an
    # all-numeric frame would make the 'cat' feature-name lookup below
    # raise NotFittedError (and vice versa for all-categorical input).
    transformers = []
    if len(numerical_cols) > 0:
        transformers.append(('num', StandardScaler(), numerical_cols))
    if categorical_cols:
        transformers.append(
            ('cat', OneHotEncoder(sparse_output=False, drop='first'), categorical_cols)
        )

    preprocessor = ColumnTransformer(transformers=transformers)
    processed_data = preprocessor.fit_transform(dataframe)

    # Reconstruct column names in the same order ColumnTransformer emits
    # them: numeric block first, then the expanded one-hot block.
    feature_names = numerical_cols.tolist()
    if categorical_cols:
        feature_names += list(
            preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_cols)
        )
    return pd.DataFrame(processed_data, columns=feature_names)


def process_uploaded_data(file):
    """Validate an uploaded CSV against *selected_features* and preprocess it.

    Parameters
    ----------
    file : gradio file object
        Upload handle; only its ``.name`` (temp-file path) is used.

    Returns
    -------
    str
        CSV text of the first 10 preprocessed rows, or an error message
        listing the missing required features.
    """
    data = pd.read_csv(file.name)

    # Reject uploads that lack any of the required columns.
    missing_features = [feature for feature in selected_features if feature not in data.columns]
    if missing_features:
        return f"Missing features: {', '.join(missing_features)}. Please upload a valid dataset."

    # Restrict to the selected features before preprocessing.
    data = data[selected_features]
    processed_data = preprocess_data(data)
    return processed_data.head(10).to_csv(index=False)


def process_manual_data(**inputs):
    """Preprocess a single manually-entered record.

    NOTE(review): gradio normally passes component values positionally,
    so a ``**inputs`` callback is unusual — the GUI wiring is outside the
    visible source; confirm how this function is registered.

    Returns
    -------
    str
        CSV text of the preprocessed single-row frame.
    """
    # One keyword per feature -> a single-row DataFrame.
    input_data = pd.DataFrame([inputs])
    processed_data = preprocess_data(input_data)
    return processed_data.head(10).to_csv(index=False)


# GUI for manual input
# NOTE(review): SOURCE is truncated at this point — the manual-input GUI
# definition (beginning "manual...") is cut off and is not reconstructed here.