Spaces:
Runtime error
Runtime error
File size: 2,959 Bytes
d50344a 5d54022 d50344a 5d54022 d50344a 5d54022 857bde5 5d54022 16b217f 5d54022 d50344a 5d54022 d50344a 55a248d de576e2 100fe27 de576e2 100fe27 de576e2 d50344a 100fe27 de576e2 55a248d d50344a de576e2 d50344a a6d130c d50344a 16b217f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import gradio as gr
import xgboost as xgb
import pandas as pd
from datasets import load_dataset
from sklearn.model_selection import train_test_split
# Load the "Ammok/hair_health" dataset and view its "train" split as a DataFrame
dataset = load_dataset("Ammok/hair_health")
df = pd.DataFrame(dataset['train'])

### PREPROCESSING
# The literal string "No Data" stands in for a missing value; make it a real NA
df = df.replace("No Data", pd.NA)

# Numeric gaps are filled with the mean of their own column
numeric_means = df.select_dtypes(include=['number']).mean()
df = df.fillna(numeric_means)

# Object-dtype (text) gaps are filled with the most frequent value of their column
object_columns = df.select_dtypes(include=['object']).columns
df = df.fillna({name: df[name].mode()[0] for name in object_columns})

# One-hot encode the categorical columns. Several names end with a trailing
# space; they are spelled exactly as the column lookup expects them.
categorical_cols = [
    'Genetics',
    'Hormonal Changes',
    'Medical Conditions',
    'Medications & Treatments',
    'Nutritional Deficiencies ',
    'Stress',
    'Poor Hair Care Habits ',
    'Environmental Factors',
    'Smoking',
    'Weight Loss ',
]
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# Separate the "Hair Loss" target from the feature matrix
target_column = "Hair Loss"
y = df[target_column]
X = df.drop(columns=[target_column])

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an XGBoost classifier with default settings on the training portion
model = xgb.XGBClassifier()
model.fit(X_train, y_train)
def predict(*inputs):
    """Predict the "Hair Loss" label for a single set of feature values.

    Args:
        *inputs: Feature values given positionally, in the same order as
            ``X.columns``. A value of ``None`` (or a column for which no
            value was supplied) is imputed from the feature matrix ``X``:
            the column mean for numeric columns, the most frequent value
            for every other column.

    Returns:
        The first element of ``model.predict`` for the one-row frame,
        unwrapped to a built-in Python scalar when it is a NumPy scalar.
    """
    # Pair each feature column with the value supplied for it. Columns beyond
    # the supplied values are simply absent here and get imputed below.
    input_data = dict(zip(X.columns, inputs))

    for col in X.columns:
        if input_data.get(col) is not None:
            continue
        column = X[col]
        # pandas treats bool as a numeric dtype, so boolean (one-hot) columns
        # are ruled out explicitly: a fractional mean is not a valid flag.
        # Checking the dtype family rather than == 'float64' lets integer
        # columns receive their mean as well.
        is_numeric = (
            pd.api.types.is_numeric_dtype(column)
            and not pd.api.types.is_bool_dtype(column)
        )
        input_data[col] = column.mean() if is_numeric else column.mode()[0]

    # Build a one-row frame whose column order matches the feature matrix
    data = pd.DataFrame([input_data], columns=X.columns)
    label = model.predict(data)[0]
    # Unwrap NumPy scalars so the caller receives a built-in type
    return label.item() if hasattr(label, "item") else label
# Row lookup used by the data-exploration part of the Gradio interface
def explore_data(row_number, frame=None):
    """Return one row of the dataset as a ``{column: value}`` dictionary.

    Args:
        row_number: Positional index of the row. Floats are accepted and
            truncated to an integer, because a numeric input widget may
            deliver ``3.0`` rather than ``3`` and ``iloc`` rejects floats.
            Negative values count from the end, as with ``iloc``.
            ``None`` (an empty input field) yields an empty dictionary.
        frame: DataFrame to read from. Defaults to the module-level ``df``.

    Returns:
        The selected row as a dictionary, or ``{}`` when ``row_number`` is
        ``None``.

    Raises:
        IndexError: If the position is outside the frame.
    """
    if frame is None:
        frame = df
    if row_number is None:
        return {}
    return frame.iloc[int(row_number)].to_dict()
# Gradio UI: a row explorer on top, a prediction form underneath
with gr.Blocks() as demo:
    gr.Markdown("# Hair Health Dataset Exploration")
    # precision=0 makes the component deliver an integer; the value is used as
    # a positional row index, which DataFrame.iloc does not accept as a float.
    row_number_input = gr.Number(label="Row Number", precision=0)
    data_output = gr.JSON(label="Row Data")
    row_number_input.change(explore_data, inputs=[row_number_input], outputs=[data_output])

    gr.Markdown("## Make a Prediction")
    # One numeric input per feature column, created in X.columns order so the
    # positional arguments handed to predict() line up with the columns.
    input_components = {col: gr.Number(label=col) for col in X.columns}
    output = gr.Textbox(label="Prediction")
    submit_button = gr.Button("Predict")
    # The click handler receives the component values as positional arguments
    submit_button.click(predict, inputs=list(input_components.values()), outputs=[output])

demo.launch()
|