# Spaces: Sleeping
import pandas as pd | |
import numpy as np | |
from sklearn.model_selection import train_test_split, cross_val_score, KFold | |
from sklearn.preprocessing import LabelEncoder, StandardScaler, PolynomialFeatures | |
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor | |
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error | |
from sklearn.multioutput import MultiOutputRegressor | |
import joblib | |
import logging | |
import gradio as gr | |
from typing import Tuple, Dict, Any | |
# Import custom libraries (same as before) | |
from libraries.fits.shirts_lib import get_fit as get_shirt_fit | |
from libraries.sizes.shirts_lib import get_best_size as get_shirt_size | |
# Module logger; root logging is configured at INFO level on import
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
class EnhancedBodyMeasurementPredictor:
    """Predict body measurements and a clothing size from height and weight.

    Engineered features (height, BMI and two width/height ratios) are expanded
    with degree-2 polynomial terms, standardised, and fed to one
    ``GradientBoostingRegressor`` per target column via
    ``MultiOutputRegressor``. The ``Size`` column is label-encoded and
    regressed like the numeric targets.

    NOTE(review): ``BMI``, ``Chest_Height_Ratio`` and ``Waist_Height_Ratio``
    are derived from ``Weight``, ``ChestWidth`` and ``Waist``, which are also
    prediction targets. The feature set is kept as-is to preserve the model
    contract; confirm whether this leakage is intended.
    """

    # Engineered feature columns, in the order the transformers are fitted on
    FEATURE_COLUMNS = ['TotalHeight', 'BMI', 'Chest_Height_Ratio', 'Waist_Height_Ratio']

    def __init__(self):
        self.model = None
        self.scaler = None
        self.poly_features = None
        self.label_encoder = None
        self.y_columns = None
        self.feature_columns = None
        self.model_metrics = {}
        # Training-set medians of the engineered features; predict() uses them
        # for features the caller cannot supply (ratios, BMI without weight).
        self.feature_defaults = {}

    @staticmethod
    def _engineer_features(data: pd.DataFrame) -> pd.DataFrame:
        """Return the engineered feature frame without modifying ``data``."""
        height = data['TotalHeight']
        return pd.DataFrame(
            {
                'TotalHeight': height,
                # Height is divided by 100 before squaring (cm -> m for BMI)
                'BMI': data['Weight'] / ((height / 100) ** 2),
                'Chest_Height_Ratio': data['ChestWidth'] / height,
                'Waist_Height_Ratio': data['Waist'] / height,
            },
            index=data.index,
        )

    @staticmethod
    def _clip_class_index(value: float, n_classes: int) -> int:
        """Round ``value`` and clamp it into ``[0, n_classes - 1]``."""
        return int(min(max(round(value), 0), n_classes - 1))

    def create_polynomial_features(self, X: pd.DataFrame) -> np.ndarray:
        """Expand ``X`` with polynomial terms up to degree 2 (no bias column).

        The transformer is fitted on the first call and reused afterwards.
        """
        if self.poly_features is None:
            self.poly_features = PolynomialFeatures(degree=2, include_bias=False)
            return self.poly_features.fit_transform(X)
        return self.poly_features.transform(X)

    def preprocess_data(self, data: pd.DataFrame) -> Tuple[np.ndarray, pd.DataFrame]:
        """Build the scaled feature matrix and the target frame from raw data.

        Args:
            data: Frame containing at least ``TotalHeight``, ``Weight``,
                ``ChestWidth`` and ``Waist``. It is not modified.

        Returns:
            ``(X_scaled, y)`` where ``y`` is a new frame holding every column
            of ``data`` except the engineered feature columns.
        """
        self.feature_columns = list(self.FEATURE_COLUMNS)
        X = self._engineer_features(data)[self.feature_columns]

        # Remember whether this call fits the scaler before it is created below.
        fitting = self.scaler is None
        X_poly = self.create_polynomial_features(X)
        if fitting:
            # Medians give predict() an in-distribution fallback value.
            self.feature_defaults = X.median().to_dict()
            self.scaler = StandardScaler()
            X_scaled = self.scaler.fit_transform(X_poly)
        else:
            X_scaled = self.scaler.transform(X_poly)

        # drop() returns a new frame, so later edits to y leave ``data`` alone.
        y = data.drop(columns=self.feature_columns, errors='ignore')
        return X_scaled, y

    def train_model(self, data: pd.DataFrame) -> None:
        """Fit the transformers and the multi-output model, then evaluate it.

        Args:
            data: Training frame; must also contain a ``Size`` column.
        """
        logger.info("Starting model training...")

        # Reset fitted state so retraining never reuses stale transformers.
        self.poly_features = None
        self.scaler = None
        self.model_metrics = {}

        X_scaled, y = self.preprocess_data(data)
        self.y_columns = y.columns

        # Size is categorical; encode it as integers so it can be regressed.
        self.label_encoder = LabelEncoder()
        y['Size'] = self.label_encoder.fit_transform(y['Size'])

        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y, test_size=0.2, random_state=42
        )

        estimator = GradientBoostingRegressor(
            n_estimators=100,
            learning_rate=0.1,
            max_depth=5,
            random_state=42,
        )
        # One gradient-boosting regressor is cloned and fitted per target.
        self.model = MultiOutputRegressor(estimator)
        self.model.fit(X_train, y_train)

        self._evaluate_model(X_test, y_test)
        logger.info("Model training completed")

    def _evaluate_model(self, X_test: np.ndarray, y_test: pd.DataFrame) -> None:
        """Store R², MSE and MAE per target in ``model_metrics`` and log them."""
        y_pred = self.model.predict(X_test)
        for i, col in enumerate(self.y_columns):
            actual = y_test.iloc[:, i]
            predicted = y_pred[:, i]
            self.model_metrics[col] = {
                'r2': r2_score(actual, predicted),
                'mse': mean_squared_error(actual, predicted),
                'mae': mean_absolute_error(actual, predicted),
            }

        logger.info("Model Evaluation Results:")
        for col, metrics in self.model_metrics.items():
            logger.info(f"{col}: R² = {metrics['r2']:.4f}, MAE = {metrics['mae']:.4f}")

    def predict(self, total_height: float, weight: float = None) -> Dict[str, Any]:
        """Predict every target column for one person.

        Args:
            total_height: Height in centimetres; must be positive.
            weight: Weight; ``None`` or a non-positive value means "unknown",
                in which case the training-set median BMI is used.

        Returns:
            Mapping of target column name to predicted value. ``Size`` is
            decoded back to its original label; other values are floats.

        Raises:
            RuntimeError: If the model has not been trained.
            ValueError: If ``total_height`` is missing or not positive.
        """
        if self.model is None or self.scaler is None or self.poly_features is None:
            raise RuntimeError("Model has not been trained; call train_model() first")
        if total_height is None or total_height <= 0:
            raise ValueError("total_height must be a positive number")

        defaults = self.feature_defaults or {}
        if weight is not None and weight > 0:
            bmi = weight / ((total_height / 100) ** 2)
        else:
            bmi = defaults.get('BMI', 0.0)

        # The ratios depend on measurements being predicted, so they are
        # unknown here; use training medians (0.0 if none were recorded).
        features = pd.DataFrame({
            'TotalHeight': [total_height],
            'BMI': [bmi],
            'Chest_Height_Ratio': [defaults.get('Chest_Height_Ratio', 0.0)],
            'Waist_Height_Ratio': [defaults.get('Waist_Height_Ratio', 0.0)],
        })[self.feature_columns]

        # transform() only: the fitted transformers must never be refitted here.
        X_poly = self.poly_features.transform(features)
        X_scaled = self.scaler.transform(X_poly)
        prediction = self.model.predict(X_scaled)

        pred_dict = {col: float(val) for col, val in zip(self.y_columns, prediction[0])}

        # The regressed size index can fall outside the encoded range; clamp it
        # so inverse_transform always receives a known class.
        size_index = self._clip_class_index(
            pred_dict['Size'], len(self.label_encoder.classes_)
        )
        label = self.label_encoder.inverse_transform([size_index])[0]
        # Convert NumPy scalars to plain Python values for JSON output.
        pred_dict['Size'] = label.item() if hasattr(label, 'item') else label
        return pred_dict
# Module-wide predictor instance shared by every Gradio callback
predictor = EnhancedBodyMeasurementPredictor()


def gradio_predict(total_height: float, weight: float = None):
    """Gradio callback: return the predictor's output for this height and weight."""
    return predictor.predict(total_height, weight)
def gradio_predict_important(total_height: float, weight: float = None, fit_type_input: str = None):
    """Gradio callback: predict measurements and derive a shirt size and fit.

    Args:
        total_height: Height passed through to ``predictor.predict``.
        weight: Weight passed through to ``predictor.predict``.
        fit_type_input: Fit chosen by the user. ``None`` or a blank string
            means "not chosen", and the fit is computed from the predicted
            shoulder, chest and waist values instead.

    Returns:
        A dict with ``Brand``, ``RecommendedSize``, ``SizeDetails``, ``Fit``
        and ``PredictedMeasurements``, or ``{"error": ...}`` when the size/fit
        calculation raises ``TypeError`` or ``ValueError``.
    """
    prediction = predictor.predict(total_height, weight)
    try:
        brand = "Zara"  # Default brand
        # float(None) raises TypeError when a measurement is absent, which
        # the handler below turns into an error response.
        chest = float(prediction.get("ChestWidth"))
        waist = float(prediction.get("Waist"))
        shoulder = float(prediction.get("ShoulderWidth"))

        recommended_size, size_details = get_shirt_size(
            brand, int(round(chest)), int(round(waist)), int(round(shoulder))
        )

        # An empty Gradio Textbox yields "" rather than None, so a blank
        # value must also fall back to the computed fit.
        requested_fit = "" if fit_type_input is None else str(fit_type_input).strip()
        if requested_fit:
            computed_fit = requested_fit
        else:
            computed_fit = get_shirt_fit(shoulder, chest, waist)

        return {
            "Brand": brand,
            "RecommendedSize": recommended_size,
            "SizeDetails": size_details,
            "Fit": computed_fit,
            "PredictedMeasurements": prediction,
        }
    except (TypeError, ValueError) as e:
        return {"error": f"Error in size/fit calculation: {str(e)}"}
# Read the dataset, discard rows with missing values and fit the predictor at import time
try:
    data = pd.read_csv("./data/bdm.csv").dropna()
    predictor.train_model(data)
    logger.info("Model initialization completed successfully")
except Exception as e:
    # Log the failure, then re-raise so the app does not start without a model
    logger.error(f"Error during model initialization: {str(e)}")
    raise
# Gradio interface returning the full prediction as JSON
predict_interface = gr.Interface(
    title="Body Measurement Prediction",
    fn=gradio_predict,
    inputs=[gr.Number(label="Total Height (cm)"), gr.Number(label="Weight (kg)")],
    outputs="json",
)
# Gradio interface returning the size/fit recommendation as JSON
predict_important_interface = gr.Interface(
    title="Important Body Measurement Prediction",
    fn=gradio_predict_important,
    inputs=[
        gr.Number(label="Total Height (cm)"),
        gr.Number(label="Weight (kg)"),
        gr.Textbox(label="Fit Type"),
    ],
    outputs="json",
)
# Serve both interfaces as tabs of one Gradio app
tab_interfaces = [predict_interface, predict_important_interface]
tab_titles = ["Predict", "Predict Important"]
gr.TabbedInterface(tab_interfaces, tab_titles).launch()