haizad's picture
Update descriptions and add default value
e599435
raw
history blame
4.4 kB
import gradio as gr
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.utils import shuffle
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import RidgeCV
from skops.hub_utils import download
import joblib
import shutil
# load dataset
def load_ames_housing():
    """Return a 600-row sample of the OpenML ``house_prices`` data.

    The dataset is fetched as a pandas frame, reduced to a fixed list of 20
    columns, shuffled with ``random_state=0`` and truncated to its first 600
    rows.

    Returns:
        A ``(X, y)`` pair: ``X`` holds the selected feature columns and ``y``
        is the natural logarithm of the matching target values.
    """
    # Column order is kept exactly as listed; the selection is positional for
    # anything downstream that relies on column order.
    selected_columns = [
        "YrSold",
        "HeatingQC",
        "Street",
        "YearRemodAdd",
        "Heating",
        "MasVnrType",
        "BsmtUnfSF",
        "Foundation",
        "MasVnrArea",
        "MSSubClass",
        "ExterQual",
        "Condition2",
        "GarageCars",
        "GarageType",
        "OverallQual",
        "TotalBsmtSF",
        "BsmtFinSF1",
        "HouseStyle",
        "MiscFeature",
        "MoSold",
    ]
    n_rows = 600

    bunch = fetch_openml(name="house_prices", as_frame=True, parser="pandas")
    # Shuffle features and target together so rows stay aligned.
    data, target = shuffle(
        bunch.data.loc[:, selected_columns], bunch.target, random_state=0
    )
    return data.iloc[:n_rows], np.log(target.iloc[:n_rows])
def _normalize_repo_id(model):
    """Turn a Hub repo id or a huggingface.co model URL into ``namespace/name``."""
    repo_id = (model or "").strip()
    for prefix in (
        "https://huggingface.co/",
        "http://huggingface.co/",
        "huggingface.co/",
    ):
        if repo_id.startswith(prefix):
            repo_id = repo_id[len(prefix):]
            break
    repo_id = repo_id.strip("/")
    if not repo_id:
        raise ValueError(
            "A model repo id is required, e.g. 'namespace/model-name'."
        )
    return repo_id


def _load_pipeline(repo_id):
    """Download ``repo_id`` from the Hub and return its unpickled ``model.pkl``."""
    import os
    import tempfile

    # A uniquely named scratch directory keeps concurrent requests from
    # clobbering each other and is removed even when the download or the
    # unpickling fails, so a failed call cannot poison the next one.
    with tempfile.TemporaryDirectory(prefix="stacking_") as scratch:
        # Hand download() a child path that does not exist yet so it never
        # meets a pre-existing destination.
        dst = os.path.join(scratch, "repo")
        download(repo_id=repo_id, dst=dst)
        # SECURITY: joblib.load unpickles the file, which can execute
        # arbitrary code. The repo id comes straight from user input, so only
        # expose this app where loading untrusted models is acceptable.
        return joblib.load(os.path.join(dst, "model.pkl"))


def stacked_model(model1, model2, model3):
    """Stack three Hub-hosted regressors and plot them against the stack.

    Each model is downloaded, cross-validated on the Ames housing sample and
    drawn as an actual-vs-predicted panel; a fourth panel shows a
    ``StackingRegressor`` built from the three with ``RidgeCV`` as the final
    estimator.

    Args:
        model1: Repo id (``namespace/name``) or huggingface.co URL of the
            first model. The repo must contain a ``model.pkl`` file.
        model2: Same, for the second model.
        model3: Same, for the third model.

    Returns:
        The matplotlib figure holding the 2x2 grid of comparison panels.

    Raises:
        ValueError: If any of the three model references is empty.
    """
    # Resolve every reference first so bad input fails before the dataset
    # fetch or any download starts.
    repo_ids = [_normalize_repo_id(model) for model in (model1, model2, model3)]

    import time
    import matplotlib.pyplot as plt
    from sklearn.metrics import PredictionErrorDisplay
    from sklearn.model_selection import cross_validate, cross_val_predict

    X, y = load_ames_housing()

    # The last path component of the repo id doubles as the estimator name.
    estimators = [
        (repo_id.split("/")[-1], _load_pipeline(repo_id)) for repo_id in repo_ids
    ]
    stacking_regressor = StackingRegressor(
        estimators=estimators, final_estimator=RidgeCV()
    )

    # plot and compare the performance of the single models and the stacked model
    scorers = {"R2": "r2", "MAE": "neg_mean_absolute_error"}
    fig, axs = plt.subplots(2, 2, figsize=(9, 7))
    axs = np.ravel(axs)

    for ax, (name, est) in zip(
        axs, estimators + [("Stacking Regressor", stacking_regressor)]
    ):
        # Only the scoring run is timed; the prediction run below is not.
        start_time = time.time()
        cv_results = cross_validate(
            est, X, y, scoring=list(scorers.values()), n_jobs=-1, verbose=0
        )
        elapsed_time = time.time() - start_time

        y_pred = cross_val_predict(est, X, y, n_jobs=-1, verbose=0)

        # abs() turns the negated MAE scorer back into a positive error.
        summaries = {
            label: (
                f"{np.abs(np.mean(cv_results[f'test_{scorer}'])):.2f} +- "
                f"{np.std(cv_results[f'test_{scorer}']):.2f}"
            )
            for label, scorer in scorers.items()
        }

        PredictionErrorDisplay.from_predictions(
            y_true=y,
            y_pred=y_pred,
            kind="actual_vs_predicted",
            ax=ax,
            scatter_kwargs={"alpha": 0.2, "color": "tab:blue"},
            line_kwargs={"color": "tab:red"},
        )
        ax.set_title(f"{name}\nEvaluation in {elapsed_time:.2f} seconds")

        # Empty, invisible plots are used purely to get the scores into the legend.
        for label, summary in summaries.items():
            ax.plot([], [], " ", label=f"{label}: {summary}")
        ax.legend(loc="upper left")

    fig.suptitle("Single predictors versus stacked predictors")
    fig.tight_layout()
    fig.subplots_adjust(top=0.9)

    # Drop pyplot's global reference so figures do not pile up across clicks
    # in the long-running server; the returned Figure is still usable.
    plt.close(fig)
    return fig
# Gradio UI: three repo-id inputs, a button, and the resulting comparison plot.
title = "Combine predictors using stacking"

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown("""
This app demonstrates combining 3 predictors trained on Ames housing dataset from OpenML using stacking and Ridge estimator as final estimator.
Stacking uses a meta-learning algorithm to learn how to best combine the predictions from trained models. The OpenML Ames housing dataset is a processed version of the 'Ames Iowa Housing'with 81 features.
This app is developed based on [scikit-learn example](https://scikit-learn.org/stable/auto_examples/ensemble/plot_stack_predictors.html#sphx-glr-auto-examples-ensemble-plot-stack-predictors-py)
""")
    # Defaults are bare "namespace/name" repo ids, matching the field labels
    # and what the Hub download call expects, rather than full model URLs.
    model1 = gr.Textbox(
        label="Repo id of first model",
        value="haizad/ames-housing-random-forest-predictor",
    )
    model2 = gr.Textbox(
        label="Repo id of second model",
        value="haizad/ames-housing-gbdt-predictor",
    )
    model3 = gr.Textbox(
        label="Repo id of third model",
        value="haizad/ames-housing-lasso-predictor",
    )
    plot = gr.Plot()
    stack_btn = gr.Button("Stack")
    # Clicking the button runs the stacking comparison and renders its figure.
    stack_btn.click(fn=stacked_model, inputs=[model1, model2, model3], outputs=[plot])

demo.launch()