File size: 3,718 Bytes
564fdde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import gradio as gr
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.utils import shuffle
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import RidgeCV
from skops.hub_utils import download
import joblib
import shutil

# load dataset
def load_ames_housing():
    df = fetch_openml(name="house_prices", as_frame=True, parser="pandas")
    X = df.data
    y = df.target

    features = [
        "YrSold",
        "HeatingQC",
        "Street",
        "YearRemodAdd",
        "Heating",
        "MasVnrType",
        "BsmtUnfSF",
        "Foundation",
        "MasVnrArea",
        "MSSubClass",
        "ExterQual",
        "Condition2",
        "GarageCars",
        "GarageType",
        "OverallQual",
        "TotalBsmtSF",
        "BsmtFinSF1",
        "HouseStyle",
        "MiscFeature",
        "MoSold",
    ]

    X = X.loc[:, features]
    X, y = shuffle(X, y, random_state=0)

    X = X.iloc[:600]
    y = y.iloc[:600]
    return X, np.log(y)

def stacked_model(model1,model2,model3):
    X, y = load_ames_housing()
    estimators = []
    for model in [model1,model2,model3]:
        download(repo_id="haizad/ames-housing-lasso-predictor", dst='temp_dir')
        pipeline = joblib.load( "temp_dir/model.pkl")
        estimators.append((model.split('/')[-1], pipeline))
        shutil.rmtree("temp_dir")

    stacking_regressor = StackingRegressor(estimators=estimators, final_estimator=RidgeCV())

    # plot and compare the performance of the single models and the stacked model
    import time
    import matplotlib.pyplot as plt
    from sklearn.metrics import PredictionErrorDisplay
    from sklearn.model_selection import cross_validate, cross_val_predict

    fig, axs = plt.subplots(2, 2, figsize=(9, 7))
    axs = np.ravel(axs)

    for ax, (name, est) in zip(
        axs, estimators + [("Stacking Regressor", stacking_regressor)]
    ):
        scorers = {"R2": "r2", "MAE": "neg_mean_absolute_error"}

        start_time = time.time()
        scores = cross_validate(
            est, X, y, scoring=list(scorers.values()), n_jobs=-1, verbose=0
        )

        elapsed_time = time.time() - start_time

        y_pred = cross_val_predict(est, X, y, n_jobs=-1, verbose=0)
        scores = {
            key: (
                f"{np.abs(np.mean(scores[f'test_{value}'])):.2f} +- "
                f"{np.std(scores[f'test_{value}']):.2f}"
            )
            for key, value in scorers.items()
        }

        display = PredictionErrorDisplay.from_predictions(
            y_true=y,
            y_pred=y_pred,
            kind="actual_vs_predicted",
            ax=ax,
            scatter_kwargs={"alpha": 0.2, "color": "tab:blue"},
            line_kwargs={"color": "tab:red"},
        )
        ax.set_title(f"{name}\nEvaluation in {elapsed_time:.2f} seconds")

        for name, score in scores.items():
            ax.plot([], [], " ", label=f"{name}: {score}")
        ax.legend(loc="upper left")

    fig.suptitle("Single predictors versus stacked predictors")
    fig.tight_layout()
    fig.subplots_adjust(top=0.9)
    return fig

title = "Multi-class AdaBoosted Decision Trees"
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown("This app demonstrates the Multi-class AdaBoosted Decision Trees")

    model1 = gr.Textbox(label="Repo id of first model")
    model2 = gr.Textbox(label="Repo id of second model")
    model3 = gr.Textbox(label="Repo id of third model")
    plot = gr.Plot()
    stack_btn = gr.Button("Stack")
    stack_btn.click(fn=stacked_model, inputs=[model1,model2,model3], outputs=[plot])

demo.launch()