import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import SymLogNorm
from sklearn.datasets import make_regression
from sklearn.linear_model import ARDRegression, BayesianRidge, LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
# %%
# Generate a synthetic dataset where only 10 of the 100 features are informative.
X, y, true_weights = make_regression(
    n_samples=100,
    n_features=100,
    n_informative=10,
    noise=8,
    coef=True,
    random_state=42,
)
# %%
# Fit the regressors
# ------------------
#
# We now fit both Bayesian models and the OLS to later compare the models'
# coefficients.
def fit_regression_models(n_iter=30, X=X, y=y, true_weights=true_weights):
    olr = LinearRegression().fit(X, y)
    # note: newer scikit-learn releases rename `n_iter` to `max_iter` for these models
    brr = BayesianRidge(compute_score=True, n_iter=n_iter).fit(X, y)
    ard = ARDRegression(compute_score=True, n_iter=n_iter).fit(X, y)
    df = pd.DataFrame(
        {
            "Weights of true generative process": true_weights,
            "ARDRegression": ard.coef_,
            "BayesianRidge": brr.coef_,
            "LinearRegression": olr.coef_,
        }
    )
    return df, olr, brr, ard
# %%
# Plot the true and estimated coefficients
# ----------------------------------------
#
# Now we compare the coefficients of each model with the weights of
# the true generative model.
def visualize_coefficients(df=None):
    fig = plt.figure(figsize=(10, 6))
    sns.heatmap(
        df.T,
        norm=SymLogNorm(linthresh=10e-4, vmin=-80, vmax=80),
        cbar_kws={"label": "coefficients' values"},
        cmap="seismic_r",
    )
    plt.ylabel("linear model")
    plt.xlabel("coefficients")
    plt.tight_layout(rect=(0, 0, 1, 0.95))
    _ = plt.title("Models' coefficients")
    return fig
# %%
# Due to the added noise, none of the models recover the true weights. Indeed,
# all models always have more than 10 non-zero coefficients. Compared to the OLS
# estimator, the coefficients using a Bayesian Ridge regression are slightly
# shifted toward zero, which stabilises them. The ARD regression provides a
# sparser solution: some of the non-informative coefficients are set exactly to
# zero, while shifting others closer to zero. Some non-informative coefficients
# are still present and retain large values.
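# %%
# A quick, hedged check of the claim above: the helper below is not wired into
# the Gradio demo, and the 1e-3 threshold is an arbitrary choice for this sketch.
def count_nonzero_coefficients(df, tol=1e-3):
    # Count, per model, the coefficients whose magnitude exceeds `tol`;
    # only 10 features of the true generative process are informative.
    # Example: df, *_ = fit_regression_models(); print(count_nonzero_coefficients(df))
    return (df.abs() > tol).sum()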
# %%
# Plot the marginal log-likelihood
# --------------------------------
def plot_marginal_log_likelihood(ard=None, brr=None, n_iter=30):
    fig = plt.figure(figsize=(10, 6))
    # scores_ holds the log marginal likelihood recorded at each iteration
    ard_scores = -np.array(ard.scores_)
    brr_scores = -np.array(brr.scores_)
    plt.plot(ard_scores, color="navy", label="ARD")
    plt.plot(brr_scores, color="red", label="BayesianRidge")
    plt.ylabel("Log-likelihood")
    plt.xlabel("Iterations")
    plt.xlim(1, n_iter)
    plt.legend()
    _ = plt.title("Models log-likelihood")
    return fig
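# %%
# A minimal sketch (not used by the demo) of how the fitted hyperparameters can
# be inspected: both Bayesian models expose the estimated noise precision
# `alpha_` and the weight precision(s) `lambda_`, and, because
# `compute_score=True` is passed above, the log marginal likelihood of each
# iteration in `scores_`.
def summarize_bayesian_hyperparameters(model):
    # Works for fitted BayesianRidge and ARDRegression estimators.
    return {
        "alpha_ (noise precision)": model.alpha_,
        "lambda_ (weight precision)": model.lambda_,
        "n_recorded_scores": len(model.scores_),
    }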
def make_regression_comparison_plot(n_iter=30):
    # fit the models, then plot the estimated coefficients against the true weights
    df, olr, brr, ard = fit_regression_models(
        n_iter=n_iter, X=X, y=y, true_weights=true_weights
    )
    fig = visualize_coefficients(df=df)
    return fig


def make_log_likelihood_plot(n_iter=30):
    # fit the models, then plot the marginal log-likelihood over the iterations
    df, olr, brr, ard = fit_regression_models(
        n_iter=n_iter, X=X, y=y, true_weights=true_weights
    )
    fig = plot_marginal_log_likelihood(ard=ard, brr=brr, n_iter=n_iter)
    return fig
# %%
# Indeed, both models minimize the log-likelihood up to an arbitrary cutoff
# defined by the `n_iter` parameter.
#
# Bayesian regressions with polynomial feature expansion
# ======================================================
#
# Generate synthetic dataset
# --------------------------
# We create a target that is a non-linear function of the input feature.
# Gaussian noise is added.
rng = np.random.RandomState(0)
n_samples = 110

# sort the data to make plotting easier later
g_X = np.sort(-10 * rng.rand(n_samples) + 10)
noise = rng.normal(0, 1, n_samples) * 1.35
g_y = np.sqrt(g_X) * np.sin(g_X) + noise
full_data = pd.DataFrame({"input_feature": g_X, "target": g_y})
g_X = g_X.reshape((-1, 1))

# extend the plotting range beyond the training data for extrapolation
X_plot = np.linspace(10, 10.4, 10)
y_plot = np.sqrt(X_plot) * np.sin(X_plot)
X_plot = np.concatenate((g_X, X_plot.reshape((-1, 1))))
y_plot = np.concatenate((g_y - noise, y_plot))
# %%
# Fit the regressors
# ------------------
#
# Here we try a high-degree polynomial (degree 10 by default) to potentially
# overfit, though the Bayesian linear models regularize the size of the
# polynomial coefficients. As `fit_intercept=True` by default for
# :class:`~sklearn.linear_model.ARDRegression` and
# :class:`~sklearn.linear_model.BayesianRidge`,
# :class:`~sklearn.preprocessing.PolynomialFeatures` should not introduce an
# additional bias feature. By setting `return_std=True`, the Bayesian regressors
# return the standard deviation of the posterior distribution for the model
# parameters.
def generate_polynomial_dataset(degree=10):
    # The degree is controlled by the gr.Slider in the demo below.
    ard_poly = make_pipeline(
        PolynomialFeatures(degree=degree, include_bias=False),
        StandardScaler(),
        ARDRegression(),
    ).fit(g_X, g_y)
    brr_poly = make_pipeline(
        PolynomialFeatures(degree=degree, include_bias=False),
        StandardScaler(),
        BayesianRidge(),
    ).fit(g_X, g_y)
    y_ard, y_ard_std = ard_poly.predict(X_plot, return_std=True)
    y_brr, y_brr_std = brr_poly.predict(X_plot, return_std=True)
    return y_ard, y_ard_std, y_brr, y_brr_std
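# %%
# A small illustrative sketch (not part of the demo) of why `include_bias=False`
# is used above: with the default `include_bias=True`, PolynomialFeatures would
# prepend a constant column, duplicating the intercept that ARDRegression and
# BayesianRidge already fit because `fit_intercept=True` by default.
def show_bias_column(degree=3):
    demo_X = np.arange(5, dtype=float).reshape(-1, 1)
    with_bias = PolynomialFeatures(degree=degree).fit(demo_X)
    without_bias = PolynomialFeatures(degree=degree, include_bias=False).fit(demo_X)
    # e.g. degree=3 -> ['1' 'x0' 'x0^2' 'x0^3'] vs. ['x0' 'x0^2' 'x0^3']
    return with_bias.get_feature_names_out(), without_bias.get_feature_names_out()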
# %%
# Plotting polynomial regressions with std errors of the scores
# --------------------------------------------------------------
def visualize_bayes_regressions_polynomial_features(degree=10):
    # The degree comes from the gr.Slider in the demo below.
    y_ard, y_ard_std, y_brr, y_brr_std = generate_polynomial_dataset(degree)
    fig = plt.figure(figsize=(10, 6))
    ax = sns.scatterplot(
        data=full_data, x="input_feature", y="target", color="black", alpha=0.75
    )
    ax.plot(X_plot, y_plot, color="black", label="Ground Truth")
    ax.plot(X_plot, y_brr, color="red", label="BayesianRidge with polynomial features")
    ax.plot(X_plot, y_ard, color="navy", label="ARD with polynomial features")
    ax.fill_between(
        X_plot.ravel(),
        y_ard - y_ard_std,
        y_ard + y_ard_std,
        color="navy",
        alpha=0.3,
    )
    ax.fill_between(
        X_plot.ravel(),
        y_brr - y_brr_std,
        y_brr + y_brr_std,
        color="red",
        alpha=0.3,
    )
    ax.legend()
    _ = ax.set_title("Polynomial fit of a non-linear feature")
    return fig
title = "Illustration of Comparing Linear Bayesian Regressors with synthetic data"

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown("""
This example shows a comparison of two different Bayesian regressors:

* Automatic Relevance Determination (ARD), see the [sklearn docs](https://scikit-learn.org/stable/modules/linear_model.html#automatic-relevance-determination)
* Bayesian Ridge Regression, see the [sklearn docs](https://scikit-learn.org/stable/modules/linear_model.html#bayesian-ridge-regression)

The tutorial is split into sections. The first compares the model coefficients produced by Ordinary Least Squares (OLS), Bayesian Ridge Regression, and ARD with the known true coefficients.
For this we generate a dataset where X and y are linearly linked: 10 of the features of X are used to generate y, while the other features are not useful for predicting y.
In addition, we generate the dataset such that n_samples == n_features. Such a setting is challenging for an OLS model and potentially leads to arbitrarily large weights.
Having a prior on the weights and a penalty alleviates the problem. Finally, Gaussian noise is added.

The final tab investigates Bayesian regressors with polynomial features on an additional dataset where the target is a non-linear function of the input feature, with added Gaussian noise.

For further details please see the sklearn docs:
""")
    gr.Markdown("**[Demo is based on the sklearn docs found here](https://scikit-learn.org/stable/auto_examples/linear_model/plot_ard.html#sphx-glr-auto-examples-linear-model-plot-ard-py)** <br>")
| with gr.Tab("Plot true and estimated coefficients"): | |
| with gr.Row(): | |
| n_iter = gr.Slider(value=5, minimum=5, maximum=50, step=1, label="n_iterations") | |
| btn = gr.Button(value="Plot true and estimated coefficients") | |
| btn.click(make_regression_comparison_plot, inputs = [n_iter], outputs= gr.Plot(label='Plot true and estimated coefficients') ) | |
| gr.Markdown( | |
| """ | |
| # Details | |
| One can observe that with the added noise, none of the models can perfectly recover the coefficients of the original model. All models have more thab 10 non-zero coefficients, | |
| where only 10 are useful. The Bayesian Ridge Regression manages to recover most of the coefficients, while the ARD is more conservative. | |
| """) | |
| with gr.Tab("Plot marginal log likelihoods"): | |
| with gr.Row(): | |
| n_iter = gr.Slider(value=5, minimum=5, maximum=50, step=1, label="n_iterations") | |
| btn = gr.Button(value="Plot marginal log likelihoods") | |
| btn.click(make_log_likelihood_plot, inputs = [n_iter], outputs= gr.Plot(label='Plot marginal log likelihoods') ) | |
| gr.Markdown( | |
| """ | |
| # Confirm with marginal log likelihoods | |
| Both ARD and Bayesian Ridge minimized the log-likelihood upto an arbitrary cuttoff defined the the n_iter parameter. | |
| """ | |
| ) | |
| with gr.Tab("Plot bayesian regression with polynomial features"): | |
| with gr.Row(): | |
| degree = gr.Slider(value=5, minimum=5, maximum=50, step=1, label="n_degrees") | |
| btn = gr.Button(value="Plot bayesian regression with polynomial features") | |
| btn.click(visualize_bayes_regressions_polynomial_features, inputs = [degree], outputs= gr.Plot(label='Plot bayesian regression with polynomial features') ) | |
| gr.Markdown( | |
| """ | |
| # Details | |
| Here we try a degree 10 polynomial to potentially overfit, though the bayesian linear models regularize the size of the polynomial coefficients. | |
| As fit_intercept=True by default for ARDRegression and BayesianRidge, then PolynomialFeatures should not introduce an additional bias feature. By setting return_std=True, | |
| the bayesian regressors return the standard deviation of the posterior distribution for the model parameters. | |
| """) | |
| demo.launch() |