"""Gradio demo: anomaly detection with a pre-trained IsolationForest.

Builds a small synthetic 2-D dataset (two Gaussian clusters plus uniformly
distributed outliers), downloads a pickled model from the Hugging Face Hub,
and exposes three plots through a tabbed Gradio interface.
"""

import os
import pickle
import time

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.figure import Figure
from sklearn.ensemble import IsolationForest
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.model_selection import train_test_split
from skops import hub_utils

# Data preparation
n_samples, n_outliers = 120, 40
rng = np.random.RandomState(0)
covariance = np.array([[0.5, -0.1], [0.7, 0.4]])
# General deformed cluster centred on (2, 2).
cluster_1 = 0.4 * rng.randn(n_samples, 2) @ covariance + np.array([2, 2])
# Spherical cluster centred on (-2, -2).
cluster_2 = 0.3 * rng.randn(n_samples, 2) + np.array([-2, -2])
outliers = rng.uniform(low=-4, high=4, size=(n_outliers, 2))

# 120 + 120 + 40 = 280 two-dimensional samples.
X = np.concatenate([cluster_1, cluster_2, outliers])
# Ground truth: +1 for the two clusters (inliers), -1 for the outliers.
y = np.concatenate(
    [np.ones((2 * n_samples), dtype=int), -np.ones((n_outliers), dtype=int)]
)


def load_hf_model_hub():
    """Fetch the model repository from the Hugging Face Hub and unpickle it.

    Returns:
        The object stored in ``isolation_forest.pkl`` (presumably a fitted
        ``IsolationForest`` -- confirm against the hub repository).
    """
    repo_id = "sklearn-docs/anomaly-detection"
    download_repo = "downloaded-model"
    model_path = os.path.join(download_repo, "isolation_forest.pkl")

    # Skip the download when a previous run already fetched the model.
    # NOTE(review): skops' download appears to reject a non-empty destination
    # directory, which would make a second start of the app fail -- confirm.
    if not os.path.isfile(model_path):
        hub_utils.download(repo_id=repo_id, dst=download_repo)
        # Pause kept from the original implementation; its purpose is not
        # evident from this file -- TODO confirm it is still needed.
        time.sleep(2)

    # SECURITY: unpickling executes arbitrary code. This is only acceptable
    # while the hub repository above is trusted.
    with open(model_path, "rb") as model_file:
        return pickle.load(model_file)


def _new_axes():
    """Return a fresh white 5x5 inch figure and its single axes.

    A standalone ``Figure`` is used instead of pyplot's global figure so
    that each button click gets its own canvas: handlers cannot draw over
    each other and no figure is left registered in pyplot afterwards.
    """
    fig = Figure(figsize=(5, 5), facecolor="w")
    return fig, fig.subplots()


def _scatter_samples(ax):
    """Scatter the dataset on *ax*, coloured by true class, with a legend."""
    scatter = ax.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    handles, _ = scatter.legend_elements()
    ax.axis("square")
    ax.legend(handles=handles, labels=["outliers", "inliers"], title="true class")
    return scatter


def visualize_input_data() -> Figure:
    """Plot the raw dataset as a scatter plot coloured by true class."""
    fig, ax = _new_axes()
    _scatter_samples(ax)
    ax.set_title("Gaussian inliers with \nuniformly distributed outliers")
    return fig


def plot_decision_boundary() -> Figure:
    """Plot the model's binary inlier/outlier regions under the samples."""
    # Pause kept from the original implementation -- TODO confirm it is needed.
    time.sleep(1)
    fig, ax = _new_axes()
    # Draw on our own axes so the returned figure contains the boundary.
    disp = DecisionBoundaryDisplay.from_estimator(
        loaded_model,
        X,
        response_method="predict",
        alpha=0.5,
        ax=ax,
    )
    _scatter_samples(disp.ax_)
    disp.ax_.set_title("Binary decision boundary \nof IsolationForest")
    return fig


def plot_path_length() -> Figure:
    """Plot the model's ``decision_function`` surface under the samples."""
    # Pause kept from the original implementation -- TODO confirm it is needed.
    time.sleep(1)
    fig, ax = _new_axes()
    # Draw on our own axes so the returned figure contains the surface.
    disp = DecisionBoundaryDisplay.from_estimator(
        loaded_model,
        X,
        response_method="decision_function",
        alpha=0.5,
        ax=ax,
    )
    _scatter_samples(disp.ax_)
    disp.ax_.set_title("Path length decision boundary \nof IsolationForest")
    # NOTE(review): index 1 is kept from the original code; which artist it
    # selects depends on how the installed matplotlib stores contour sets.
    # Confirm the colorbar shows the intended scale.
    fig.colorbar(disp.ax_.collections[1], ax=disp.ax_)
    return fig


title = " An example using IsolationForest for anomaly detection."
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(" **https://scikit-learn.org/stable/auto_examples/ensemble/plot_isolation_forest.html#sphx-glr-auto-examples-ensemble-plot-isolation-forest-py**")

    # Module-level global read by the two decision-boundary handlers above.
    loaded_model = load_hf_model_hub()

    with gr.Tab("Visualize Input dataset"):
        btn = gr.Button(value="Visualize input dataset")
        btn.click(
            visualize_input_data,
            outputs=gr.Plot(label='Visualizing input dataset'),
        )

    with gr.Tab("Plot Decision Boundary"):
        btn_decision = gr.Button(value="Plot decision boundary")
        btn_decision.click(
            plot_decision_boundary,
            outputs=gr.Plot(label='Plot decision boundary'),
        )

    with gr.Tab("Plot Path"):
        btn_path = gr.Button(value="Path length decision boundary")
        btn_path.click(
            plot_path_length,
            outputs=gr.Plot(label='Path length decision boundary'),
        )

    gr.Markdown("## Success")

demo.launch()