import os
import pickle
import time

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from skops import hub_utils

# Data preparation: two Gaussian inlier clusters plus uniformly scattered outliers.
n_samples, n_outliers = 120, 40
rng = np.random.RandomState(0)  # fixed seed so the demo data is reproducible
covariance = np.array([[0.5, -0.1], [0.7, 0.4]])
# General deformed cluster: scaled normal samples transformed by `covariance`,
# then shifted to (2, 2).
cluster_1 = 0.4 * rng.randn(n_samples, 2) @ covariance + np.array([2, 2])
# Spherical cluster shifted to (-2, -2).
cluster_2 = 0.3 * rng.randn(n_samples, 2) + np.array([-2, -2])
outliers = rng.uniform(low=-4, high=4, size=(n_outliers, 2))

# 120 + 120 + 40 = 280 two-dimensional points.
X = np.concatenate([cluster_1, cluster_2, outliers])
# Ground-truth labels: +1 for both inlier clusters, -1 for the outliers.
y = np.concatenate(
    [np.ones((2 * n_samples), dtype=int), -np.ones((n_outliers), dtype=int)]
)


def load_hf_model_hub():
    """Download the pretrained model repository and return the unpickled model.

    The repository ``sklearn-docs/anomaly-detection`` is downloaded into the
    local ``downloaded-model`` directory (relative to the current working
    directory) and ``isolation_forest.pkl`` inside it is loaded with ``pickle``.
    If that pickle file is already present, the download is skipped and the
    existing copy is reused.

    Returns:
        The object stored in ``isolation_forest.pkl``.
    """
    repo_id = "sklearn-docs/anomaly-detection"
    download_repo = "downloaded-model"
    model_path = os.path.join(download_repo, "isolation_forest.pkl")

    # NOTE(review): skops documents `dst` as "empty or non-existent", so a
    # second run against an already populated directory is expected to fail.
    # Reusing the existing download keeps restarts working - confirm against
    # the installed skops version.
    if not os.path.exists(model_path):
        hub_utils.download(repo_id=repo_id, dst=download_repo)
        # Pause kept from the original implementation; presumably it gives the
        # downloaded files time to settle before they are read.
        time.sleep(2)

    # SECURITY: pickle can execute arbitrary code while loading. Only load
    # artifacts from a repository you trust.
    with open(model_path, "rb") as model_file:
        return pickle.load(model_file)


# Visualize the data as a scatter plot
def visualize_input_data():
    """Draw the toy dataset as a scatter plot coloured by its true class.

    Figure number 1 is reused on every call and cleared first, so repeated
    calls redraw the plot instead of stacking duplicate points and legends on
    the same axes.

    Returns:
        The matplotlib figure containing the scatter plot.
    """
    fig = plt.figure(1, facecolor="w", figsize=(5, 5))
    fig.clear()
    ax = fig.add_subplot(111)

    scatter = ax.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    # One handle per distinct label value in ascending order: -1 first
    # (outliers), then +1 (inliers), which matches the labels given below.
    handles, _ = scatter.legend_elements()
    ax.axis("square")
    ax.legend(handles=handles, labels=["outliers", "inliers"], title="true class")
    ax.set_title("Gaussian inliers with \nuniformly distributed outliers")
    return fig


title = " An example using IsolationForest for anomaly detection."
description1 = "The isolation forest is an Ensemble of Isolation trees and it isolates the datapoints using recursive random partitioning."
# Outliers are isolated with fewer random splits than inliers; the earlier
# wording stated the opposite.
description2 = "In case of outliers the number of splits required is fewer than that required for inliers."
description3 = "We will use the toy dataset as given in the scikit-learn page for Isolation Forest."
# Build the Gradio interface: an introduction followed by three tabs showing
# the input data, the decision boundary image and the isolation-path image.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    # Outliers are isolated with fewer random splits than inliers; the earlier
    # wording of this introduction stated the opposite.
    gr.Markdown(
        " The isolation forest is an Ensemble of Isolation trees and it isolates"
        " the data points using recursive random partitioning."
        " In case of outliers the number of splits required is fewer than"
        " that required for inliers."
        " We will use the toy dataset for our educational demo as given in the"
        " scikit-learn page for Isolation Forest. "
    )

    gr.Markdown(" **https://scikit-learn.org/stable/auto_examples/ensemble/plot_isolation_forest.html#sphx-glr-auto-examples-ensemble-plot-isolation-forest-py**")

    # Downloads the model repository; the image files displayed in the tabs
    # below are read from the same downloaded directory.
    loaded_model = load_hf_model_hub()

    with gr.Tab("# Visualize Input dataset"):
        btn = gr.Button(value="Visualize input dataset")
        # The plot is produced on demand when the button is clicked.
        btn.click(
            visualize_input_data,
            outputs=gr.Plot(label='Visualizing input dataset'),
        )

    with gr.Tab("# Plot Decision Boundary"):
        image_decision = gr.Image('./downloaded-model/decision_boundary.png')

    with gr.Tab("# Plot Path"):
        image_path = gr.Image('./downloaded-model/plot_path.png')

    gr.Markdown("## Success")

demo.launch()