|
import os
import pickle
import time

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.figure import Figure
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from skops import hub_utils
|
|
|
|
|
|
|
|
|
# Synthetic data set: two Gaussian inlier clusters (labelled 1) followed by
# uniformly scattered outliers (labelled -1). The order of the `rng` calls is
# significant because they all draw from the same seeded generator.
n_samples = 120
n_outliers = 40
rng = np.random.RandomState(0)

covariance = np.array([[0.5, -0.1], [0.7, 0.4]])
cluster_1 = (0.4 * rng.randn(n_samples, 2)) @ covariance + np.array([2, 2])
cluster_2 = 0.3 * rng.randn(n_samples, 2) + np.array([-2, -2])
outliers = rng.uniform(low=-4, high=4, size=(n_outliers, 2))

# Rows are stacked as: cluster_1, cluster_2, outliers.
X = np.vstack((cluster_1, cluster_2, outliers))
y = np.hstack(
    (
        np.full(2 * n_samples, 1, dtype=int),
        np.full(n_outliers, -1, dtype=int),
    )
)
|
|
|
def load_hf_model_hub():
    """Fetch the pickled IsolationForest from the Hugging Face Hub and load it.

    The repository ``sklearn-docs/anomaly-detection`` is downloaded into the
    local ``downloaded-model`` directory, and ``isolation_forest.pkl`` from
    that directory is unpickled. If the pickle file is already present the
    download is skipped, so restarting the app reuses the local copy.

    Returns:
        The object stored in ``isolation_forest.pkl``.

    Raises:
        Whatever ``hub_utils.download``, ``open`` or ``pickle.load`` raise on
        failure; nothing is caught here.
    """
    repo_id = "sklearn-docs/anomaly-detection"
    download_repo = "downloaded-model"
    model_path = os.path.join(download_repo, "isolation_forest.pkl")

    # Only download when the model file is missing. NOTE(review): skops'
    # hub_utils.download is understood to reject a non-empty destination
    # directory, which would make a second start-up fail -- confirm against
    # the installed skops version.
    if not os.path.isfile(model_path):
        hub_utils.download(repo_id=repo_id, dst=download_repo)
        # Short pause after a fresh download; its purpose is not evident from
        # this file (presumably lets the files settle on disk).
        time.sleep(2)

    # NOTE(security): unpickling executes arbitrary code from the file. This
    # is acceptable only because the repository is a trusted source.
    with open(model_path, "rb") as model_file:
        return pickle.load(model_file)
|
|
|
|
|
|
|
def visualize_input_data():
    """Scatter-plot the module-level data set ``X`` coloured by label ``y``.

    Returns:
        matplotlib.figure.Figure: a new 5x5 inch, white-background figure
        holding the scatter plot, its legend and title.
    """
    # Build a standalone Figure instead of drawing on pyplot's shared
    # "figure 1": repeated clicks then start from a clean canvas rather than
    # stacking artists, and nothing accumulates in pyplot's global registry.
    fig = Figure(figsize=(5, 5), facecolor="w")
    ax = fig.subplots()

    scatter = ax.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    handles, _ = scatter.legend_elements()
    ax.axis("square")
    # Handles come back in ascending label order: -1 (outliers), then 1.
    ax.legend(handles=handles, labels=["outliers", "inliers"], title="true class")
    ax.set_title("Gaussian inliers with \nuniformly distributed outliers")
    return fig
|
|
|
|
|
|
|
|
|
from sklearn.inspection import DecisionBoundaryDisplay |
|
|
|
def plot_decision_boundary():
    """Plot the binary (``predict``) decision regions of the loaded model.

    Uses the module-level ``loaded_model`` and the data set ``X`` / ``y``.
    The decision surface, the data points, the legend and the title are all
    drawn on the same axes.

    Returns:
        matplotlib.figure.Figure: a new 5x5 inch, white-background figure
        containing the decision surface with the data points on top.
    """
    # Create the target axes explicitly and hand them to the display.
    # Without ``ax=`` the display opens its own pyplot figure, so the figure
    # returned from here would not contain the decision surface.
    fig = Figure(figsize=(5, 5), facecolor="w")
    ax = fig.subplots()

    disp = DecisionBoundaryDisplay.from_estimator(
        loaded_model,
        X,
        response_method="predict",
        alpha=0.5,
        ax=ax,
    )

    scatter = disp.ax_.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    handles, _ = scatter.legend_elements()
    disp.ax_.set_title("Binary decision boundary \nof IsolationForest")
    disp.ax_.axis("square")
    # Handles come back in ascending label order: -1 (outliers), then 1.
    disp.ax_.legend(
        handles=handles, labels=["outliers", "inliers"], title="true class"
    )
    return fig
|
|
|
def plot_path_length():
    """Plot the ``decision_function`` surface of the loaded model.

    Uses the module-level ``loaded_model`` and the data set ``X`` / ``y``.
    The surface, the data points, the legend, the title and a colorbar for
    the surface are all placed on the same figure.

    Returns:
        matplotlib.figure.Figure: a new 5x5 inch, white-background figure
        containing the decision-function surface, the data points and a
        colorbar.
    """
    # Create the target axes explicitly and hand them to the display.
    # Without ``ax=`` the display opens its own pyplot figure, so the figure
    # returned from here would not contain the surface.
    fig = Figure(figsize=(5, 5), facecolor="w")
    ax = fig.subplots()

    disp = DecisionBoundaryDisplay.from_estimator(
        loaded_model,
        X,
        response_method="decision_function",
        alpha=0.5,
        ax=ax,
    )

    scatter = disp.ax_.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    handles, _ = scatter.legend_elements()
    disp.ax_.set_title("Path length decision boundary \nof IsolationForest")
    disp.ax_.axis("square")
    # Handles come back in ascending label order: -1 (outliers), then 1.
    disp.ax_.legend(
        handles=handles, labels=["outliers", "inliers"], title="true class"
    )
    # Attach the colorbar to the surface artist itself. Indexing into
    # ``ax.collections`` is fragile because what sits at a given position
    # depends on how matplotlib stores contour sets.
    fig.colorbar(disp.surface_, ax=disp.ax_)
    return fig
|
|
|
|
|
|
|
title = " An example using IsolationForest for anomaly detection."

# Gradio front end: a heading, a link to the scikit-learn example, and one
# tab per plot. Each tab holds a button and the plot area that the button's
# callback fills.
with gr.Blocks(title=title) as demo:
    gr.Markdown("# " + title)
    gr.Markdown(" **https://scikit-learn.org/stable/auto_examples/ensemble/plot_isolation_forest.html#sphx-glr-auto-examples-ensemble-plot-isolation-forest-py**")

    # Loaded once while the UI is built; the plotting callbacks read this
    # module-level name.
    loaded_model = load_hf_model_hub()

    with gr.Tab("Visualize Input dataset"):
        btn = gr.Button(value="Visualize input dataset")
        input_plot = gr.Plot(label='Visualizing input dataset')
        btn.click(visualize_input_data, outputs=input_plot)

    with gr.Tab("Plot Decision Boundary"):
        btn_decision = gr.Button(value="Plot decision boundary")
        decision_plot = gr.Plot(label='Plot decision boundary')
        btn_decision.click(plot_decision_boundary, outputs=decision_plot)

    with gr.Tab("Plot Path"):
        btn_path = gr.Button(value="Path length decision boundary")
        path_plot = gr.Plot(label='Path length decision boundary')
        btn_path.click(plot_path_length, outputs=path_plot)

    gr.Markdown("## Success")

demo.launch()