File size: 4,042 Bytes
303f2f6 783a1d4 64eb9a6 fd886b3 64eb9a6 303f2f6 a1fddda 303f2f6 783a1d4 a1fddda 783a1d4 303f2f6 fd886b3 64eb9a6 fd886b3 a1fddda fd886b3 64eb9a6 fd886b3 64eb9a6 fd886b3 a1fddda 64eb9a6 fd886b3 64eb9a6 a1fddda 64eb9a6 783a1d4 303f2f6 783a1d4 303f2f6 783a1d4 a1fddda 303f2f6 a1fddda 303f2f6 a1fddda 64eb9a6 96f4763 64eb9a6 783a1d4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
import os
import pandas as pd
from sklearn.ensemble import IsolationForest
import numpy as np
from sklearn.model_selection import train_test_split
import gradio as gr
import matplotlib.pyplot as plt
from skops import hub_utils
import pickle
import time
# Data preparation: two Gaussian inlier clusters plus uniformly scattered outliers.
n_samples, n_outliers = 120, 40
rng = np.random.RandomState(0)  # fixed seed so the demo data is identical on every start

# 2x2 matrix that right-multiplies the first cluster's samples to deform it.
covariance = np.array([[0.5, -0.1], [0.7, 0.4]])

# NOTE: the three draws below consume the shared generator in this exact order;
# reordering them would change every generated point.
cluster_1 = 0.4 * rng.randn(n_samples, 2) @ covariance + np.array([2, 2])  # general deformed cluster
cluster_2 = 0.3 * rng.randn(n_samples, 2) + np.array([-2, -2])  # spherical cluster
outliers = rng.uniform(low=-4, high=4, size=(n_outliers, 2))

# Feature matrix: 120 + 120 + 40 = 280 rows, 2 columns.
X = np.vstack((cluster_1, cluster_2, outliers))

# Labels aligned with the rows of X: +1 for both inlier clusters, -1 for outliers.
y = np.hstack(
    (
        np.full(2 * n_samples, 1, dtype=int),
        np.full(n_outliers, -1, dtype=int),
    )
)
def load_hf_model_hub():
    """Download the pre-trained model from the Hugging Face Hub and unpickle it.

    The repository ``sklearn-docs/anomaly-detection`` is downloaded into the
    local ``downloaded-model`` directory, and ``isolation_forest.pkl`` inside
    it is loaded with :mod:`pickle`.  If that file is already present from a
    previous run, the download step is skipped and the local copy is reused.

    Returns:
        The object stored in ``isolation_forest.pkl``.

    Raises:
        OSError: If the model file cannot be opened after the download step.
    """
    repo_id = "sklearn-docs/anomaly-detection"
    download_repo = "downloaded-model"
    # Derive the pickle path from the download directory so the two cannot drift apart.
    model_path = os.path.join(download_repo, "isolation_forest.pkl")

    # Only download when the model is not already on disk: skops is documented
    # to refuse a destination directory that exists and is non-empty, which
    # would otherwise make every restart after the first one fail.
    if not os.path.isfile(model_path):
        hub_utils.download(repo_id=repo_id, dst=download_repo)
        # Pause kept from the original implementation.
        # NOTE(review): the download call appears to be synchronous, so this
        # delay is presumably unnecessary -- confirm before removing.
        time.sleep(2)

    # SECURITY: unpickling executes arbitrary code embedded in the file.  This
    # is only acceptable because the repository above is trusted; never point
    # repo_id at an untrusted source without switching to a safe format.
    with open(model_path, "rb") as model_file:
        return pickle.load(model_file)
# Visualize the data as a scatter plot
def visualize_input_data():
    """Build a scatter plot of the module-level dataset ``X`` coloured by ``y``.

    A new, self-contained figure is created on every call instead of drawing
    on the shared pyplot figure number 1.  Drawing on the shared figure without
    clearing it stacked duplicate points and legends on repeated clicks and
    picked up leftovers from the other plotting callbacks.

    Returns:
        matplotlib.figure.Figure: 5x5 inch figure holding the scatter plot,
        a "true class" legend and the plot title.
    """
    # Local import: a Figure built without pyplot is not registered in
    # pyplot's global figure list, so nothing is shared between calls.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(5, 5), facecolor="w")
    ax = fig.subplots()

    scatter = ax.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    # One legend handle per distinct value in y; the auto-generated labels are
    # replaced by readable class names below.
    handles, _ = scatter.legend_elements()

    ax.axis("square")
    ax.legend(handles=handles, labels=["outliers", "inliers"], title="true class")
    ax.set_title("Gaussian inliers with \nuniformly distributed outliers")
    return fig
from sklearn.inspection import DecisionBoundaryDisplay
def plot_decision_boundary():
    """Plot the binary inlier/outlier decision regions of the loaded model.

    Uses the module-level ``loaded_model`` with ``response_method="predict"``
    and overlays the module-level dataset ``X`` coloured by ``y``.

    The previous implementation let ``DecisionBoundaryDisplay`` draw on a
    figure of its own and then returned pyplot figure number 1, which held
    only the scatter points: the decision regions and the title never reached
    the caller.  Everything is now drawn on one explicitly created axes and
    that axes' figure is returned.

    Returns:
        matplotlib.figure.Figure: 5x5 inch figure with the decision regions,
        the data points, a "true class" legend and the title.
    """
    # Local import: a Figure built without pyplot is not kept alive by
    # pyplot's global registry, so repeated clicks do not accumulate figures.
    from matplotlib.figure import Figure

    # Pause kept from the original implementation.
    # NOTE(review): purpose not evident from this file -- confirm whether needed.
    time.sleep(1)

    fig = Figure(figsize=(5, 5), facecolor="w")
    ax = fig.subplots()

    # Passing ax makes the display draw on our figure instead of creating one.
    disp = DecisionBoundaryDisplay.from_estimator(
        loaded_model,
        X,
        response_method="predict",
        alpha=0.5,
        ax=ax,
    )

    scatter = disp.ax_.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    # One legend handle per distinct value in y; labels are overridden below.
    handles, _ = scatter.legend_elements()

    disp.ax_.set_title("Binary decision boundary \nof IsolationForest")
    disp.ax_.axis("square")
    disp.ax_.legend(handles=handles, labels=["outliers", "inliers"], title="true class")
    return disp.figure_
def plot_path_length():
    """Plot the loaded model's ``decision_function`` surface with a colorbar.

    Uses the module-level ``loaded_model`` with
    ``response_method="decision_function"`` and overlays the module-level
    dataset ``X`` coloured by ``y``.

    The previous implementation drew the surface, points and title on the
    figure created by ``DecisionBoundaryDisplay`` but attached the legend and
    colorbar to pyplot figure number 1 and returned that one, so the caller
    received a figure without the actual plot.  Everything is now drawn on one
    explicitly created axes and that axes' figure is returned.

    Returns:
        matplotlib.figure.Figure: 5x5 inch figure with the decision surface,
        the data points, a "true class" legend, a colorbar and the title.
    """
    # Local import: a Figure built without pyplot is not kept alive by
    # pyplot's global registry, so repeated clicks do not accumulate figures.
    from matplotlib.figure import Figure

    # Pause kept from the original implementation.
    # NOTE(review): purpose not evident from this file -- confirm whether needed.
    time.sleep(1)

    fig = Figure(figsize=(5, 5), facecolor="w")
    ax = fig.subplots()

    # Passing ax makes the display draw on our figure instead of creating one.
    disp = DecisionBoundaryDisplay.from_estimator(
        loaded_model,
        X,
        response_method="decision_function",
        alpha=0.5,
        ax=ax,
    )

    scatter = disp.ax_.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    # One legend handle per distinct value in y; labels are overridden below.
    handles, _ = scatter.legend_elements()

    disp.ax_.set_title("Path length decision boundary \nof IsolationForest")
    disp.ax_.axis("square")
    disp.ax_.legend(handles=handles, labels=["outliers", "inliers"], title="true class")

    # Same mappable as the original (second collection on the axes), now
    # attached to the axes it belongs to.
    # NOTE(review): which artist sits at index 1 presumably depends on how the
    # installed matplotlib registers contour sets -- confirm the colorbar shows
    # the intended scale (disp.surface_ would select the surface explicitly).
    fig.colorbar(disp.ax_.collections[1], ax=disp.ax_)
    return disp.figure_
# Gradio user interface: one tab per plot, each with a button that renders it.
title = " An example using IsolationForest for anomaly detection."

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(" **https://scikit-learn.org/stable/auto_examples/ensemble/plot_isolation_forest.html#sphx-glr-auto-examples-ensemble-plot-isolation-forest-py**")

    # Loaded once while the UI is being built; the plotting callbacks read this
    # module-level name when their button is clicked.
    loaded_model = load_hf_model_hub()

    with gr.Tab("Visualize Input dataset"):
        btn = gr.Button(value="Visualize input dataset")
        input_plot = gr.Plot(label="Visualizing input dataset")
        btn.click(visualize_input_data, outputs=input_plot)

    with gr.Tab("Plot Decision Boundary"):
        btn_decision = gr.Button(value="Plot decision boundary")
        decision_plot = gr.Plot(label="Plot decision boundary")
        btn_decision.click(plot_decision_boundary, outputs=decision_plot)

    with gr.Tab("Plot Path"):
        btn_path = gr.Button(value="Path length decision boundary")
        path_plot = gr.Plot(label="Path length decision boundary")
        btn_path.click(plot_path_length, outputs=path_plot)

    gr.Markdown("## Success")

# Start the web server for the demo.
demo.launch()