Spaces:

sklearn-docs
/

IsolationForest-Model-for-Anomaly-Detection

Runtime error

IsolationForest-Model-for-Anomaly-Detection

File size: 4,042 Bytes

303f2f6
 
 
 
 
 
 
 
783a1d4
64eb9a6
 
fd886b3
64eb9a6
303f2f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a1fddda
 
 
 
 
 
 
 
303f2f6
 
783a1d4
 
 
 
 
 
 
 
a1fddda
783a1d4
303f2f6
fd886b3
64eb9a6
 
 
 
fd886b3
a1fddda
 
 
fd886b3
 
64eb9a6
fd886b3
64eb9a6
 
 
 
fd886b3
a1fddda
 
64eb9a6
 
 
 
 
 
 
fd886b3
 
64eb9a6
a1fddda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64eb9a6
 
783a1d4
303f2f6
783a1d4
 
 
303f2f6
783a1d4
a1fddda
 
303f2f6
a1fddda
 
 
303f2f6
a1fddda
 
 
 
 
 
 
64eb9a6
96f4763
64eb9a6
783a1d4

import os

import pandas as pd
from sklearn.ensemble import IsolationForest

import numpy as np
from sklearn.model_selection import train_test_split
import gradio as gr
import matplotlib.pyplot as plt
from skops import hub_utils
import pickle
import time



# Data preparation
# Two Gaussian inlier clusters (label 1) plus uniformly scattered outliers
# (label -1). The draws from ``rng`` happen in a fixed order -- cluster_1,
# cluster_2, outliers -- so the dataset is reproducible for seed 0.
n_samples, n_outliers = 120, 40
rng = np.random.RandomState(0)
covariance = np.array([[0.5, -0.1], [0.7, 0.4]])

# General deformed cluster: scaled normal noise sheared by ``covariance``
# and shifted to (2, 2).
cluster_1 = (0.4 * rng.randn(n_samples, 2)) @ covariance + np.array([2, 2])
# Spherical cluster shifted to (-2, -2).
cluster_2 = 0.3 * rng.randn(n_samples, 2) + np.array([-2, -2])
# Outliers drawn uniformly from the square [-4, 4) x [-4, 4).
outliers = rng.uniform(low=-4, high=4, size=(n_outliers, 2))

# 120 + 120 + 40 = 280 two-dimensional points, stacked row-wise.
X = np.vstack([cluster_1, cluster_2, outliers])
# Ground truth in the same row order as X: +1 for both clusters, -1 for outliers.
y = np.concatenate(
    [
        np.full(2 * n_samples, 1, dtype=int),
        np.full(n_outliers, -1, dtype=int),
    ]
)

def load_hf_model_hub():
    """Fetch the pickled model from the Hugging Face Hub and load it.

    The repository ``sklearn-docs/anomaly-detection`` is downloaded into the
    local ``downloaded-model`` directory with ``skops.hub_utils.download`` and
    the file ``isolation_forest.pkl`` inside it is unpickled.

    Returns:
        The object stored in ``isolation_forest.pkl`` (expected to be a fitted
        estimator; its type is not checked here).
    """
    repo_id = "sklearn-docs/anomaly-detection"
    download_repo = "downloaded-model"
    # Derive the pickle location from the download directory so the two
    # cannot drift apart.
    model_path = os.path.join(".", download_repo, "isolation_forest.pkl")

    # Only download when the model is not already on disk.
    # NOTE(review): skops' download is understood to raise if ``dst`` already
    # exists and is non-empty, which would break every restart that keeps the
    # working directory -- confirm against the skops documentation.
    if not os.path.exists(model_path):
        hub_utils.download(repo_id=repo_id, dst=download_repo)
        # Short pause kept from the original implementation after a download.
        time.sleep(2)

    # SECURITY: unpickling executes arbitrary code embedded in the file; this
    # is only acceptable because the repository is a trusted source.
    with open(model_path, "rb") as model_file:
        loaded_model = pickle.load(model_file)
    return loaded_model

#Visualize the data as a scatter plot

def visualize_input_data():
    """Draw the labelled toy dataset as a scatter plot.

    The points come from the module-level ``X`` and are coloured by the
    ground-truth labels in ``y``.

    Returns:
        The matplotlib figure (pyplot figure number 1) containing the plot.
    """
    fig = plt.figure(1, facecolor="w", figsize=(5, 5))
    # Figure 1 persists between calls and is also used by the other plotting
    # callbacks in this file; start from a blank figure so artists from
    # earlier calls do not accumulate or leak into this plot.
    fig.clf()
    ax = fig.add_subplot(111)

    scatter = ax.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    # One legend handle per distinct label value, in ascending order
    # (-1 then 1), which matches the label order passed to legend() below.
    handles, _ = scatter.legend_elements()
    ax.axis("square")
    ax.legend(handles=handles, labels=["outliers", "inliers"], title="true class")
    ax.set_title("Gaussian inliers with \nuniformly distributed outliers")
    return fig




from sklearn.inspection import DecisionBoundaryDisplay

def plot_decision_boundary():
    """Plot the binary inlier/outlier regions predicted by the loaded model.

    The background shows ``loaded_model.predict`` evaluated over the plane
    (via ``DecisionBoundaryDisplay``); the dataset ``X`` is drawn on top,
    coloured by the ground-truth labels ``y``.

    Returns:
        The matplotlib figure (pyplot figure number 1) containing both the
        decision surface and the scatter plot.
    """
    # Short pause kept from the original implementation.
    time.sleep(1)

    fig1 = plt.figure(1, facecolor="w", figsize=(5, 5))
    # Figure 1 is reused across calls; wipe whatever was drawn previously.
    fig1.clf()
    ax = fig1.add_subplot(111)

    # Pass our own axes: without ``ax`` the display creates a brand-new
    # figure, so the surface would never appear on the figure we return
    # (and a new figure would be leaked on every call).
    disp = DecisionBoundaryDisplay.from_estimator(
        loaded_model,
        X,
        response_method="predict",
        alpha=0.5,
        ax=ax,
    )
    scatter = disp.ax_.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    # Handles come back in ascending label order (-1 then 1), matching the
    # label order passed to legend() below.
    handles, _ = scatter.legend_elements()
    disp.ax_.set_title("Binary decision boundary \nof IsolationForest")
    disp.ax_.axis("square")
    disp.ax_.legend(
        handles=handles, labels=["outliers", "inliers"], title="true class"
    )
    return fig1

def plot_path_length():
    """Plot the loaded model's ``decision_function`` over the plane.

    The background shows ``loaded_model.decision_function`` evaluated on a
    grid (via ``DecisionBoundaryDisplay``); the dataset ``X`` is drawn on top,
    coloured by the ground-truth labels ``y``, and a colorbar is attached.

    Returns:
        The matplotlib figure (pyplot figure number 1) containing the
        surface, the scatter plot, the legend and the colorbar.
    """
    # Short pause kept from the original implementation.
    time.sleep(1)

    fig2 = plt.figure(1, facecolor="w", figsize=(5, 5))
    # Figure 1 is reused across calls; wipe whatever was drawn previously.
    fig2.clf()
    ax = fig2.add_subplot(111)

    # Pass our own axes: without ``ax`` the display creates a brand-new
    # figure, so the surface would never appear on the figure we return
    # (and a new figure would be leaked on every call).
    disp = DecisionBoundaryDisplay.from_estimator(
        loaded_model,
        X,
        response_method="decision_function",
        alpha=0.5,
        ax=ax,
    )
    scatter = disp.ax_.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    # Handles come back in ascending label order (-1 then 1), matching the
    # label order passed to legend() below.
    handles, _ = scatter.legend_elements()
    disp.ax_.set_title("Path length decision boundary \nof IsolationForest")
    disp.ax_.axis("square")
    disp.ax_.legend(
        handles=handles, labels=["outliers", "inliers"], title="true class"
    )
    # Attach the colorbar to the figure/axes that actually hold the mappable.
    # NOTE(review): which artist sits at index 1 depends on how the installed
    # matplotlib registers the contour set on the axes; ``disp.surface_``
    # would select the decision surface explicitly -- confirm the intended
    # mappable before changing it.
    fig2.colorbar(disp.ax_.collections[1], ax=disp.ax_)
    return fig2



# Heading text; also passed to gr.Blocks as the page title.
title = " An example using IsolationForest for anomaly detection."

# Load the model once at start-up; the plotting callbacks read this
# module-level name when a button is clicked.
loaded_model = load_hf_model_hub()

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    # Link to the scikit-learn example this demo is based on.
    gr.Markdown(
        " **https://scikit-learn.org/stable/auto_examples/ensemble/plot_isolation_forest.html#sphx-glr-auto-examples-ensemble-plot-isolation-forest-py**"
    )

    # Each tab holds one button and the plot area that its callback fills.
    with gr.Tab("Visualize Input dataset"):
        btn = gr.Button(value="Visualize input dataset")
        input_plot = gr.Plot(label="Visualizing input dataset")
        btn.click(visualize_input_data, outputs=input_plot)

    with gr.Tab("Plot Decision Boundary"):
        btn_decision = gr.Button(value="Plot decision boundary")
        decision_plot = gr.Plot(label="Plot decision boundary")
        btn_decision.click(plot_decision_boundary, outputs=decision_plot)

    with gr.Tab("Plot Path"):
        btn_path = gr.Button(value="Path length decision boundary")
        path_plot = gr.Plot(label="Path length decision boundary")
        btn_path.click(plot_path_length, outputs=path_plot)

    gr.Markdown("## Success")

# Start the web server for the assembled interface.
demo.launch()