Jayabalambika's picture
Update app.py
a1fddda
raw
history blame
4.04 kB
import os
import pandas as pd
from sklearn.ensemble import IsolationForest
import numpy as np
from sklearn.model_selection import train_test_split
import gradio as gr
import matplotlib.pyplot as plt
from skops import hub_utils
import pickle
import time
# Data preparation: two Gaussian inlier clusters plus uniformly scattered outliers.
n_samples, n_outliers = 120, 40
rng = np.random.RandomState(0)  # fixed seed keeps the demo data reproducible

# The draws below must stay in this order (cluster_1, cluster_2, outliers),
# otherwise the seeded generator yields a different dataset.
covariance = np.array([[0.5, -0.1], [0.7, 0.4]])  # linear map that deforms cluster_1
cluster_1 = 0.4 * rng.randn(n_samples, 2) @ covariance + np.array([2, 2])  # general deformed cluster
cluster_2 = 0.3 * rng.randn(n_samples, 2) + np.array([-2, -2])  # spherical cluster
outliers = rng.uniform(low=-4, high=4, size=(n_outliers, 2))

# Feature matrix: 120 + 120 + 40 = 280 rows, 2 columns.
X = np.vstack((cluster_1, cluster_2, outliers))

# Ground truth: +1 for every cluster point, -1 for every outlier.
y = np.hstack(
    (
        np.ones(2 * n_samples, dtype=int),
        np.full(n_outliers, -1, dtype=int),
    )
)
def load_hf_model_hub():
    """Fetch the pre-trained model from the Hugging Face Hub and unpickle it.

    The repository ``sklearn-docs/anomaly-detection`` is downloaded into the
    local ``downloaded-model`` directory (relative to the working directory)
    and ``isolation_forest.pkl`` inside it is loaded.

    Returns:
        The object stored in ``isolation_forest.pkl``.
    """
    repo_id = "sklearn-docs/anomaly-detection"
    download_repo = "downloaded-model"
    model_path = os.path.join(download_repo, "isolation_forest.pkl")

    # Reuse an earlier download when the pickle is already on disk.
    # NOTE(review): skops' download is understood to refuse a non-empty
    # destination directory, which would make a restart in the same working
    # directory fail -- confirm against the skops documentation.
    if not os.path.exists(model_path):
        hub_utils.download(repo_id=repo_id, dst=download_repo)

    # SECURITY: unpickling executes arbitrary code embedded in the file; this
    # is acceptable only because the repository above is a trusted source.
    with open(model_path, "rb") as model_file:
        return pickle.load(model_file)
# Visualize the data as a scatter plot
def visualize_input_data():
    """Draw the module-level dataset ``X`` coloured by its true label ``y``.

    Returns:
        matplotlib.figure.Figure: a new 5x5 inch figure with the scatter plot,
        its legend and title.
    """
    # Function-scope import: the module itself only imports matplotlib.pyplot.
    from matplotlib.figure import Figure

    # Build a fresh figure that is not registered with pyplot, so every click
    # starts from an empty canvas instead of drawing on top of whatever an
    # earlier callback left on a shared pyplot figure.
    fig = Figure(figsize=(5, 5), facecolor="w")
    ax = fig.subplots()

    scatter = ax.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    # y holds -1 (outliers) and +1 (inliers); the handles are presumably
    # returned in ascending value order, which matches the label order below.
    handles, _ = scatter.legend_elements()
    ax.axis("square")
    ax.legend(handles=handles, labels=["outliers", "inliers"], title="true class")
    ax.set_title("Gaussian inliers with \nuniformly distributed outliers")
    return fig
from sklearn.inspection import DecisionBoundaryDisplay
def plot_decision_boundary():
    """Plot the binary inlier/outlier regions predicted by ``loaded_model``.

    The decision surface is computed with ``response_method="predict"`` over
    the module-level dataset ``X`` and overlaid with the data points coloured
    by their true label ``y``.

    Returns:
        matplotlib.figure.Figure: a new 5x5 inch figure containing both the
        decision surface and the scatter plot.
    """
    # Function-scope import: the module itself only imports matplotlib.pyplot.
    from matplotlib.figure import Figure

    # Create the figure up front and hand its axes to the display. Without an
    # explicit ``ax`` the display opens its own pyplot figure, so the surface
    # would end up on a different figure than the one returned to the UI.
    fig1 = Figure(figsize=(5, 5), facecolor="w")
    ax = fig1.subplots()

    disp = DecisionBoundaryDisplay.from_estimator(
        loaded_model,
        X,
        response_method="predict",
        alpha=0.5,
        ax=ax,
    )
    scatter = disp.ax_.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    handles, _ = scatter.legend_elements()
    disp.ax_.set_title("Binary decision boundary \nof IsolationForest")
    disp.ax_.axis("square")
    disp.ax_.legend(
        handles=handles, labels=["outliers", "inliers"], title="true class"
    )
    return fig1
def plot_path_length():
    """Plot the continuous ``decision_function`` surface of ``loaded_model``.

    The surface is computed over the module-level dataset ``X``, overlaid with
    the data points coloured by their true label ``y``, and annotated with a
    colorbar for the decision-function values.

    Returns:
        matplotlib.figure.Figure: a new 5x5 inch figure containing the
        surface, the scatter plot, the legend and the colorbar.
    """
    # Function-scope import: the module itself only imports matplotlib.pyplot.
    from matplotlib.figure import Figure

    # Create the figure up front and hand its axes to the display. Without an
    # explicit ``ax`` the display opens its own pyplot figure, so the surface
    # would end up on a different figure than the one returned to the UI.
    fig2 = Figure(figsize=(5, 5), facecolor="w")
    ax = fig2.subplots()

    disp = DecisionBoundaryDisplay.from_estimator(
        loaded_model,
        X,
        response_method="decision_function",
        alpha=0.5,
        ax=ax,
    )
    scatter = disp.ax_.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
    handles, _ = scatter.legend_elements()
    disp.ax_.set_title("Path length decision boundary \nof IsolationForest")
    disp.ax_.axis("square")
    disp.ax_.legend(
        handles=handles, labels=["outliers", "inliers"], title="true class"
    )
    # Use the display's own surface artist as the colorbar mappable; picking
    # an entry of ``ax.collections`` by position depends on how the installed
    # matplotlib version stores contour sets.
    fig2.colorbar(disp.surface_, ax=disp.ax_)
    return fig2
# Gradio UI: a heading, a link to the scikit-learn example this demo follows,
# and one tab per plot. Each tab has a button that renders into its own Plot.
title = " An example using IsolationForest for anomaly detection."

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(" **https://scikit-learn.org/stable/auto_examples/ensemble/plot_isolation_forest.html#sphx-glr-auto-examples-ensemble-plot-isolation-forest-py**")

    # Loaded once while the UI is being built; the plotting callbacks read
    # this module-level name.
    loaded_model = load_hf_model_hub()

    with gr.Tab("Visualize Input dataset"):
        btn = gr.Button(value="Visualize input dataset")
        input_plot = gr.Plot(label='Visualizing input dataset')
        btn.click(visualize_input_data, outputs=input_plot)

    with gr.Tab("Plot Decision Boundary"):
        btn_decision = gr.Button(value="Plot decision boundary")
        decision_plot = gr.Plot(label='Plot decision boundary')
        btn_decision.click(plot_decision_boundary, outputs=decision_plot)

    with gr.Tab("Plot Path"):
        btn_path = gr.Button(value="Path length decision boundary")
        path_plot = gr.Plot(label='Path length decision boundary')
        btn_path.click(plot_path_length, outputs=path_plot)

    gr.Markdown("## Success")

demo.launch()