# Spaces: Running
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from sklearn.ensemble import IsolationForest | |
from sklearn.preprocessing import StandardScaler | |
# ---------------------------------------------------------------------------
# Page setup: browser-tab metadata, button/heading styling, title and intro.
# set_page_config is issued before any other Streamlit call in this section.
# ---------------------------------------------------------------------------
# NOTE(review): the "π" glyph used as the page icon and in the title looks
# like a mis-decoded emoji -- confirm the intended character with the author.
PAGE_SETTINGS = {"page_title": "Anomaly Detection App", "page_icon": "π"}
st.set_page_config(**PAGE_SETTINGS)

# Stylesheet injected as raw HTML: green action buttons, blue download
# buttons, and matching colours for first- and second-level headings.
CUSTOM_CSS = """
<style>
.stButton>button {
background-color: #4CAF50;
color: white;
font-weight: bold;
border-radius: 5px;
padding: 10px 20px;
}
.stDownloadButton>button {
background-color: #008CBA;
color: white;
font-weight: bold;
border-radius: 5px;
padding: 10px 20px;
}
.stMarkdown h1 {
color: #4CAF50;
}
.stMarkdown h2 {
color: #008CBA;
}
</style>
"""
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# Headline followed by a short description of what the app does.
INTRO_TEXT = """
This app uses the **Isolation Forest** algorithm to detect anomalies in your dataset.
Upload a CSV file, and the app will identify anomalies in the data.
"""
st.title("π Anomaly Detection App")
st.write(INTRO_TEXT)
def prepare_features(df: pd.DataFrame, features: list) -> pd.DataFrame:
    """Return the selected columns as a float frame containing only usable rows.

    Every selected column is coerced to numbers; values that cannot be parsed
    become NaN. Rows holding a NaN or an infinity in any selected column are
    dropped, because the scaler/model downstream cannot be fitted on them.

    Args:
        df: The uploaded dataset.
        features: Names of the columns chosen for anomaly detection.

    Returns:
        A float64 DataFrame with the selected columns. Its index is a subset
        of ``df.index``, so it can be used to pick the matching original rows.
    """
    numeric = df[list(features)].apply(pd.to_numeric, errors="coerce")
    numeric = numeric.astype("float64")
    # Infinities are treated like missing values so that one dropna() call
    # removes every row the model cannot handle.
    numeric = numeric.replace([np.inf, -np.inf], np.nan)
    return numeric.dropna()


def main():
    """Drive the upload -> feature selection -> detection -> report flow."""
    # Upload dataset
    uploaded_file = st.file_uploader("Upload your dataset (CSV file)", type=["csv"])
    if uploaded_file is None:
        st.write("Please upload a CSV file to get started.")
        return

    # Load the dataset; report unreadable uploads instead of showing a traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        return

    # Show dataset preview
    st.write("### Dataset Preview")
    st.write(df.head())

    # Select features for anomaly detection
    st.write("### Select Features")
    features = st.multiselect("Choose the features to use for anomaly detection", df.columns)
    if not features:
        return

    # Allow user to adjust contamination parameter
    st.write("### Adjust Model Parameters")
    contamination = st.slider("Contamination (proportion of anomalies)", 0.01, 0.5, 0.1, 0.01)

    # Keep only rows whose selected features are finite numbers.
    numeric = prepare_features(df, features)
    if numeric.empty:
        st.error(
            "None of the rows have numeric values in all selected features. "
            "Please choose different features."
        )
        return
    skipped = len(df) - len(numeric)
    if skipped:
        st.warning(
            f"Skipped {skipped} row(s) with missing or non-numeric values "
            "in the selected features."
        )

    # Preprocess the data
    scaled = StandardScaler().fit_transform(numeric)

    # Train the Isolation Forest model and predict anomalies
    with st.spinner("Training the model and detecting anomalies..."):
        model = IsolationForest(n_estimators=100, contamination=contamination, random_state=42)
        model.fit(scaled)
        predictions = model.predict(scaled)

    # Work on a copy so the frame returned by read_csv is left untouched.
    results = df.loc[numeric.index].copy()
    results['anomaly'] = predictions  # -1 for anomaly, 1 for normal

    # Display results
    st.write("### Anomaly Detection Results")
    st.write(results)

    # Filter and display only anomalies
    anomalies = results[results['anomaly'] == -1]
    st.write(f"### Detected Anomalies (Total: {len(anomalies)})")
    st.write(anomalies)

    # Visualize anomalies
    st.write("### Visualize Anomalies")
    if len(features) >= 2:
        # Plot the coerced numeric values so both axes are continuous.
        plot_data = numeric.assign(anomaly=predictions)
        fig, ax = plt.subplots()
        # Draw on this figure's axes explicitly rather than on pyplot's
        # implicit "current axes".
        sns.scatterplot(
            data=plot_data,
            x=features[0],
            y=features[1],
            hue='anomaly',
            palette={1: 'blue', -1: 'red'},
            ax=ax,
        )
        st.pyplot(fig)
        # Release the figure; otherwise one more stays open after every rerun.
        plt.close(fig)
    else:
        st.warning("Please select at least 2 features to visualize anomalies.")

    # Download results as CSV
    st.write("### Download Results")
    st.download_button(
        label="Download Results as CSV",
        data=results.to_csv(index=False).encode('utf-8'),
        file_name='anomaly_detection_results.csv',
        mime='text/csv',
    )


# `streamlit run` executes the script as the `__main__` module, so the app
# still renders on every run while the helpers above stay importable.
if __name__ == "__main__":
    main()