Spaces:
Running
Running
File size: 3,428 Bytes
7a09bec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
# --- Page setup -------------------------------------------------------------
# Configure the browser-tab title and icon.
# NOTE(review): the "π" glyph used for the icon (and in the title below) looks
# like a mis-decoded emoji -- confirm the intended character.
st.set_page_config(
    page_title="Anomaly Detection App",
    page_icon="π",
)

# Stylesheet injected into the page as raw HTML: green action buttons, blue
# download buttons, and matching colours for h1/h2 markdown headings.
custom_css = """
<style>
.stButton>button {
background-color: #4CAF50;
color: white;
font-weight: bold;
border-radius: 5px;
padding: 10px 20px;
}
.stDownloadButton>button {
background-color: #008CBA;
color: white;
font-weight: bold;
border-radius: 5px;
padding: 10px 20px;
}
.stMarkdown h1 {
color: #4CAF50;
}
.stMarkdown h2 {
color: #008CBA;
}
</style>
"""
st.markdown(custom_css, unsafe_allow_html=True)

# Headline followed by a short description of what the app does.
st.title("π Anomaly Detection App")
intro_text = """
This app uses the **Isolation Forest** algorithm to detect anomalies in your dataset.
Upload a CSV file, and the app will identify anomalies in the data.
"""
st.write(intro_text)
def prepare_feature_frame(df, features):
    """Return the model-ready subset of ``df`` for the chosen ``features``.

    Only numeric columns are kept, and any row that contains a missing or
    non-finite value in those columns is dropped. The original row index is
    preserved so the result can be aligned back to ``df``.

    Args:
        df: The uploaded dataset.
        features: Column names picked by the user.

    Returns:
        A ``(usable, skipped)`` tuple: ``usable`` is the filtered numeric
        DataFrame and ``skipped`` lists the selected columns that were
        ignored because they are not numeric.
    """
    selected = df[list(features)]
    numeric = selected.select_dtypes(include="number")
    skipped = [col for col in selected.columns if col not in numeric.columns]
    # NaN and +/-inf both fail the finiteness test, so one mask covers both.
    finite_rows = np.isfinite(numeric.to_numpy(dtype=float)).all(axis=1)
    return numeric.loc[finite_rows], skipped


def run_anomaly_detection(uploaded_file):
    """Drive the app for one uploaded CSV: preview, fit, report, plot, export.

    Args:
        uploaded_file: The file-like object returned by ``st.file_uploader``.
    """
    # Load the dataset; report unreadable files instead of showing a traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
        st.error(f"Could not read the uploaded file as CSV: {err}")
        return

    # Show dataset preview
    st.write("### Dataset Preview")
    st.write(df.head())

    # Select features for anomaly detection
    st.write("### Select Features")
    features = st.multiselect("Choose the features to use for anomaly detection", df.columns)
    if not features:
        return

    # Allow user to adjust contamination parameter
    st.write("### Adjust Model Parameters")
    contamination = st.slider("Contamination (proportion of anomalies)", 0.01, 0.5, 0.1, 0.01)

    # Keep only what the scaler and the forest can actually consume.
    usable, skipped = prepare_feature_frame(df, features)
    if skipped:
        st.warning(f"Ignoring non-numeric features: {', '.join(map(str, skipped))}")
    if usable.empty:
        st.error("No usable numeric data in the selected features.")
        return
    dropped_rows = len(df) - len(usable)
    if dropped_rows:
        st.warning(f"Skipped {dropped_rows} row(s) with missing or non-finite values.")

    # Preprocess the data
    df_scaled = StandardScaler().fit_transform(usable)

    # Train the Isolation Forest model and predict anomalies
    with st.spinner("Training the model and detecting anomalies..."):
        model = IsolationForest(n_estimators=100, contamination=contamination, random_state=42)
        model.fit(df_scaled)
        predictions = model.predict(df_scaled)

    # Work on a copy restricted to the rows that were scored, so the label
    # column lines up one-to-one with the predictions.
    results = df.loc[usable.index].copy()
    results['anomaly'] = predictions  # -1 for anomaly, 1 for normal

    # Display results
    st.write("### Anomaly Detection Results")
    st.write(results)

    # Filter and display only anomalies
    anomalies = results[results['anomaly'] == -1]
    st.write(f"### Detected Anomalies (Total: {len(anomalies)})")
    st.write(anomalies)

    # Visualize anomalies on the first two usable features
    st.write("### Visualize Anomalies")
    plot_columns = list(usable.columns)
    if len(plot_columns) >= 2:
        fig, ax = plt.subplots()
        sns.scatterplot(
            data=results,
            x=plot_columns[0],
            y=plot_columns[1],
            hue='anomaly',
            palette={1: 'blue', -1: 'red'},
            ax=ax,
        )
        st.pyplot(fig)
        # Every rerun creates a new figure; close it so pyplot does not
        # keep accumulating open figures.
        plt.close(fig)
    else:
        st.warning("Please select at least 2 features to visualize anomalies.")

    # Download results as CSV
    st.write("### Download Results")
    st.download_button(
        label="Download Results as CSV",
        data=results.to_csv(index=False).encode('utf-8'),
        file_name='anomaly_detection_results.csv',
        mime='text/csv',
    )


def main():
    """Render the upload widget and run detection once a file is provided."""
    # Upload dataset
    uploaded_file = st.file_uploader("Upload your dataset (CSV file)", type=["csv"])
    if uploaded_file is None:
        st.write("Please upload a CSV file to get started.")
        return
    run_anomaly_detection(uploaded_file)


# Streamlit executes the script as ``__main__``, so the app still runs under
# ``streamlit run`` while the helpers above stay importable on their own.
if __name__ == "__main__":
    main()