# Tzetha's picture
# Upload Files
# 7a09bec verified
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
# Page icon, written as a Unicode escape (U+1F50D, left-pointing magnifying
# glass) so the literal survives any re-encoding of this source file.
APP_ICON = "\U0001F50D"

# Set page title and icon. This must remain the first Streamlit call in the script.
st.set_page_config(page_title="Anomaly Detection App", page_icon=APP_ICON)

# Custom CSS for better styling: green action buttons, blue download buttons,
# and matching heading colours. Injected as raw HTML, hence unsafe_allow_html.
st.markdown("""
<style>
.stButton>button {
background-color: #4CAF50;
color: white;
font-weight: bold;
border-radius: 5px;
padding: 10px 20px;
}
.stDownloadButton>button {
background-color: #008CBA;
color: white;
font-weight: bold;
border-radius: 5px;
padding: 10px 20px;
}
.stMarkdown h1 {
color: #4CAF50;
}
.stMarkdown h2 {
color: #008CBA;
}
</style>
""", unsafe_allow_html=True)

# Title of the app and a short description of what it does
st.title(f"{APP_ICON} Anomaly Detection App")
st.write("""
This app uses the **Isolation Forest** algorithm to detect anomalies in your dataset.
Upload a CSV file, and the app will identify anomalies in the data.
""")
# Upload dataset
uploaded_file = st.file_uploader("Upload your dataset (CSV file)", type=["csv"])

if uploaded_file is not None:
    # Load the dataset. An empty, malformed, or non-UTF-8 upload is reported to
    # the user as a readable message instead of an unhandled traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        st.stop()

    # Show dataset preview
    st.write("### Dataset Preview")
    st.write(df.head())

    # Select features for anomaly detection. Only numeric columns are offered
    # because StandardScaler cannot scale text columns.
    st.write("### Select Features")
    numeric_columns = df.select_dtypes(include="number").columns.tolist()
    if not numeric_columns:
        st.warning("The dataset has no numeric columns to use for anomaly detection.")
    features = st.multiselect(
        "Choose the features to use for anomaly detection",
        numeric_columns,
    )

    if features:
        # Allow user to adjust contamination parameter
        # (slider arguments: min 0.01, max 0.5, default 0.1, step 0.01)
        st.write("### Adjust Model Parameters")
        contamination = st.slider(
            "Contamination (proportion of anomalies)", 0.01, 0.5, 0.1, 0.01
        )

        # Rows with missing values in the chosen features are set aside before
        # modelling: NaN input may be rejected by the estimator (depends on the
        # installed scikit-learn version), which would abort the whole run.
        complete_rows = df[features].notna().all(axis=1)
        n_incomplete = int((~complete_rows).sum())
        if n_incomplete:
            st.warning(
                f"Ignoring {n_incomplete} row(s) with missing values in the selected features."
            )
            df = df[complete_rows].copy()
        if df.empty:
            st.error("No rows with complete values remain for the selected features.")
            st.stop()

        # Preprocess the data: standardise each selected feature
        scaler = StandardScaler()
        df_scaled = scaler.fit_transform(df[features])

        # Train the Isolation Forest model and predict anomalies.
        # random_state is fixed so repeated runs give the same labels.
        with st.spinner("Training the model and detecting anomalies..."):
            model = IsolationForest(
                n_estimators=100,
                contamination=contamination,
                random_state=42,
            )
            model.fit(df_scaled)
            predictions = model.predict(df_scaled)

        # -1 for anomaly, 1 for normal.
        # NOTE: this overwrites any existing column named 'anomaly'.
        df['anomaly'] = predictions

        # Display results
        st.write("### Anomaly Detection Results")
        st.write(df)

        # Filter and display only anomalies
        anomalies = df[df['anomaly'] == -1]
        st.write(f"### Detected Anomalies (Total: {len(anomalies)})")
        st.write(anomalies)

        # Visualize anomalies using the first two selected features as axes
        st.write("### Visualize Anomalies")
        if len(features) >= 2:
            fig, ax = plt.subplots()
            sns.scatterplot(
                data=df,
                x=features[0],
                y=features[1],
                hue='anomaly',
                palette={1: 'blue', -1: 'red'},
                ax=ax,  # draw on this figure explicitly, not the implicit current axes
            )
            st.pyplot(fig)
            # Streamlit reruns the script on every interaction; close the
            # figure so pyplot does not keep accumulating open figures.
            plt.close(fig)
        else:
            st.warning("Please select at least 2 features to visualize anomalies.")

        # Download results as CSV
        st.write("### Download Results")
        st.download_button(
            label="Download Results as CSV",
            data=df.to_csv(index=False).encode('utf-8'),
            file_name='anomaly_detection_results.csv',
            mime='text/csv',
        )
else:
    st.write("Please upload a CSV file to get started.")