"""Streamlit app that flags anomalous rows of an uploaded CSV with Isolation Forest.

The script is executed top to bottom by Streamlit on every interaction:
it reads the uploaded CSV, lets the user pick feature columns and the
contamination rate, standardizes the selected features, fits an
``IsolationForest`` and shows/downloads the rows with an added ``anomaly``
column (-1 for anomaly, 1 for normal).
"""
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Set page title and icon
st.set_page_config(page_title="Anomaly Detection App", page_icon="🔍")

# Custom CSS for better styling (currently an empty placeholder)
st.markdown(""" """, unsafe_allow_html=True)

# Title of the app
st.title("🔍 Anomaly Detection App")
st.write(
    " This app uses the **Isolation Forest** algorithm to detect anomalies in your dataset."
    " Upload a CSV file, and the app will identify anomalies in the data. "
)

# Upload dataset
uploaded_file = st.file_uploader("Upload your dataset (CSV file)", type=["csv"])

if uploaded_file is not None:
    # Load the dataset; report unreadable uploads instead of raising a traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        st.stop()

    # A header-only file has nothing to scale or fit; stop before the model
    # fails with a less helpful error.
    if df.empty:
        st.warning("The uploaded CSV file contains no rows.")
        st.stop()

    # Show dataset preview
    st.write("### Dataset Preview")
    st.write(df.head())

    # Select features for anomaly detection
    st.write("### Select Features")
    features = st.multiselect("Choose the features to use for anomaly detection", df.columns)

    if features:
        # Allow user to adjust contamination parameter
        st.write("### Adjust Model Parameters")
        contamination = st.slider("Contamination (proportion of anomalies)", 0.01, 0.5, 0.1, 0.01)

        # StandardScaler cannot convert text/date columns to floats, so name
        # the offending columns rather than letting fit_transform raise.
        non_numeric = df[features].select_dtypes(exclude=["number", "bool"]).columns.tolist()
        if non_numeric:
            st.error(
                "The following selected features are not numeric and cannot be used: "
                + ", ".join(map(str, non_numeric))
            )
            st.stop()

        # Preprocess the data
        scaler = StandardScaler()
        df_scaled = scaler.fit_transform(df[features])

        # Train the Isolation Forest model
        with st.spinner("Training the model and detecting anomalies..."):
            model = IsolationForest(n_estimators=100, contamination=contamination, random_state=42)
            model.fit(df_scaled)

            # Predict anomalies
            predictions = model.predict(df_scaled)
            df['anomaly'] = predictions  # -1 for anomaly, 1 for normal

        # Display results
        st.write("### Anomaly Detection Results")
        st.write(df)

        # Filter and display only anomalies
        anomalies = df[df['anomaly'] == -1]
        st.write(f"### Detected Anomalies (Total: {len(anomalies)})")
        st.write(anomalies)

        # Visualize anomalies using the first two selected features as axes
        st.write("### Visualize Anomalies")
        if len(features) >= 2:
            fig, ax = plt.subplots()
            sns.scatterplot(
                data=df,
                x=features[0],
                y=features[1],
                hue='anomaly',
                palette={1: 'blue', -1: 'red'},
                ax=ax,  # draw on the figure created above, not the implicit current axes
            )
            st.pyplot(fig)
            # pyplot keeps every figure registered until it is closed; without
            # this, each rerun of the script would leave another figure in memory.
            plt.close(fig)
        else:
            st.warning("Please select at least 2 features to visualize anomalies.")

        # Download results as CSV
        st.write("### Download Results")
        st.download_button(
            label="Download Results as CSV",
            data=df.to_csv(index=False).encode('utf-8'),
            file_name='anomaly_detection_results.csv',
            mime='text/csv',
        )
else:
    st.write("Please upload a CSV file to get started.")