File size: 3,428 Bytes
7a09bec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Set page title and icon.
# The icon is the magnifying-glass emoji (U+1F50D), written as a Unicode escape
# so the source stays pure ASCII and cannot be garbled by a wrong file encoding.
st.set_page_config(page_title="Anomaly Detection App", page_icon="\U0001F50D")

# Custom CSS for better styling.
# The stylesheet recolours regular buttons (green) and download buttons (blue),
# gives both bold white text, rounded corners and padding, and tints Markdown
# h1/h2 headings to match.
_CUSTOM_STYLES = """

<style>

    .stButton>button {

        background-color: #4CAF50;

        color: white;

        font-weight: bold;

        border-radius: 5px;

        padding: 10px 20px;

    }

    .stDownloadButton>button {

        background-color: #008CBA;

        color: white;

        font-weight: bold;

        border-radius: 5px;

        padding: 10px 20px;

    }

    .stMarkdown h1 {

        color: #4CAF50;

    }

    .stMarkdown h2 {

        color: #008CBA;

    }

</style>

"""

# unsafe_allow_html is required so the <style> tag is emitted as HTML rather
# than being escaped and shown as text.
st.markdown(_CUSTOM_STYLES, unsafe_allow_html=True)

# Title of the app.
# The leading magnifying-glass emoji (U+1F50D) is written as a Unicode escape
# so it cannot be garbled by a wrong file encoding.
st.title("\U0001F50D Anomaly Detection App")

# Short introduction shown under the title (rendered as Markdown).
st.write("""

This app uses the **Isolation Forest** algorithm to detect anomalies in your dataset.

Upload a CSV file, and the app will identify anomalies in the data.

""")

# Upload dataset
uploaded_file = st.file_uploader("Upload your dataset (CSV file)", type=["csv"])

if uploaded_file is not None:
    # Load the dataset. An empty, malformed, or undecodable upload is reported
    # in the page instead of surfacing as a raw traceback; st.stop() then ends
    # this script run so nothing below executes without a DataFrame.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        st.stop()

    # Show dataset preview
    st.write("### Dataset Preview")
    st.write(df.head())

    # Select features for anomaly detection.
    # Only numeric columns are offered: StandardScaler cannot convert text
    # columns to floats, so selecting one would crash the app.
    st.write("### Select Features")
    numeric_columns = df.select_dtypes(include="number").columns.tolist()
    if not numeric_columns:
        st.warning("The uploaded dataset has no numeric columns to analyze.")
    features = st.multiselect(
        "Choose the features to use for anomaly detection",
        numeric_columns,
    )

    if features:
        # Allow user to adjust contamination parameter
        # (slider arguments: min 0.01, max 0.5, default 0.1, step 0.01).
        st.write("### Adjust Model Parameters")
        contamination = st.slider(
            "Contamination (proportion of anomalies)", 0.01, 0.5, 0.1, 0.01
        )

        # Rows with missing or infinite values in the selected features cannot
        # be scaled/scored, so they are excluded (with a visible notice) rather
        # than letting the model raise on them.
        feature_values = df[features].replace([np.inf, -np.inf], np.nan)
        usable_rows = feature_values.notna().all(axis=1)
        excluded_count = int((~usable_rows).sum())
        if excluded_count:
            st.warning(
                f"{excluded_count} row(s) with missing or non-finite values in the "
                "selected features were excluded from the analysis."
            )
            # Copy so that adding the 'anomaly' column below writes to an
            # independent frame, not a view of the original.
            df = df.loc[usable_rows].copy()
            feature_values = feature_values.loc[usable_rows]

        if df.empty:
            st.error("No rows with complete values remain for the selected features.")
            st.stop()

        # Preprocess the data: standardize each selected feature.
        scaler = StandardScaler()
        df_scaled = scaler.fit_transform(feature_values)

        # Train the Isolation Forest model
        with st.spinner("Training the model and detecting anomalies..."):
            # random_state is fixed so repeated runs on the same data and
            # settings give the same labels.
            model = IsolationForest(
                n_estimators=100,
                contamination=contamination,
                random_state=42,
            )
            model.fit(df_scaled)

            # Predict anomalies
            predictions = model.predict(df_scaled)
            df['anomaly'] = predictions  # -1 for anomaly, 1 for normal

        # Display results
        st.write("### Anomaly Detection Results")
        st.write(df)

        # Filter and display only anomalies
        anomalies = df[df['anomaly'] == -1]
        st.write(f"### Detected Anomalies (Total: {len(anomalies)})")
        st.write(anomalies)

        # Visualize anomalies: scatter of the first two selected features,
        # coloured by the predicted label.
        st.write("### Visualize Anomalies")
        if len(features) >= 2:
            fig, ax = plt.subplots()
            sns.scatterplot(
                data=df,
                x=features[0],
                y=features[1],
                hue='anomaly',
                palette={1: 'blue', -1: 'red'},
                ax=ax,  # draw on this figure explicitly, not on pyplot's "current" axes
            )
            st.pyplot(fig)
            # pyplot keeps every figure it creates alive until closed; without
            # this, each script rerun would leave another figure in memory.
            plt.close(fig)
        else:
            st.warning("Please select at least 2 features to visualize anomalies.")

        # Download results as CSV
        st.write("### Download Results")
        st.download_button(
            label="Download Results as CSV",
            data=df.to_csv(index=False).encode('utf-8'),
            file_name='anomaly_detection_results.csv',
            mime='text/csv',
        )
else:
    st.write("Please upload a CSV file to get started.")