Tzetha committed on
Commit
7a09bec
·
verified ·
1 Parent(s): 1921351

Upload Files

Browse files
Files changed (2) hide show
  1. app.py +105 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that flags anomalous rows of an uploaded CSV with Isolation Forest.

Flow: upload a CSV, pick numeric feature columns, choose the contamination
rate, then view/download the rows labelled -1 (anomaly) or 1 (normal).
"""

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Set page title and icon
st.set_page_config(page_title="Anomaly Detection App", page_icon="🔍")

# Custom CSS for better styling
st.markdown("""
<style>
.stButton>button {
    background-color: #4CAF50;
    color: white;
    font-weight: bold;
    border-radius: 5px;
    padding: 10px 20px;
}
.stDownloadButton>button {
    background-color: #008CBA;
    color: white;
    font-weight: bold;
    border-radius: 5px;
    padding: 10px 20px;
}
.stMarkdown h1 {
    color: #4CAF50;
}
.stMarkdown h2 {
    color: #008CBA;
}
</style>
""", unsafe_allow_html=True)

# Title of the app
st.title("🔍 Anomaly Detection App")
st.write("""
This app uses the **Isolation Forest** algorithm to detect anomalies in your dataset.
Upload a CSV file, and the app will identify anomalies in the data.
""")

# Upload dataset
uploaded_file = st.file_uploader("Upload your dataset (CSV file)", type=["csv"])

if uploaded_file is not None:
    # Load the dataset; a malformed or empty upload should produce a readable
    # message instead of a raw traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        st.stop()

    # Show dataset preview
    st.write("### Dataset Preview")
    st.write(df.head())

    # Select features for anomaly detection.
    # Only numeric columns are offered: StandardScaler cannot convert
    # text columns and would raise on them.
    st.write("### Select Features")
    numeric_columns = df.select_dtypes(include=np.number).columns.tolist()
    if not numeric_columns:
        st.warning("The uploaded file has no numeric columns to analyse.")
    features = st.multiselect(
        "Choose the features to use for anomaly detection", numeric_columns
    )

    if features:
        # Allow user to adjust contamination parameter
        st.write("### Adjust Model Parameters")
        contamination = st.slider(
            "Contamination (proportion of anomalies)", 0.01, 0.5, 0.1, 0.01
        )

        # Rows with missing values in the chosen features cannot be scored,
        # so they are excluded (and reported) rather than crashing the fit.
        complete_rows = df[features].notna().all(axis=1)
        dropped_count = int((~complete_rows).sum())
        if dropped_count:
            st.warning(
                f"Ignoring {dropped_count} row(s) with missing values in the selected features."
            )
            df = df.loc[complete_rows].copy()
        if df.empty:
            st.error("No complete rows remain for the selected features.")
            st.stop()

        # Preprocess the data
        scaler = StandardScaler()
        df_scaled = scaler.fit_transform(df[features])

        # Train the Isolation Forest model and predict anomalies
        with st.spinner("Training the model and detecting anomalies..."):
            model = IsolationForest(
                n_estimators=100, contamination=contamination, random_state=42
            )
            model.fit(df_scaled)
            predictions = model.predict(df_scaled)
        df['anomaly'] = predictions  # -1 for anomaly, 1 for normal

        # Display results
        st.write("### Anomaly Detection Results")
        st.write(df)

        # Filter and display only anomalies
        anomalies = df[df['anomaly'] == -1]
        st.write(f"### Detected Anomalies (Total: {len(anomalies)})")
        st.write(anomalies)

        # Visualize anomalies
        st.write("### Visualize Anomalies")
        if len(features) >= 2:
            fig, ax = plt.subplots()
            # Draw on the explicit axes instead of pyplot's global "current"
            # axes, which is shared state across Streamlit sessions.
            sns.scatterplot(
                data=df,
                x=features[0],
                y=features[1],
                hue='anomaly',
                palette={1: 'blue', -1: 'red'},
                ax=ax,
            )
            st.pyplot(fig)
            # Release the figure; otherwise every rerun leaves one open.
            plt.close(fig)
        else:
            st.warning("Please select at least 2 features to visualize anomalies.")

        # Download results as CSV
        st.write("### Download Results")
        st.download_button(
            label="Download Results as CSV",
            data=df.to_csv(index=False).encode('utf-8'),
            file_name='anomaly_detection_results.csv',
            mime='text/csv',
        )
else:
    st.write("Please upload a CSV file to get started.")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
# Runtime dependencies of app.py (matplotlib and seaborn are imported for the scatter plot).
scikit-learn
streamlit
pandas
numpy
matplotlib
seaborn