Delete app.py (#1)
Browse files- Delete app.py (8140b7ff1f785835599fe01917ec3e3503e168c4)
Co-authored-by: moncher <[email protected]>
app.py
DELETED
@@ -1,108 +0,0 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import pandas as pd
|
3 |
-
from sklearn.model_selection import train_test_split
|
4 |
-
from sklearn.ensemble import RandomForestClassifier
|
5 |
-
from sklearn.metrics import classification_report, confusion_matrix
|
6 |
-
import matplotlib.pyplot as plt
|
7 |
-
import seaborn as sns
|
8 |
-
|
9 |
-
# Set the title of the app
|
10 |
-
st.title("Cybersecurity Model Training App")
|
11 |
-
|
12 |
-
# Sidebar for dataset upload and parameter selection
|
13 |
-
st.sidebar.header("Upload Dataset and Parameters")
|
14 |
-
|
15 |
-
# File uploader for dataset
|
16 |
-
uploaded_file = st.sidebar.file_uploader("Upload your CSV dataset", type=["csv"])
|
17 |
-
|
18 |
-
# Function to load and display dataset
|
19 |
-
def load_data(file):
|
20 |
-
data = pd.read_csv(file)
|
21 |
-
st.write("Dataset Preview:")
|
22 |
-
st.dataframe(data.head())
|
23 |
-
return data
|
24 |
-
|
25 |
-
# Load dataset if file is uploaded
|
26 |
-
if uploaded_file is not None:
|
27 |
-
data = load_data(uploaded_file)
|
28 |
-
|
29 |
-
# Select target variable
|
30 |
-
target = st.sidebar.selectbox("Select the target variable", data.columns)
|
31 |
-
|
32 |
-
# Select features
|
33 |
-
features = st.sidebar.multiselect("Select feature variables", [col for col in data.columns if col != target])
|
34 |
-
|
35 |
-
# Split ratio
|
36 |
-
test_size = st.sidebar.slider("Test size ratio", 0.1, 0.5, 0.3)
|
37 |
-
|
38 |
-
# Model selection
|
39 |
-
model_choice = st.sidebar.selectbox("Select Model", ["Random Forest", "Support Vector Machine", "Logistic Regression"])
|
40 |
-
|
41 |
-
# Hyperparameters
|
42 |
-
if model_choice == "Random Forest":
|
43 |
-
n_estimators = st.sidebar.slider("Number of trees in the forest", 10, 100, 50)
|
44 |
-
max_depth = st.sidebar.slider("Maximum depth of the tree", 1, 20, 10)
|
45 |
-
elif model_choice == "Support Vector Machine":
|
46 |
-
c_value = st.sidebar.slider("Regularization parameter (C)", 0.01, 10.0, 1.0)
|
47 |
-
kernel = st.sidebar.selectbox("Kernel type", ["linear", "rbf", "poly"])
|
48 |
-
elif model_choice == "Logistic Regression":
|
49 |
-
c_value = st.sidebar.slider("Inverse of regularization strength (C)", 0.01, 10.0, 1.0)
|
50 |
-
|
51 |
-
# Train model button
|
52 |
-
if st.sidebar.button("Train Model"):
|
53 |
-
if len(features) == 0:
|
54 |
-
st.warning("Please select at least one feature.")
|
55 |
-
else:
|
56 |
-
X = data[features]
|
57 |
-
y = data[target]
|
58 |
-
|
59 |
-
# Split the data
|
60 |
-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
|
61 |
-
|
62 |
-
# Initialize and train the model
|
63 |
-
if model_choice == "Random Forest":
|
64 |
-
model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
|
65 |
-
elif model_choice == "Support Vector Machine":
|
66 |
-
from sklearn.svm import SVC
|
67 |
-
model = SVC(C=c_value, kernel=kernel, probability=True, random_state=42)
|
68 |
-
elif model_choice == "Logistic Regression":
|
69 |
-
from sklearn.linear_model import LogisticRegression
|
70 |
-
model = LogisticRegression(C=c_value, max_iter=1000, random_state=42)
|
71 |
-
|
72 |
-
model.fit(X_train, y_train)
|
73 |
-
|
74 |
-
# Make predictions
|
75 |
-
y_pred = model.predict(X_test)
|
76 |
-
|
77 |
-
# Display evaluation metrics
|
78 |
-
st.subheader("Model Evaluation")
|
79 |
-
st.text("Classification Report:")
|
80 |
-
st.text(classification_report(y_test, y_pred))
|
81 |
-
|
82 |
-
# Confusion matrix
|
83 |
-
st.text("Confusion Matrix:")
|
84 |
-
cm = confusion_matrix(y_test, y_pred)
|
85 |
-
fig, ax = plt.subplots()
|
86 |
-
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=model.classes_, yticklabels=model.classes_)
|
87 |
-
plt.xlabel("Predicted")
|
88 |
-
plt.ylabel("Actual")
|
89 |
-
st.pyplot(fig)
|
90 |
-
|
91 |
-
# Feature importance for Random Forest
|
92 |
-
if model_choice == "Random Forest":
|
93 |
-
st.subheader("Feature Importance")
|
94 |
-
feature_importance = pd.DataFrame({'Feature': features, 'Importance': model.feature_importances_})
|
95 |
-
feature_importance = feature_importance.sort_values(by='Importance', ascending=False)
|
96 |
-
st.bar_chart(feature_importance.set_index('Feature'))
|
97 |
-
|
98 |
-
# Instructions when no file is uploaded
|
99 |
-
else:
|
100 |
-
st.write("Please upload a CSV file to get started.")
|
101 |
-
|
102 |
-
# Additional resources
|
103 |
-
st.sidebar.header("Additional Resources")
|
104 |
-
st.sidebar.markdown("""
|
105 |
-
- [Streamlit Documentation](https://docs.streamlit.io/)
|
106 |
-
- [Scikit-learn Documentation](https://scikit-learn.org/stable/user_guide.html)
|
107 |
-
- [Cybersecurity Datasets](https://www.kaggle.com/datasets?search=cybersecurity)
|
108 |
-
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|