Canstralian tess3d commited on
Commit
ce12cc5
·
verified ·
1 Parent(s): c51907e

Delete app.py (#1)

Browse files

- Delete app.py (8140b7ff1f785835599fe01917ec3e3503e168c4)


Co-authored-by: moncher <[email protected]>

Files changed (1) hide show
  1. app.py +0 -108
app.py DELETED
@@ -1,108 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- from sklearn.model_selection import train_test_split
4
- from sklearn.ensemble import RandomForestClassifier
5
- from sklearn.metrics import classification_report, confusion_matrix
6
- import matplotlib.pyplot as plt
7
- import seaborn as sns
8
-
9
- # Set the title of the app
10
- st.title("Cybersecurity Model Training App")
11
-
12
- # Sidebar for dataset upload and parameter selection
13
- st.sidebar.header("Upload Dataset and Parameters")
14
-
15
- # File uploader for dataset
16
- uploaded_file = st.sidebar.file_uploader("Upload your CSV dataset", type=["csv"])
17
-
18
- # Function to load and display dataset
19
- def load_data(file):
20
- data = pd.read_csv(file)
21
- st.write("Dataset Preview:")
22
- st.dataframe(data.head())
23
- return data
24
-
25
- # Load dataset if file is uploaded
26
- if uploaded_file is not None:
27
- data = load_data(uploaded_file)
28
-
29
- # Select target variable
30
- target = st.sidebar.selectbox("Select the target variable", data.columns)
31
-
32
- # Select features
33
- features = st.sidebar.multiselect("Select feature variables", [col for col in data.columns if col != target])
34
-
35
- # Split ratio
36
- test_size = st.sidebar.slider("Test size ratio", 0.1, 0.5, 0.3)
37
-
38
- # Model selection
39
- model_choice = st.sidebar.selectbox("Select Model", ["Random Forest", "Support Vector Machine", "Logistic Regression"])
40
-
41
- # Hyperparameters
42
- if model_choice == "Random Forest":
43
- n_estimators = st.sidebar.slider("Number of trees in the forest", 10, 100, 50)
44
- max_depth = st.sidebar.slider("Maximum depth of the tree", 1, 20, 10)
45
- elif model_choice == "Support Vector Machine":
46
- c_value = st.sidebar.slider("Regularization parameter (C)", 0.01, 10.0, 1.0)
47
- kernel = st.sidebar.selectbox("Kernel type", ["linear", "rbf", "poly"])
48
- elif model_choice == "Logistic Regression":
49
- c_value = st.sidebar.slider("Inverse of regularization strength (C)", 0.01, 10.0, 1.0)
50
-
51
- # Train model button
52
- if st.sidebar.button("Train Model"):
53
- if len(features) == 0:
54
- st.warning("Please select at least one feature.")
55
- else:
56
- X = data[features]
57
- y = data[target]
58
-
59
- # Split the data
60
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
61
-
62
- # Initialize and train the model
63
- if model_choice == "Random Forest":
64
- model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
65
- elif model_choice == "Support Vector Machine":
66
- from sklearn.svm import SVC
67
- model = SVC(C=c_value, kernel=kernel, probability=True, random_state=42)
68
- elif model_choice == "Logistic Regression":
69
- from sklearn.linear_model import LogisticRegression
70
- model = LogisticRegression(C=c_value, max_iter=1000, random_state=42)
71
-
72
- model.fit(X_train, y_train)
73
-
74
- # Make predictions
75
- y_pred = model.predict(X_test)
76
-
77
- # Display evaluation metrics
78
- st.subheader("Model Evaluation")
79
- st.text("Classification Report:")
80
- st.text(classification_report(y_test, y_pred))
81
-
82
- # Confusion matrix
83
- st.text("Confusion Matrix:")
84
- cm = confusion_matrix(y_test, y_pred)
85
- fig, ax = plt.subplots()
86
- sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=model.classes_, yticklabels=model.classes_)
87
- plt.xlabel("Predicted")
88
- plt.ylabel("Actual")
89
- st.pyplot(fig)
90
-
91
- # Feature importance for Random Forest
92
- if model_choice == "Random Forest":
93
- st.subheader("Feature Importance")
94
- feature_importance = pd.DataFrame({'Feature': features, 'Importance': model.feature_importances_})
95
- feature_importance = feature_importance.sort_values(by='Importance', ascending=False)
96
- st.bar_chart(feature_importance.set_index('Feature'))
97
-
98
- # Instructions when no file is uploaded
99
- else:
100
- st.write("Please upload a CSV file to get started.")
101
-
102
- # Additional resources
103
- st.sidebar.header("Additional Resources")
104
- st.sidebar.markdown("""
105
- - [Streamlit Documentation](https://docs.streamlit.io/)
106
- - [Scikit-learn Documentation](https://scikit-learn.org/stable/user_guide.html)
107
- - [Cybersecurity Datasets](https://www.kaggle.com/datasets?search=cybersecurity)
108
- """)