Spaces:
Sleeping
Sleeping
File size: 2,695 Bytes
3d81d0a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
# Load the dataset
st.title("SVM Kernel Performance Comparison")

# Forward slash keeps the path portable: the previous backslash form relied on
# an invalid string escape and is not treated as a separator on POSIX hosts.
uploaded_file = "data/overlapped.csv"

# Guard clause: halt this script run when no data source is configured, so
# nothing below executes without a dataset.
if not uploaded_file:
    st.stop()

df = pd.read_csv(uploaded_file)
st.write("### Data Preview")
st.dataframe(df)

# Assuming the last column is the target
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Splitting dataset (70/30, fixed seed so results are reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Plot overlapped clusters using the first two feature columns
st.write("### Cluster Visualization")
fig, ax = plt.subplots()
# Draw on the explicit Axes rather than pyplot's implicit "current" figure.
sns.scatterplot(
    x=X.iloc[:, 0],
    y=X.iloc[:, 1],
    hue=y,
    palette="coolwarm",
    alpha=0.6,
    ax=ax,
)
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Overlapped Clusters")
st.pyplot(fig)
# pyplot keeps figures registered until closed; release it after rendering so
# repeated script runs do not accumulate open figures.
plt.close(fig)
# Function to train SVM and get performance metrics
def evaluate_svm(kernel_type):
    """Fit an SVC with the given kernel and score it on the held-out split.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Args:
        kernel_type: Value forwarded to ``SVC(kernel=...)``.

    Returns:
        A ``(cm, cr)`` tuple: the confusion matrix for the test split and the
        classification report produced with ``output_dict=True``.
    """
    classifier = SVC(kernel=kernel_type)
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return (
        confusion_matrix(y_test, predictions),
        classification_report(y_test, predictions, output_dict=True),
    )
# Streamlit tabs: one tab per SVM kernel, each showing the confusion matrix,
# the classification report and a short analysis note.

# Per-kernel analysis text; built once instead of on every loop iteration.
explanation = {
    "linear": "The linear kernel performs well when the data is linearly separable.",
    "poly": "The polynomial kernel captures more complex relationships but may overfit with high-degree polynomials.",
    "rbf": "The RBF kernel is effective in capturing non-linear relationships in the data but requires careful tuning of parameters.",
}

tab1, tab2, tab3 = st.tabs(["Linear Kernel", "Polynomial Kernel", "RBF Kernel"])
for tab, kernel in zip([tab1, tab2, tab3], ["linear", "poly", "rbf"]):
    with tab:
        st.write(f"## SVM with {kernel.capitalize()} Kernel")
        cm, cr = evaluate_svm(kernel)

        # Confusion matrix
        st.write("### Confusion Matrix")
        fig, ax = plt.subplots()
        # Draw on the explicit Axes rather than pyplot's implicit current figure.
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix")
        st.pyplot(fig)
        # Release the figure after rendering so the three per-tab figures do
        # not stay registered with pyplot across script runs.
        plt.close(fig)

        # Classification report (transposed so each class/average is a row)
        st.write("### Classification Report")
        st.dataframe(pd.DataFrame(cr).transpose())

        # Explanation
        st.markdown(f"**Performance Analysis:** {explanation[kernel]}")
|