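# NOTE(review): the next six lines appear to be residue from a web file viewer,
# not part of the program; kept as comments so they do not break the module.
# louiecerv's picture
# fixed the problem of the decision boundary plot
# 08befbb
# raw
# history blame
# 3.93 kB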
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
# Function to visualize decision boundary
def visualize_classifier(classifier, X, y, title='', mesh_step_size=0.01):
    """Draw a classifier's 2-D decision regions with the data points on top.

    The first two columns of ``X`` span the plot. A dense grid covering the
    data range (padded by one unit on every side) is passed to
    ``classifier.predict``; the predicted labels are painted as a grayscale
    background and the samples are scattered over it, coloured by ``y``.
    The finished figure is sent to Streamlit with ``st.pyplot``.

    Args:
        classifier: Object whose ``predict`` accepts an ``(n, 2)`` array and
            returns one label per row.
        X: Array-like of samples; columns 0 and 1 are used as plot axes.
        y: Array-like of per-sample values used to colour the scatter.
        title: Title placed above the axes.
        mesh_step_size: Spacing of the prediction grid in data units.
            Smaller values give smoother boundaries but more predictions.

    Raises:
        ValueError: If ``mesh_step_size`` is not positive.
    """
    if mesh_step_size <= 0:
        raise ValueError("mesh_step_size must be positive")

    # Accept DataFrames/Series/lists as well as ndarrays.
    X = np.asarray(X)
    y = np.asarray(y)

    min_x, max_x = X[:, 0].min() - 1.0, X[:, 0].max() + 1.0
    min_y, max_y = X[:, 1].min() - 1.0, X[:, 1].max() + 1.0

    x_vals, y_vals = np.meshgrid(
        np.arange(min_x, max_x, mesh_step_size),
        np.arange(min_y, max_y, mesh_step_size),
    )
    output = classifier.predict(np.c_[x_vals.ravel(), y_vals.ravel()])
    output = output.reshape(x_vals.shape)

    fig, ax = plt.subplots()
    try:
        ax.set_title(title)
        ax.pcolormesh(x_vals, y_vals, output, cmap=plt.cm.gray, shading='auto')
        ax.scatter(X[:, 0], X[:, 1], c=y, s=75, edgecolors='black',
                   linewidth=1, cmap=plt.cm.Paired)

        # Integer ticks that lie inside the plotted range.
        ax.set_xticks(np.arange(np.ceil(min_x), np.floor(max_x) + 1.0, 1.0))
        ax.set_yticks(np.arange(np.ceil(min_y), np.floor(max_y) + 1.0, 1.0))

        # Limits go last: setting ticks may widen the view to show every tick,
        # which would leave unpainted margins around the mesh.
        ax.set_xlim(x_vals.min(), x_vals.max())
        ax.set_ylim(y_vals.min(), y_vals.max())

        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until closed; the script is re-run
        # on each interaction, so release it explicitly.
        plt.close(fig)
# Load the dataset
st.title("SVM Kernel Performance Comparison")

# Forward slash works on every platform; the previous 'data\overlapped.csv'
# contained the invalid escape sequence '\o' and only resolved on Windows.
# The path is a fixed non-empty string, so no truthiness guard is needed.
uploaded_file = 'data/overlapped.csv'
df = pd.read_csv(uploaded_file)

st.write("### Data Preview")
st.dataframe(df)

# Assuming the last column is the target
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Splitting dataset (70 % train / 30 % test, fixed seed for repeatable runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Plot overlapped clusters: first two feature columns, coloured by target
st.write("### Cluster Visualization")
fig, ax = plt.subplots()
# Draw on the explicit axes instead of relying on pyplot's "current" axes.
sns.scatterplot(x=X.iloc[:, 0], y=X.iloc[:, 1], hue=y, palette='coolwarm', alpha=0.6, ax=ax)
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_title("Overlapped Clusters")
st.pyplot(fig)
# Release the figure; pyplot otherwise keeps it alive across script re-runs.
plt.close(fig)
# Function to train SVM and get performance metrics
def evaluate_svm(kernel_type):
    """Fit an SVC with the given kernel and score it on the held-out split.

    Reads the module-level ``X_train``, ``X_test``, ``y_train`` and ``y_test``
    produced by the train/test split.

    Args:
        kernel_type: Value passed to ``SVC(kernel=...)``.

    Returns:
        A ``(model, cm, cr)`` tuple: the fitted ``SVC``, the confusion matrix
        of test labels vs. predictions, and the classification report as a
        dict (``output_dict=True``).
    """
    # Train and predict on plain arrays. The fitted model is later handed
    # NumPy mesh points by visualize_classifier; fitting on a DataFrame would
    # record column names and make those predictions emit a feature-name
    # mismatch warning.
    train_features = np.asarray(X_train)
    test_features = np.asarray(X_test)

    model = SVC(kernel=kernel_type)
    model.fit(train_features, y_train)
    y_pred = model.predict(test_features)

    cm = confusion_matrix(y_test, y_pred)
    cr = classification_report(y_test, y_pred, output_dict=True)
    return model, cm, cr
# Streamlit tabs
# (display label, scikit-learn kernel name) - one source for tab titles and
# headings so they agree (previously "RBF Kernel" tab vs. "Rbf Kernel" heading).
kernel_options = [("Linear", "linear"), ("Polynomial", "poly"), ("RBF", "rbf")]

# Explanation shown under each tab; built once rather than on every iteration.
explanation = {
    "linear": "The linear kernel performs well when the data is linearly separable.",
    "poly": "The polynomial kernel captures more complex relationships but may overfit with high-degree polynomials.",
    "rbf": "The RBF kernel is effective in capturing non-linear relationships in the data but requires careful tuning of parameters."
}

tab1, tab2, tab3 = st.tabs([f"{label} Kernel" for label, _ in kernel_options])

for tab, (label, kernel) in zip([tab1, tab2, tab3], kernel_options):
    with tab:
        st.write(f"## SVM with {label} Kernel")
        model, cm, cr = evaluate_svm(kernel)

        # Confusion matrix
        st.write("### Confusion Matrix")
        fig, ax = plt.subplots()
        # Draw on the explicit axes instead of pyplot's "current" axes.
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix")
        st.pyplot(fig)
        # Release the figure; pyplot otherwise keeps it alive across re-runs.
        plt.close(fig)

        # Classification report
        st.write("### Classification Report")
        st.dataframe(pd.DataFrame(cr).transpose())

        # Decision boundary
        st.write("### Decision Boundary")
        visualize_classifier(model, X.to_numpy(), y.to_numpy(), title=f"Decision Boundary - {label} Kernel")

        # Explanation
        st.markdown(f"**Performance Analysis:** {explanation[kernel]}")