Spaces:

louiecerv
/

svm_kernel_comparison

Sleeping

App Files Files Community

svm_kernel_comparison / app.py

louiecerv

fixed the problem of the decision boundary plot

08befbb 7 months ago

raw

history blame

3.93 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	from sklearn.model_selection import train_test_split
	from sklearn.svm import SVC
	from sklearn.metrics import confusion_matrix, classification_report

	# Function to visualize decision boundary
	def visualize_classifier(classifier, X, y, title='', mesh_step_size=0.01):
	    """Render a classifier's decision regions over 2-D data in Streamlit.

	    The classifier is evaluated on a regular grid that extends one unit
	    beyond the data on every side. Predicted labels are drawn as a
	    grey-scale background and the samples are scattered on top, coloured
	    by ``y``. The finished figure is handed to ``st.pyplot`` and then closed.

	    Args:
	        classifier: Object whose ``predict`` accepts a two-column array.
	        X: Array indexed as ``X[:, 0]`` and ``X[:, 1]`` (only the first two
	            columns are read).
	        y: Per-sample values used to colour the scatter points.
	        title: Title placed above the axes.
	        mesh_step_size: Grid spacing in data units. Smaller values give a
	            smoother boundary at the cost of more predictions.
	    """
	    min_x, max_x = X[:, 0].min() - 1.0, X[:, 0].max() + 1.0
	    min_y, max_y = X[:, 1].min() - 1.0, X[:, 1].max() + 1.0

	    x_vals, y_vals = np.meshgrid(
	        np.arange(min_x, max_x, mesh_step_size),
	        np.arange(min_y, max_y, mesh_step_size),
	    )
	    # Predict once for every grid node, then fold back to the grid shape.
	    output = classifier.predict(np.c_[x_vals.ravel(), y_vals.ravel()])
	    output = output.reshape(x_vals.shape)

	    fig, ax = plt.subplots()
	    try:
	        ax.set_title(title)
	        ax.pcolormesh(x_vals, y_vals, output, cmap=plt.cm.gray, shading='auto')
	        ax.scatter(X[:, 0], X[:, 1], c=y, s=75, edgecolors='black',
	                   linewidth=1, cmap=plt.cm.Paired)
	        ax.set_xlim(x_vals.min(), x_vals.max())
	        ax.set_ylim(y_vals.min(), y_vals.max())
	        # Integer ticks spanning the padded data range.
	        ax.set_xticks(np.arange(int(min_x), int(max_x), 1.0))
	        ax.set_yticks(np.arange(int(min_y), int(max_y), 1.0))
	        st.pyplot(fig)
	    finally:
	        # pyplot keeps every figure alive until it is closed; without this
	        # each Streamlit rerun would leave another figure in memory.
	        plt.close(fig)

	# Page title and dataset loading
	st.title("SVM Kernel Performance Comparison")

	# Forward slash on purpose: a backslash here forms the invalid escape "\o"
	# and is not a path separator on Linux hosts. "/" works on every platform.
	uploaded_file = 'data/overlapped.csv'
	# The guard is always true for this hard-coded, non-empty path.
	if uploaded_file:
	    df = pd.read_csv(uploaded_file)
	    st.write("### Data Preview")
	    st.dataframe(df)

	    # Assuming the last column is the target
	    X = df.iloc[:, :-1]
	    y = df.iloc[:, -1]

	    # Hold out 30% of the rows for testing; fixed seed keeps the split stable.
	    X_train, X_test, y_train, y_test = train_test_split(
	        X, y, test_size=0.3, random_state=42
	    )

	    # Scatter the first two feature columns, coloured by the target.
	    st.write("### Cluster Visualization")
	    fig, ax = plt.subplots()
	    sns.scatterplot(
	        x=X.iloc[:, 0],
	        y=X.iloc[:, 1],
	        hue=y,
	        palette='coolwarm',
	        alpha=0.6,
	        ax=ax,  # draw on this figure explicitly, not pyplot's current axes
	    )
	    ax.set_xlabel("Feature 1")
	    ax.set_ylabel("Feature 2")
	    ax.set_title("Overlapped Clusters")
	    st.pyplot(fig)
	    # Release the figure once rendered so reruns do not accumulate figures.
	    plt.close(fig)

	# Function to train SVM and get performance metrics
	def evaluate_svm(kernel_type):
	    """Fit an SVC with the given kernel and score it on the held-out split.

	    Reads the module-level ``X_train``, ``X_test``, ``y_train`` and
	    ``y_test``.

	    Args:
	        kernel_type: Value passed to ``SVC(kernel=...)``.

	    Returns:
	        Tuple ``(model, cm, cr)``: the fitted ``SVC``, the confusion matrix
	        for the test split, and the classification report as a dict
	        (``output_dict=True``).
	    """
	    model = SVC(kernel=kernel_type)
	    # Fit and predict on bare arrays: the decision-boundary plot later calls
	    # ``predict`` with a plain NumPy grid, and a model fitted on a DataFrame
	    # would emit a feature-name mismatch warning for every such call.
	    model.fit(np.asarray(X_train), y_train)
	    y_pred = model.predict(np.asarray(X_test))
	    cm = confusion_matrix(y_test, y_pred)
	    cr = classification_report(y_test, y_pred, output_dict=True)
	    return model, cm, cr

	# One Streamlit tab per kernel
	tab1, tab2, tab3 = st.tabs(["Linear Kernel", "Polynomial Kernel", "RBF Kernel"])

	# Per-kernel commentary; built once because it does not change per tab.
	explanation = {
	    "linear": "The linear kernel performs well when the data is linearly separable.",
	    "poly": "The polynomial kernel captures more complex relationships but may overfit with high-degree polynomials.",
	    "rbf": "The RBF kernel is effective in capturing non-linear relationships in the data but requires careful tuning of parameters.",
	}

	for tab, kernel in zip([tab1, tab2, tab3], ["linear", "poly", "rbf"]):
	    with tab:
	        st.write(f"## SVM with {kernel.capitalize()} Kernel")
	        model, cm, cr = evaluate_svm(kernel)

	        # Confusion matrix
	        st.write("### Confusion Matrix")
	        fig, ax = plt.subplots()
	        # Target the new axes explicitly instead of pyplot's current axes.
	        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
	        ax.set_xlabel("Predicted")
	        ax.set_ylabel("Actual")
	        ax.set_title("Confusion Matrix")
	        st.pyplot(fig)
	        # Close after rendering so figures do not pile up across reruns.
	        plt.close(fig)

	        # Classification report
	        st.write("### Classification Report")
	        st.dataframe(pd.DataFrame(cr).transpose())

	        # Decision boundary
	        st.write("### Decision Boundary")
	        visualize_classifier(
	            model,
	            X.to_numpy(),
	            y.to_numpy(),
	            title=f"Decision Boundary - {kernel.capitalize()} Kernel",
	        )

	        # Explanation
	        st.markdown(f"Performance Analysis: {explanation[kernel]}")