Spaces:

ibm-research
/

fm4m-eval-demo

Running

App Files Files Community

fm4m-eval-demo / property_prediction_helpers.py

ipd

init

5306c2a 17 days ago

raw

history blame

8.74 kB

	import matplotlib.pyplot as plt
	import numpy as np
	import os
	import pandas as pd
	import xgboost as xgb
	from sklearn.kernel_ridge import KernelRidge
	from sklearn.linear_model import LinearRegression
	from sklearn.svm import SVR

	# Set OMP_MAX_ACTIVE_LEVELS before models.fm4m is imported further down.
	# NOTE(review): presumably this caps nested OpenMP parallelism in the native
	# libraries that import pulls in -- confirm the intent with the author.
	os.environ["OMP_MAX_ACTIVE_LEVELS"] = "1"

	import models.fm4m as fm4m


	# Function to create model based on user input
	def _create_model(
	model_name, max_depth=None, n_estimators=None, alpha=None, degree=None, kernel=None
	):
	if model_name == "XGBClassifier":
	model = xgb.XGBClassifier(
	objective='binary:logistic',
	eval_metric='auc',
	max_depth=max_depth,
	n_estimators=n_estimators,
	alpha=alpha,
	)
	elif model_name == "SVR":
	model = SVR(degree=degree, kernel=kernel)
	elif model_name == "Kernel Ridge":
	model = KernelRidge(alpha=alpha, degree=degree, kernel=kernel)
	elif model_name == "Linear Regression":
	model = LinearRegression()
	elif model_name == "Default - Auto":
	return "Default Settings"
	else:
	return "Model not supported."

	return f"{model_name} * {model.get_params()}"


	# Function to handle model creation based on input parameters
	def create_downstream_model(state):
	    """Build the downstream-model description from the UI state.

	    Only the hyperparameters relevant to the selected model are read from
	    ``state`` and forwarded to ``_create_model``.

	    Args:
	        state: Mapping with a required "model_name" entry and optional
	            "max_depth", "n_estimators", "alpha", "degree" and "kernel"
	            entries (missing ones are treated as ``None``).

	    Returns:
	        The string produced by ``_create_model``. Unrecognised model names
	        are forwarded as well, so the caller receives that function's
	        "Model not supported." text instead of an implicit ``None``.

	    Raises:
	        KeyError: If ``state`` has no "model_name" entry.
	    """
	    model_name = state["model_name"]

	    if model_name == "XGBClassifier":
	        wanted = ("max_depth", "n_estimators", "alpha")
	    elif model_name == "SVR":
	        wanted = ("degree", "kernel")
	    elif model_name == "Kernel Ridge":
	        wanted = ("alpha", "degree", "kernel")
	    else:
	        # "Linear Regression", "Default - Auto" and any unknown name take
	        # no hyperparameters; _create_model decides what to return.
	        wanted = ()

	    hyper_params = {key: state.get(key) for key in wanted}
	    return _create_model(model_name, **hyper_params)


	# Split a "<model name> * <parameter dict>" description into its two parts
	def _parse_downstream(downstream):
	    """Return ``(model_name, params)`` parsed from a description string.

	    ``params`` is ``None`` when the string carries no parameter part or the
	    parameter text cannot be evaluated.
	    """
	    # Split on the first "*" only, so a "*" inside the parameter text does
	    # not truncate it.
	    name, separator, raw_params = downstream.partition("*")
	    name = name.strip()
	    if not separator:
	        return name, None

	    # The parameter repr may contain a bare ``nan`` token, which is not a
	    # valid Python name on its own.
	    expression = raw_params.strip().replace("nan", "float('nan')")
	    try:
	        # SECURITY NOTE(review): eval() executes arbitrary code. The text is
	        # the repr of estimator.get_params(), which embeds values taken from
	        # the UI state. Confirm those values are restricted, or pass the
	        # parameter dict around directly instead of round-tripping via text.
	        params = eval(expression)
	    except Exception:
	        params = None
	    return name, params


	# Function to display evaluation score
	def display_eval(selected_models, dataset, task_type, state, plot_state):
	    """Evaluate the selected model(s) on a dataset and return the score text.

	    Args:
	        selected_models: Sequence of model names. One entry is dispatched to
	            ``fm4m.single_modal``, several to ``fm4m.multi_modal``.
	        dataset: Passed through unchanged to the ``fm4m`` call.
	        task_type: "Classification" or "Regression"; anything else runs no
	            evaluation.
	        state: UI state handed to ``create_downstream_model``.
	        plot_state: Mapping updated in place with the evaluation outputs:
	            "roc_auc", "fpr", "tpr" for classification or "RMSE",
	            "y_batch_test", "y_prob" for regression, plus "x_batch" and
	            "y_batch" in both cases.

	    Returns:
	        The result returned by ``fm4m`` when it is truthy, otherwise one of
	        the explanatory messages built below.
	    """
	    downstream = create_downstream_model(state)
	    if not isinstance(downstream, str):
	        # No description was produced (e.g. an unrecognised model name);
	        # report it instead of failing on ``None.split``.
	        return "Model not supported."

	    downstream_model, params = _parse_downstream(downstream)
	    result = None

	    try:
	        if not selected_models:
	            return "Please select at least one enabled model."

	        if task_type == "Classification":
	            default_name = "DefaultClassifier"
	            metric_keys = ("roc_auc", "fpr", "tpr")
	        elif task_type == "Regression":
	            default_name = "DefaultRegressor"
	            metric_keys = ("RMSE", "y_batch_test", "y_prob")
	        else:
	            default_name = None
	            metric_keys = ()

	        if default_name is not None:
	            if downstream_model == "Default Settings":
	                downstream_model = default_name
	                params = None

	            if len(selected_models) > 1:
	                outputs = fm4m.multi_modal(
	                    model_list=selected_models,
	                    downstream_model=downstream_model,
	                    params=params,
	                    dataset=dataset,
	                )
	            else:
	                outputs = fm4m.single_modal(
	                    model=selected_models[0],
	                    downstream_model=downstream_model,
	                    params=params,
	                    dataset=dataset,
	                )

	            # Unpack strictly so an unexpected number of outputs is reported
	            # as an error rather than silently truncated.
	            result, first, second, third, x_batch, y_batch = outputs
	            stored = zip(
	                metric_keys + ("x_batch", "y_batch"),
	                (first, second, third, x_batch, y_batch),
	            )
	            for key, value in stored:
	                plot_state[key] = value

	    except Exception as e:
	        return f"An error occurred: {e}"
	    return result or "Data & Model Setting is incorrect"


	# Function to handle plot display
	def display_plot(plot_type, state):
	    """Draw the requested plot from stored evaluation outputs.

	    Plotting is best effort: when the data needed for a plot is missing or
	    malformed, the axes are returned with labels and title but no data.

	    Args:
	        plot_type: "Latent Space", "ROC-AUC" or "Parity Plot". Any other
	            value yields empty axes.
	        state: Mapping read with ``.get()`` for "x_batch"/"y_batch",
	            "roc_auc"/"fpr"/"tpr" or "RMSE"/"y_batch_test"/"y_prob",
	            depending on ``plot_type``.

	    Returns:
	        The matplotlib figure created for this call.
	    """
	    fig, ax = plt.subplots()

	    if plot_type == "Latent Space":
	        x_batch, y_batch = state.get("x_batch"), state.get("y_batch")
	        if x_batch is not None and y_batch is not None:
	            # Draw on this figure's own axes rather than on whatever pyplot
	            # currently treats as the active axes.
	            ax.scatter(y_batch[:, 0], y_batch[:, 1], c='red', label='Class 1')
	            ax.scatter(x_batch[:, 0], x_batch[:, 1], c='blue', label='Class 0')

	        ax.set_xlabel('Feature 1')
	        ax.set_ylabel('Feature 2')
	        ax.set_title('Dataset Distribution')

	    elif plot_type == "ROC-AUC":
	        roc_auc, fpr, tpr = state.get("roc_auc"), state.get("fpr"), state.get("tpr")
	        if roc_auc is not None and fpr is not None and tpr is not None:
	            try:
	                ax.plot(
	                    fpr,
	                    tpr,
	                    color='darkorange',
	                    lw=2,
	                    label=f'ROC curve (area = {roc_auc:.4f})',
	                )
	                ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
	                ax.set_xlim([0.0, 1.0])
	                ax.set_ylim([0.0, 1.05])
	            except Exception:
	                # Deliberate best effort: malformed data leaves the curve out.
	                pass
	        ax.set_xlabel('False Positive Rate')
	        ax.set_ylabel('True Positive Rate')
	        ax.set_title('Receiver Operating Characteristic')

	    elif plot_type == "Parity Plot":
	        rmse, y_batch_test, y_prob = (
	            state.get("RMSE"),
	            state.get("y_batch_test"),
	            state.get("y_prob"),
	        )
	        ax.set_title("Parity plot")

	        if rmse is not None and y_batch_test is not None and y_prob is not None:
	            try:
	                # Coerce to float arrays so list-like inputs can be plotted.
	                actual = np.array(y_batch_test, dtype=float)
	                predicted = np.array(y_prob, dtype=float)
	                ax.scatter(
	                    actual,
	                    predicted,
	                    color="blue",
	                    label=f"Predicted vs Actual (RMSE: {rmse:.4f})",
	                )
	                # Diagonal reference line spanning the full value range.
	                low = min(actual.min(), predicted.min())
	                high = max(actual.max(), predicted.max())
	                ax.plot([low, high], [low, high], 'r-')
	            except Exception:
	                # Deliberate best effort: malformed data leaves the plot bare.
	                pass

	        ax.set_xlabel('Actual Values')
	        ax.set_ylabel('Predicted Values')

	    # Add a legend only when something labelled was actually drawn.
	    handles, _ = ax.get_legend_handles_labels()
	    if handles:
	        ax.legend(loc='lower right')
	    return fig


	# Run one evaluation and append its outcome to the log table
	def evaluate_and_log(selected_models, dataset, task_type, log_df, state):
	    """Evaluate the selected models and return the log with one row added.

	    Args:
	        selected_models: Model names; joined with " + " for the "Model" column.
	        dataset: Passed through to ``display_eval``.
	        task_type: Passed through to ``display_eval``.
	        log_df: DataFrame with an "id" column; rows whose id is the empty
	            string are dropped before the new row is appended.
	        state: Mapping whose "results" entry receives this run's plot
	            state under the new id; also passed to ``display_eval``.

	    Returns:
	        A DataFrame made of the kept rows followed by the new entry.
	    """
	    kept_rows = log_df[log_df['id'] != '']
	    run_id = len(kept_rows) + 1

	    # Register the plot state before evaluating; display_eval fills it in place.
	    plot_state = dict(roc_auc=None, RMSE=None, x_batch=None)
	    state["results"][run_id] = plot_state
	    score_text = display_eval(selected_models, dataset, task_type, state, plot_state)

	    row = {
	        "id": run_id,
	        "Model": " + ".join(selected_models),
	        "Score": score_text.replace(" Score", ""),
	    }
	    return pd.concat([kept_rows, pd.DataFrame([row])])