# rd2l_prediction / app.py
# NOTE: the following lines are Hugging Face Space page chrome captured by the
# scrape, preserved here as comments: "nick-leland's picture", commit
# "Debugging" (5c5f11f), raw / history / blame links, file size 7.99 kB.
import gradio as gr
import pandas as pd
import numpy as np
import onnxruntime as ort
import sys
from pathlib import Path
sys.path.append("rd2l_pred")
from training_data_prep import list_format, modification, league_money, df_gen
from feature_engineering import heroes, hero_information
# Global variables for model and feature columns
MODEL = None  # onnxruntime.InferenceSession, populated by load_model()
FEATURE_COLUMNS = None  # list of feature column names read from the prepped CSVs
def load_model():
    """Load the ONNX model and determine the expected feature columns.

    On success the module globals MODEL (an onnxruntime.InferenceSession)
    and FEATURE_COLUMNS (list of column names from the prepped prediction
    CSVs) are populated.

    Returns:
        str: a human-readable status message; success is signalled by a
        string starting with "Model loaded" (predict_cost relies on this).
    """
    global MODEL, FEATURE_COLUMNS
    try:
        model_path = Path("model/rd2l_forest.onnx")
        if not model_path.exists():
            return "Model file not found at: " + str(model_path)
        MODEL = ort.InferenceSession(str(model_path))
        # The feature structure comes from the prepped prediction CSVs; try
        # the result file first, then fall back to the plain prepped file.
        # Catch only the errors a failed read/parse can raise -- the original
        # bare `except:` also swallowed KeyboardInterrupt/SystemExit.
        for candidate in ("result_prediction_data_prepped.csv",
                          "prediction_data_prepped.csv"):
            try:
                FEATURE_COLUMNS = pd.read_csv(candidate).columns.tolist()
                break
            except (OSError, ValueError, pd.errors.ParserError,
                    pd.errors.EmptyDataError):
                continue
        else:
            return "Error: Could not find prediction data files to determine feature structure"
        return "Model loaded successfully"
    except Exception as e:
        return f"Error loading model: {str(e)}"
def process_player_data(player_id, mmr, comf_1, comf_2, comf_3, comf_4, comf_5):
    """Build a single-row DataFrame matching the model's training features.

    Mirrors the training pipeline: seed the row with MMR and the five
    position-comfort values, pad it with the columns found in
    prediction_data_prepped.csv, merge OpenDota hero statistics, and
    finally align the frame to the training column order.

    Args:
        player_id: OpenDota/Dotabuff numeric ID or a full profile URL.
        mmr: player's solo MMR (coercible to float).
        comf_1..comf_5: position comfort ratings (coercible to int).

    Returns:
        pandas.DataFrame with one row on success, or an error string.
    """
    try:
        # Accept a full profile URL and keep only the trailing numeric ID.
        if "/" in player_id:
            player_id = player_id.split("/")[-1]

        player_data = {
            "player_id": player_id,
            "mmr": float(mmr),
            "p1": int(comf_1),
            "p2": int(comf_2),
            "p3": int(comf_3),
            "p4": int(comf_4),
            "p5": int(comf_5)
        }

        # The prepped prediction CSV defines the exact column set AND order
        # the model was trained with.  Keep the ordered list -- the original
        # code later reindexed via a *set*, which fed the features to the
        # model in arbitrary order.
        ordered_columns = None
        try:
            pred_data = pd.read_csv("prediction_data_prepped.csv")
            print("\nReference columns from prediction_data_prepped.csv:")
            print(sorted(pred_data.columns.tolist()))
            print(f"Number of reference columns: {len(pred_data.columns)}")
            # Drop the target variable; everything else is a model input.
            ordered_columns = [c for c in pred_data.columns if c != 'Predicted_Cost']
            if not pred_data.empty:
                for col in ordered_columns:
                    if col not in player_data:
                        player_data[col] = 0
        except Exception as e:
            print(f"Warning - Error reading prediction data template: {str(e)}")

        # Get hero statistics using the OpenDota API (network call).
        try:
            hero_stats = hero_information(player_id)
            player_data.update(hero_stats.to_dict())
            # Add season identifier to match training data's one-hot format.
            player_season = f"{player_id}_S34"  # assumes current season is 34
            player_data[player_season] = 1.0
        except Exception as e:
            print(f"Warning - Error fetching hero data: {str(e)}")
            # If hero stats fail, add placeholder values so alignment still works.
            player_data.update({
                "total_games_played": 0,
                "total_winrate": 0.0
            })

        # Convert to DataFrame for consistency with training.
        df = pd.DataFrame([player_data])
        print("\nProcessed data columns:")
        print(sorted(df.columns.tolist()))
        print(f"Number of processed columns: {len(df.columns)}")

        if ordered_columns is None:
            # Previously this fell through to a NameError on pred_data;
            # return an explicit, actionable error string instead.
            return "Error processing player data: prediction data template unavailable"

        # Report the diff between expected and produced columns for debugging.
        expected_cols = set(ordered_columns)
        actual_cols = set(df.columns)
        missing_cols = expected_cols - actual_cols
        extra_cols = actual_cols - expected_cols
        if missing_cols:
            print("\nMissing columns:")
            print(sorted(list(missing_cols)))
        if extra_cols:
            print("\nExtra columns:")
            print(sorted(list(extra_cols)))

        # Ensure we have all needed columns, then select them in the
        # *training* order (not set order) so the feature vector lines up
        # with what the model expects.
        for col in missing_cols:
            df[col] = 0
        df = df[ordered_columns]

        print(f"\nFinal number of columns: {len(df.columns)}")
        return df
    except Exception as e:
        return f"Error processing player data: {str(e)}"
def predict_cost(user_id, mmr, comf_1, comf_2, comf_3, comf_4, comf_5):
    """Main prediction function for Gradio interface"""
    try:
        # Lazily (re)load the model if the startup load never succeeded.
        if MODEL is None:
            status = load_model()
            if not status.startswith("Model loaded"):
                return status

        # Build the one-row feature frame; a str result is an error message.
        features = process_player_data(user_id, mmr, comf_1, comf_2, comf_3, comf_4, comf_5)
        if isinstance(features, str):
            return features

        # Debug information for the Space logs.
        print("Processed data shape:", features.shape)
        print("Processed data columns:", features.columns.tolist())

        # Run the ONNX session on the float32 feature vector.
        try:
            input_name = MODEL.get_inputs()[0].name
            raw_output = MODEL.run(None, {input_name: features.values.astype(np.float32)})[0]
            predicted_cost = round(float(raw_output[0]), 2)
        except Exception as e:
            return f"Error during prediction: {str(e)}\nProcessed data shape: {features.shape}"

        return f"""Predicted Cost: {predicted_cost}
Player Details:
- MMR: {mmr}
- Position Comfort:
* Pos 1: {comf_1}
* Pos 2: {comf_2}
* Pos 3: {comf_3}
* Pos 4: {comf_4}
* Pos 5: {comf_5}
Note: This prediction is based on historical data and player statistics from OpenDota."""
    except Exception as e:
        return f"Error in prediction pipeline: {str(e)}"
# Create Gradio interface
# Wires predict_cost to a simple form: a player ID/URL textbox, an MMR
# number field, and five 1-5 comfort sliders (one per position), returning
# the prediction as a text box.
demo = gr.Interface(
    fn=predict_cost,
    inputs=[
        gr.Textbox(label="Player ID or Link to OpenDota/Dotabuff",
                   placeholder="Enter player ID or full profile URL"),
        gr.Number(label="MMR", value=3000),
        gr.Slider(1, 5, value=3, step=1, label="Comfort (Pos 1)"),
        gr.Slider(1, 5, value=3, step=1, label="Comfort (Pos 2)"),
        gr.Slider(1, 5, value=3, step=1, label="Comfort (Pos 3)"),
        gr.Slider(1, 5, value=3, step=1, label="Comfort (Pos 4)"),
        gr.Slider(1, 5, value=3, step=1, label="Comfort (Pos 5)")
    ],
    # One-click example row shown beneath the form.
    examples=[
        ["https://www.dotabuff.com/players/188649776", 6812, 5, 5, 4, 2, 1]
    ],
    outputs=gr.Textbox(label="Prediction Results"),
    title="RD2L Player Cost Predictor",
    description="""This tool predicts the auction cost for RD2L players based on their MMR,
position comfort levels, and historical performance data from OpenDota.
Enter a player's OpenDota ID or profile URL along with their current stats
to get a predicted cost.""",
    # Markdown rendered below the interface.
    article="""### How it works
- The predictor uses machine learning trained on historical RD2L draft data
- Player statistics are fetched from OpenDota API
- Position comfort levels range from 1 (least comfortable) to 5 (most comfortable)
- Predictions are based on both current stats and historical performance
### Notes
- MMR should be the player's current solo MMR
- Position comfort should reflect actual role experience
- Predictions are estimates and may vary from actual draft results"""
)
# Load model on startup
# Loading at import time means the first web request doesn't pay the model
# load cost; the status string goes to the Space logs.
print(load_model())

if __name__ == "__main__":
    demo.launch()