Spaces:

nick-leland
/

rd2l_prediction

Sleeping

App Files Files Community

rd2l_prediction / app.py

nick-leland

Production change

67bbd16 6 months ago

raw

history blame

10.7 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import onnxruntime as ort
	import sys
	from pathlib import Path
	sys.path.append("rd2l_pred")
	from feature_engineering import heroes, hero_information

	# Feature columns, in the order the model input frame is laid out:
	# the fixed base features first, then one games_<id> column per id,
	# then one winrate_<id> column per id.
	EXPECTED_COLUMNS = [
	    'mmr', 'p1', 'p2', 'p3', 'p4', 'p5',
	    'count', 'mean', 'std', 'min', 'max', 'sum',
	    'total_games_played', 'total_winrate',
	]

	# Ids that get a games_/winrate_ column pair (presumably hero ids -- confirm
	# against feature_engineering). Contiguous 1-23 and 25-114 (24 is skipped),
	# followed by a sparse tail.
	games_ids = [
	    *range(1, 24),
	    *range(25, 115),
	    119, 120, 121, 123, 126, 128, 129, 131, 135, 136, 137, 138, 145,
	]

	EXPECTED_COLUMNS += [f'games_{i}' for i in games_ids]
	EXPECTED_COLUMNS += [f'winrate_{i}' for i in games_ids]

	def load_reference_data(player_id):
	    """Read ``<player_id>.csv`` and split it into features and the stored cost.

	    The CSV is indexed by its first column. Every row except
	    ``Predicted_Cost`` is returned as the feature series (taken from the
	    first data column); the ``Predicted_Cost`` row of the
	    ``<player_id>_S34`` column is returned as a float.

	    Returns:
	        ``(features, prediction)`` on success, or ``(None, None)`` if anything
	        goes wrong (missing file, missing row/column, non-numeric cost). The
	        failure is reported with ``print`` rather than raised.
	    """
	    summary_keys = ('mmr', 'p1', 'p2', 'p3', 'p4', 'p5',
	                    'count', 'mean', 'std', 'min', 'max', 'sum')
	    try:
	        table = pd.read_csv(f"{player_id}.csv", encoding='utf-8', index_col=0)

	        # Everything except the prediction row is an input feature.
	        is_feature_row = table.index != 'Predicted_Cost'
	        features = table[is_feature_row].iloc[:, 0]
	        prediction = table.loc['Predicted_Cost', f"{player_id}_S34"]

	        # Debug dump of the headline features that are present in the file.
	        print("\nReference data loaded:")
	        for key in summary_keys:
	            if key not in features.index:
	                continue
	            print(f"{key}: {features[key]}")

	        return features, float(prediction)
	    except Exception as e:
	        print(f"Could not load reference data: {e}")
	        return None, None

	def prepare_single_player_data(user_id, mmr, comf_1, comf_2, comf_3, comf_4, comf_5):
	    """Build the one-row feature DataFrame for a single player.

	    Args:
	        user_id: Player id, or a profile URL whose last path segment is the
	            id. Surrounding whitespace and trailing slashes are ignored.
	        mmr: Player MMR; converted with ``float``.
	        comf_1, comf_2, comf_3, comf_4, comf_5: Position comfort values;
	            each converted with ``int``.

	    Returns:
	        A one-row ``pandas.DataFrame`` whose columns are exactly
	        ``EXPECTED_COLUMNS``, in that order. Columns for which no value was
	        obtained stay 0.

	    Raises:
	        Exception: any failure is reported with ``print`` and re-raised
	            unchanged.
	    """
	    try:
	        # Take the last non-empty path segment so that a pasted URL ending in
	        # "/" (or padded with spaces) does not yield an empty player id.
	        player_id = user_id.strip().rstrip("/").split("/")[-1]

	        # Validate the numeric inputs once; the fallback stats below reuse
	        # the converted MMR instead of the raw argument.
	        mmr_value = float(mmr)

	        # Start with zeros for every expected column, then fill in what we know.
	        data = dict.fromkeys(EXPECTED_COLUMNS, 0)
	        data.update({
	            'mmr': mmr_value,
	            'p1': int(comf_1),
	            'p2': int(comf_2),
	            'p3': int(comf_3),
	            'p4': int(comf_4),
	            'p5': int(comf_5),
	        })

	        # Get hero statistics using OpenDota API
	        hero_stats = hero_information(player_id)

	        if hero_stats is not None:
	            # Copy over every statistic that has a matching column. The keys
	            # of `data` are exactly EXPECTED_COLUMNS, so this also covers
	            # total_games_played / total_winrate (which stay 0 when absent).
	            for key, value in hero_stats.items():
	                if key in data:
	                    data[key] = value

	        # Get statistics from league data
	        try:
	            # The sheet is read for the debug output below; a failure to read
	            # it (or a missing column) selects the MMR-based fallback.
	            captains_df = pd.read_csv("S34 Draft Sheet - Captains.csv", encoding='utf-8')
	            bucks_stats = captains_df["Buck's Bucks"].describe()
	            cents_stats = captains_df["Crub Cents"].describe()

	            # Print stats for debugging
	            print("\nLeague Money Statistics:")
	            print("Buck's Bucks stats:", bucks_stats)
	            print("Crub Cents stats:", cents_stats)

	            # The money features are hard-coded constants; the commented
	            # formulas are the computed alternatives they replaced.
	            combined_stats = {
	                # 'count': bucks_stats['count'] + cents_stats['count'],
	                'count': 9.0,
	                # 'mean': (bucks_stats['mean'] + cents_stats['mean']) / 2,
	                'mean': 489.3333333333333,
	                # 'std': (bucks_stats['std'] + cents_stats['std']) / 2,
	                'std': 77.4483698989204,
	                # 'min': min(bucks_stats['min'], cents_stats['min']),
	                'min': 352.0,
	                # 'max': max(bucks_stats['max'], cents_stats['max']),
	                'max': 593.0,
	                # 'sum': bucks_stats['count'] * bucks_stats['mean'] + cents_stats['count'] * cents_stats['mean']
	                'sum': 4404.0,
	            }
	            print("Combined stats:", combined_stats)
	            data.update(combined_stats)
	        except Exception as e:
	            # Best-effort fallback: approximate the money stats from MMR.
	            print(f"Error reading captains data: {e}")
	            data.update({
	                'count': 1,
	                'mean': mmr_value / 200,
	                'std': mmr_value / 400,
	                'min': mmr_value / 250,
	                'max': mmr_value / 150,
	                'sum': mmr_value / 200,
	            })

	        # Convert to DataFrame
	        df = pd.DataFrame([data])

	        # Ensure columns are in correct order
	        df = df[EXPECTED_COLUMNS]

	        print(f"DataFrame shape: {df.shape}")
	        print("Missing columns:", set(EXPECTED_COLUMNS) - set(df.columns))

	        # Print key feature values for debugging
	        print("\nKey feature values:")
	        print(f"MMR: {df['mmr'].iloc[0]}")
	        print(f"Position comfort: {df[['p1', 'p2', 'p3', 'p4', 'p5']].iloc[0].tolist()}")
	        print(f"Money stats: {df[['count', 'mean', 'std', 'min', 'max', 'sum']].iloc[0].tolist()}")
	        print(f"Total games: {df['total_games_played'].iloc[0]}")
	        print(f"Total winrate: {df['total_winrate'].iloc[0]}")

	        return df

	    except Exception as e:
	        print(f"Error in data preparation: {e}")
	        # Bare raise keeps the original traceback intact.
	        raise

	def predict_cost(user_id, mmr, comf_1, comf_2, comf_3, comf_4, comf_5):
	    """Main prediction function for the Gradio interface.

	    Builds the player's feature row, logs a comparison against the player's
	    reference CSV when one can be loaded, runs the ONNX model at
	    ``model/rd2l_forest.onnx`` and returns the result as display text.

	    Args:
	        user_id: Player id, or a profile URL whose last path segment is the
	            id. Surrounding whitespace and trailing slashes are ignored.
	        mmr: Player MMR.
	        comf_1, comf_2, comf_3, comf_4, comf_5: Position comfort values.

	    Returns:
	        ``"Predicted Cost: <value>"`` (rounded to 2 decimals) on success,
	        a "Model file not found" message if the model file is missing, or an
	        "Error in prediction pipeline" message for any other failure. The
	        function never raises, so the UI always receives a string.
	    """
	    try:
	        # Take the last non-empty path segment so that a pasted URL ending in
	        # "/" (or padded with spaces) does not yield an empty player id.
	        player_id = user_id.strip().rstrip("/").split("/")[-1]

	        # Only the reference features are used (for the debug comparison);
	        # the stored reference cost is not part of the returned text.
	        reference_features, _reference_prediction = load_reference_data(player_id)

	        # Prepare the player data
	        processed_data = prepare_single_player_data(
	            user_id, mmr, comf_1, comf_2, comf_3, comf_4, comf_5
	        )

	        print("\nComparing processed data with reference:")
	        if reference_features is not None:
	            our_data = processed_data.iloc[0]
	            for idx in ['mmr', 'p1', 'p2', 'p3', 'p4', 'p5', 'count', 'mean', 'std', 'min', 'max', 'sum']:
	                has_ref = idx in reference_features.index
	                our_val = our_data[idx]
	                ref_val = reference_features[idx] if has_ref else "N/A"
	                print(f"{idx}:")
	                print(f" Our value: {our_val}")
	                print(f" Ref value: {ref_val}")
	                if has_ref and our_val != ref_val:
	                    print(" * MISMATCH *")

	        # Load and use the model
	        model_path = Path("model/rd2l_forest.onnx")
	        if not model_path.exists():
	            return f"Model file not found at: {model_path}"

	        session = ort.InferenceSession(str(model_path))

	        # Make prediction
	        input_name = session.get_inputs()[0].name
	        model_input = processed_data.values.astype(np.float32)
	        prediction = session.run(None, {input_name: model_input})[0]
	        print("\nPrediction output:", prediction)

	        # Flatten first: the output may be shaped (1,) or (1, 1), and calling
	        # float() on a non-0-d array is deprecated in recent NumPy.
	        first_value = np.asarray(prediction).reshape(-1)[0]
	        predicted_cost = round(float(first_value), 2)
	        print("Predicted cost:", predicted_cost)

	        return f"Predicted Cost: {predicted_cost}"

	    except Exception as e:
	        # Top-level UI boundary: report the failure as text instead of
	        # letting the exception escape into Gradio.
	        print(f"Error in prediction pipeline: {type(e).__name__}: {e}")
	        return f"Error in prediction pipeline: {type(e).__name__}: {e}"

	# Gradio UI: a player id/URL box, an MMR field and five comfort sliders feed
	# predict_cost; its returned text is shown in a single textbox.
	demo = gr.Interface(
	    fn=predict_cost,
	    title="RD2L Player Cost Predictor",
	    description="""This tool predicts the auction cost for RD2L players based on their MMR,
	position comfort levels, and historical performance data from OpenDota.
	Enter a player's OpenDota ID or profile URL along with their current stats
	to get a predicted cost.""",
	    article="""### How it works
	- The predictor uses machine learning trained on historical RD2L draft data
	- Player statistics are fetched from OpenDota API
	- Position comfort levels range from 1 (least comfortable) to 5 (most comfortable)
	- Predictions are based on both current stats and historical performance

	### Notes
	- MMR should be the player's current solo MMR
	- Position comfort should reflect actual role experience
	- Predictions are estimates and may vary from actual draft results""",
	    inputs=[
	        gr.Textbox(
	            label="Player ID or Link to OpenDota/Dotabuff",
	            placeholder="Enter player ID or full profile URL",
	        ),
	        gr.Number(label="MMR", value=3000),
	        # One 1-5 comfort slider per position, in position order (1..5),
	        # matching the comf_1..comf_5 parameters of predict_cost.
	        *[
	            gr.Slider(1, 5, value=3, step=1, label=f"Comfort (Pos {position})")
	            for position in range(1, 6)
	        ],
	    ],
	    examples=[
	        ["https://www.dotabuff.com/players/188649776", 6812, 5, 5, 4, 2, 1],
	    ],
	    outputs=gr.Textbox(label="Prediction Results"),
	)

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()