# demo/backend/config/models_config.py
"""
Central configuration for models and providers
This file centralizes all configurations related to models and providers used in the application.
"""
# Preferred inference providers, in priority order; used in get_available_model_provider.py
# PREFERRED_PROVIDERS = ["sambanova", "novita"]
PREFERRED_PROVIDERS = ["fireworks-ai", "sambanova", "novita"]
# Default models used for evaluation
DEFAULT_EVALUATION_MODELS = [
"Qwen/QwQ-32B",
"Qwen/Qwen2.5-72B-Instruct",
"Qwen/Qwen2.5-32B-Instruct",
"meta-llama/Llama-3.1-8B-Instruct",
"meta-llama/Llama-3.3-70B-Instruct",
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
"mistralai/Mistral-Small-24B-Instruct-2501",
]
# Alternative models to use if the default benchmark model is not available
# (an illustrative fallback helper follows this list)
ALTERNATIVE_BENCHMARK_MODELS = [
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
"meta-llama/Llama-3.3-70B-Instruct",
"meta-llama/Llama-3.1-8B-Instruct",
"Qwen/Qwen2.5-72B-Instruct",
"mistralai/Mistral-Small-24B-Instruct-2501",
# Open-source models that may work without authentication
"HuggingFaceH4/zephyr-7b-beta",
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"microsoft/phi-2",
]
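
# Illustrative sketch only (a hypothetical helper, not part of the original
# module): walk the fallback list in order until one model passes the
# caller-supplied availability check.
def _first_available_alternative(is_available):
    """Return the first model in ALTERNATIVE_BENCHMARK_MODELS accepted by
    the is_available predicate, or None if none qualifies."""
    for model in ALTERNATIVE_BENCHMARK_MODELS:
        if is_available(model):
            return model
    return None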
# Default model required by create_bench_config_file.py (exactly one model)
DEFAULT_BENCHMARK_MODEL = "Qwen/Qwen2.5-32B-Instruct"
# Models by role for the benchmark configuration.
# Every role uses the default model except chunking, which uses an embedding model.
BENCHMARK_MODEL_ROLES = {
"ingestion": [DEFAULT_BENCHMARK_MODEL],
"summarization": [DEFAULT_BENCHMARK_MODEL],
"chunking": ["intfloat/multilingual-e5-large-instruct"],
"single_shot_question_generation": [DEFAULT_BENCHMARK_MODEL],
"multi_hop_question_generation": [DEFAULT_BENCHMARK_MODEL],
}
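
# Example lookups (values follow directly from the definitions above):
#   BENCHMARK_MODEL_ROLES["summarization"][0] == "Qwen/Qwen2.5-32B-Instruct"
#   BENCHMARK_MODEL_ROLES["chunking"][0] == "intfloat/multilingual-e5-large-instruct"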
# Default evaluation timeout (in seconds)
DEFAULT_EVALUATION_TIMEOUT = 60.0
# Default benchmark timeout (in seconds)
DEFAULT_BENCHMARK_TIMEOUT = 300.0