# demo/backend/config/models_config.py
"""
Central configuration for models and providers
This file centralizes all configurations related to models and providers used in the application.
"""
# Preferred inference providers, in priority order; used in get_available_model_provider.py
# PREFERRED_PROVIDERS = ["sambanova", "novita"]
PREFERRED_PROVIDERS = ["fireworks-ai", "sambanova", "novita"]
# Default models used for evaluation
DEFAULT_EVALUATION_MODELS = [
"Qwen/QwQ-32B",
"Qwen/Qwen2.5-72B-Instruct",
"Qwen/Qwen2.5-32B-Instruct",
"meta-llama/Llama-3.1-8B-Instruct",
"meta-llama/Llama-3.3-70B-Instruct",
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
"mistralai/Mistral-Small-24B-Instruct-2501",
]
# Alternative models to use if the default benchmark model is not available
# (an illustrative fallback helper follows this list)
ALTERNATIVE_BENCHMARK_MODELS = [
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
"meta-llama/Llama-3.3-70B-Instruct",
"meta-llama/Llama-3.1-8B-Instruct",
"Qwen/Qwen2.5-72B-Instruct",
"mistralai/Mistral-Small-24B-Instruct-2501",
# Open-source models that may work without authentication
"HuggingFaceH4/zephyr-7b-beta",
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"microsoft/phi-2",
]
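
# Illustrative sketch only (a hypothetical helper, not part of the original
# module): walk the fallback list in order until one model passes the
# caller-supplied availability check.
def _first_available_alternative(is_available):
    """Return the first model in ALTERNATIVE_BENCHMARK_MODELS accepted by
    the is_available predicate, or None if none qualifies."""
    for model in ALTERNATIVE_BENCHMARK_MODELS:
        if is_available(model):
            return model
    return None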
# Default model required by create_bench_config_file.py (exactly one model)
DEFAULT_BENCHMARK_MODEL = "Qwen/Qwen2.5-32B-Instruct"
# Models by role for the benchmark configuration.
# Every role uses the default model except chunking, which uses an embedding model.
BENCHMARK_MODEL_ROLES = {
"ingestion": [DEFAULT_BENCHMARK_MODEL],
"summarization": [DEFAULT_BENCHMARK_MODEL],
"chunking": ["intfloat/multilingual-e5-large-instruct"],
"single_shot_question_generation": [DEFAULT_BENCHMARK_MODEL],
"multi_hop_question_generation": [DEFAULT_BENCHMARK_MODEL],
}
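
# Example lookups (values follow directly from the definitions above):
#   BENCHMARK_MODEL_ROLES["summarization"][0] == "Qwen/Qwen2.5-32B-Instruct"
#   BENCHMARK_MODEL_ROLES["chunking"][0] == "intfloat/multilingual-e5-large-instruct"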
# Default evaluation timeout (in seconds)
DEFAULT_EVALUATION_TIMEOUT = 60.0
# Default benchmark timeout (in seconds)
DEFAULT_BENCHMARK_TIMEOUT = 300.0