"""
Central configuration for models and providers

This file centralizes all configurations related to models and providers used in the application.
"""

# Definition of preferred providers, used in get_available_model_provider.py
# PREFERRED_PROVIDERS = ["sambanova", "novita"]
PREFERRED_PROVIDERS = ["fireworks-ai", "sambanova", "novita"]
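
# --- Illustrative sketch (assumption, not part of the original configuration) ---
# One way a provider-selection helper such as the one in
# get_available_model_provider.py might consume this list: return the first
# preferred provider that is actually serving the requested model. The
# `available_providers` argument is a hypothetical stand-in for whatever
# provider list the caller retrieves.
def _pick_preferred_provider(available_providers):
    """Return the first PREFERRED_PROVIDERS entry found in available_providers, or None."""
    for provider in PREFERRED_PROVIDERS:
        if provider in available_providers:
            return provider
    return None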

# Default list of models to evaluate
DEFAULT_EVALUATION_MODELS = [
    "Qwen/QwQ-32B",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-32B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.3-70B-Instruct",
    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
    "mistralai/Mistral-Small-24B-Instruct-2501",
]

# Alternative models to use if the default model is not available
ALTERNATIVE_BENCHMARK_MODELS = [
    "meta-llama/Llama-3.3-70B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    "Qwen/Qwen2.5-72B-Instruct",
    "mistralai/Mistral-Small-24B-Instruct-2501",
    # Open-source models that may work without authentication
    "HuggingFaceH4/zephyr-7b-beta",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "microsoft/phi-2",
]

# Required model for create_bench_config_file.py (only one default model)
DEFAULT_BENCHMARK_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
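
# --- Illustrative sketch (assumption, not part of the original configuration) ---
# A fallback loop of the kind the benchmark code might use: try the default
# benchmark model first, then each alternative in order. `is_model_available`
# is a hypothetical callable that reports whether a model can currently be served.
def _resolve_benchmark_model(is_model_available):
    for model in (DEFAULT_BENCHMARK_MODEL, *ALTERNATIVE_BENCHMARK_MODELS):
        if is_model_available(model):
            return model
    return None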

# Models by roles for benchmark configuration
# All roles use the default model except chunking
BENCHMARK_MODEL_ROLES = {
    "ingestion": [DEFAULT_BENCHMARK_MODEL],
    "summarization": [DEFAULT_BENCHMARK_MODEL],
    "chunking": ["intfloat/multilingual-e5-large-instruct"],
    "single_shot_question_generation": [DEFAULT_BENCHMARK_MODEL],
    "multi_hop_question_generation": [DEFAULT_BENCHMARK_MODEL],
}

# Default evaluation timeout (in seconds)
DEFAULT_EVALUATION_TIMEOUT = 60.0

# Default benchmark timeout (in seconds)
DEFAULT_BENCHMARK_TIMEOUT = 300.0
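
# --- Illustrative sketch (assumption, not part of the original configuration) ---
# Both timeouts are plain float seconds, so callers can pass them directly to
# standard timeout mechanisms. The helper below is hypothetical; `coro` is any
# awaitable supplied by the caller.
import asyncio

async def _run_with_timeout(coro, timeout=DEFAULT_EVALUATION_TIMEOUT):
    """Await `coro`, raising asyncio.TimeoutError if it runs longer than `timeout` seconds."""
    return await asyncio.wait_for(coro, timeout=timeout)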