Commit 97bea1c · Parent: 4759fe1
Committed by tfrere

update model testing at server startup

backend/config/models_config.py CHANGED
@@ -25,6 +25,10 @@ ALTERNATIVE_BENCHMARK_MODELS = [
     "meta-llama/Llama-3.1-8B-Instruct",
     "Qwen/Qwen2.5-72B-Instruct",
     "mistralai/Mistral-Small-24B-Instruct-2501",
+    # Open-source models that can work without authentication
+    "HuggingFaceH4/zephyr-7b-beta",
+    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+    "microsoft/phi-2",
 ]
 
 # Required model for create_bench_config_file.py (only one default model)
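The three models added above are small, publicly hosted checkpoints, intended as fallbacks when no HF_TOKEN is configured. A minimal sketch (not part of the commit) of how their provider availability can be checked with huggingface_hub, mirroring the `model_info(..., expand="inferenceProviderMapping")` call used in `get_available_model_provider` below; the model name here is just an example:

```python
# Sketch only: list which inference providers expose one of the new fallback models.
# Public models can be queried without a token.
from huggingface_hub import model_info

info = model_info("HuggingFaceH4/zephyr-7b-beta", expand="inferenceProviderMapping")
mapping = getattr(info, "inference_provider_mapping", None) or {}
print(list(mapping.keys()) or "No inference providers found")
```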
backend/main.py CHANGED
@@ -4,6 +4,7 @@ import os
 from dotenv import load_dotenv
 from routes import routers, session_files, active_tasks, benchmark
 from tasks.get_available_model_provider import test_models
+from datetime import datetime
 
 # Load environment variables from .env file
 load_dotenv()
@@ -11,11 +12,15 @@ load_dotenv()
 # Verify environment variables are loaded
 hf_token = os.getenv("HF_TOKEN")
 if not hf_token:
-    print("Warning: HF_TOKEN environment variable is not set. Make sure it's defined in your .env file.")
+    print("⚠️ WARNING: HF_TOKEN environment variable is not set.")
+else:
+    print("ℹ️ HF_TOKEN found in environment variables")
 
 hf_organization = os.getenv("HF_ORGANIZATION")
 if not hf_organization:
-    print("Warning: HF_ORGANIZATION environment variable is not set. Make sure it's defined in your .env file.")
+    print("⚠️ WARNING: HF_ORGANIZATION environment variable is not set.")
+else:
+    print(f"ℹ️ HF_ORGANIZATION found: {hf_organization}")
 
 app = FastAPI(title="Yourbench API")
 
@@ -31,9 +36,31 @@ app.add_middleware(
 # Add an event handler to display session_files at startup
 @app.on_event("startup")
 async def startup_event():
-    print("Application startup")
+    print("\n===== Application Startup at", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====\n")
     print(f"Initial session_files: {session_files}")
 
+    # Display detailed information about the environment variables
+    print("\n===== Environment Variables Check =====")
+    hf_token = os.environ.get("HF_TOKEN")
+    if hf_token:
+        print("✅ HF_TOKEN AVAILABLE")
+    else:
+        print("❌ HF_TOKEN MISSING - HuggingFace models will not work correctly")
+
+    hf_organization = os.environ.get("HF_ORGANIZATION")
+    if hf_organization:
+        print(f"✅ HF_ORGANIZATION: {hf_organization}")
+    else:
+        print("❌ HF_ORGANIZATION MISSING")
+
+    print("\n===== Additional Environment Variables =====")
+    # Display other useful variables
+    for env_var in ["PORT", "DEBUG", "PYTHONPATH", "VIRTUAL_ENV"]:
+        value = os.environ.get(env_var)
+        if value:
+            print(f"ℹ️ {env_var}: {value}")
+    print("=======================================\n")
+
     # Test the models at startup and display the results
     print("===== Testing model availability at startup =====")
     test_results = test_models(verbose=True)
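The new startup logging can be exercised locally without launching uvicorn, because FastAPI runs `@app.on_event("startup")` handlers when a TestClient context is entered. A minimal sketch, assuming the backend dependencies are installed and the command is run from backend/ so that `main` and `routes` are importable:

```python
# Sketch only: trigger the startup event (env checks + model tests) in-process.
from fastapi.testclient import TestClient

from main import app

# Entering the context fires the startup handlers; the environment-variable
# report and model availability results are printed to stdout.
with TestClient(app) as client:
    pass
```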
backend/tasks/get_available_model_provider.py CHANGED
@@ -30,31 +30,104 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
     """
 
     try:
-
         load_dotenv()
 
         # Get HF token from environment
         hf_token = os.environ.get("HF_TOKEN")
         if not hf_token:
-            raise ValueError("HF_TOKEN not defined in environment")
-        # Get HF token from environment
+            if verbose:
+                logger.warning("HF_TOKEN not defined in environment, trying without token")
+            # Try without a token (some providers accept anonymous requests)
+            return _test_provider_without_token(model_name, provider, verbose)
+
+        # Get HF organization from environment
         hf_organization = os.environ.get("HF_ORGANIZATION")
         if not hf_organization:
-            raise ValueError("HF_ORGANIZATION not defined in environment")
-
+            if verbose:
+                logger.warning("HF_ORGANIZATION not defined in environment")
 
         if verbose:
            logger.info(f"Testing provider {provider} for model {model_name}")
 
         # Initialize the InferenceClient with the specific provider
+        try:
+            client = InferenceClient(
+                model=model_name,
+                token=hf_token,
+                provider=provider,
+                # bill_to=hf_organization if hf_organization else None,
+                timeout=3 # Increased timeout to allow model loading
+            )
+
+            try:
+                # Use the chat completions method for testing
+                response = client.chat_completion(
+                    messages=[{"role": "user", "content": "Hello"}],
+                    max_tokens=5
+                )
+
+                if verbose:
+                    logger.info(f"Provider {provider} is available for {model_name}")
+                return True
+
+            except Exception as e:
+                if verbose:
+                    error_message = str(e)
+                    logger.warning(f"Error with provider {provider}: {error_message}")
+
+                    # Log specific error types if we can identify them
+                    if "status_code=429" in error_message:
+                        logger.warning(f"Provider {provider} rate limited. You may need to wait or upgrade your plan.")
+                    elif "status_code=401" in error_message or "status_code=403" in error_message:
+                        logger.warning(f"Authentication failed for provider {provider}. Check your token.")
+                        # Try without a token
+                        if verbose:
+                            logger.info(f"Trying provider {provider} without authentication")
+                        return _test_provider_without_token(model_name, provider, verbose)
+                    elif "status_code=503" in error_message:
+                        logger.warning(f"Provider {provider} service unavailable. Model may be loading or provider is down.")
+                    elif "timed out" in error_message.lower():
+                        logger.warning(f"Timeout error with provider {provider} - request timed out after 10 seconds")
+                return False
+        except Exception as auth_error:
+            if "401" in str(auth_error) or "Unauthorized" in str(auth_error):
+                # On an authentication error, try without a token
+                if verbose:
+                    logger.warning(f"Authentication error with {provider}: {str(auth_error)}. Trying without token.")
+                return _test_provider_without_token(model_name, provider, verbose)
+            else:
+                if verbose:
+                    logger.warning(f"Error creating client for {provider}: {str(auth_error)}")
+                return False
+
+    except Exception as e:
+        if verbose:
+            logger.warning(f"Error in test_provider: {str(e)}")
+        return False
+
+def _test_provider_without_token(model_name: str, provider: str, verbose: bool = False) -> bool:
+    """
+    Try to test a provider without an authentication token
+
+    Args:
+        model_name: Name of the model
+        provider: Provider to test
+        verbose: Display detailed logs
+
+    Returns:
+        True if the provider is available, False otherwise
+    """
+    try:
+        if verbose:
+            logger.info(f"Testing provider {provider} for model {model_name} without authentication")
+
+        # Initialize without token
         client = InferenceClient(
             model=model_name,
-            token=hf_token,
             provider=provider,
-            # bill_to=hf_organization,
-            timeout=3 # Increased timeout to allow model loading
+            timeout=3
         )
-
+
         try:
             # Use the chat completions method for testing
             response = client.chat_completion(
@@ -63,28 +136,17 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
             )
 
             if verbose:
-                logger.info(f"Provider {provider} is available for {model_name}")
+                logger.info(f"Provider {provider} is available for {model_name} without authentication")
             return True
 
         except Exception as e:
             if verbose:
-                error_message = str(e)
-                logger.warning(f"Error with provider {provider}: {error_message}")
-
-                # Log specific error types if we can identify them
-                if "status_code=429" in error_message:
-                    logger.warning(f"Provider {provider} rate limited. You may need to wait or upgrade your plan.")
-                elif "status_code=401" in error_message:
-                    logger.warning(f"Authentication failed for provider {provider}. Check your token.")
-                elif "status_code=503" in error_message:
-                    logger.warning(f"Provider {provider} service unavailable. Model may be loading or provider is down.")
-                elif "timed out" in error_message.lower():
-                    logger.warning(f"Timeout error with provider {provider} - request timed out after 10 seconds")
+                logger.warning(f"Error with provider {provider} without authentication: {str(e)}")
             return False
 
     except Exception as e:
         if verbose:
-            logger.warning(f"Error in test_provider: {str(e)}")
+            logger.warning(f"Error in _test_provider_without_token: {str(e)}")
         return False
 
 def get_available_model_provider(model_name, verbose=False):
@@ -108,21 +170,48 @@ def get_available_model_provider(model_name, verbose=False):
 
     # Get providers for the model and prioritize them
     try:
-        info = model_info(model_name, token=hf_token, expand="inferenceProviderMapping")
+        # Try with the token
+        try:
+            if verbose:
+                logger.info(f"Trying to get model info for {model_name} with auth token")
+            info = model_info(model_name, token=hf_token, expand="inferenceProviderMapping")
+        except Exception as auth_error:
+            # If authentication fails, try without a token (for public models)
+            if "401" in str(auth_error) or "Unauthorized" in str(auth_error):
+                if verbose:
+                    logger.warning(f"Authentication failed for {model_name}, trying without token")
+                # Try to retrieve the info without a token
+                try:
+                    info = model_info(model_name, expand="inferenceProviderMapping")
+                except Exception as e:
+                    if verbose:
+                        logger.error(f"Failed to get model info without token: {str(e)}")
+                    # As a last resort, fall back to the default providers list for testing
+                    if verbose:
+                        logger.warning(f"Using default providers list as fallback for {model_name}")
+                    # Provide a fallback list of providers to test directly
+                    return _test_fallback_providers(model_name, verbose)
+            else:
+                # Any other error, re-raise it
+                raise auth_error
+
         if not hasattr(info, "inference_provider_mapping"):
             if verbose:
                 logger.info(f"No inference providers found for {model_name}")
-            return None
+            # Try with the default providers list
+            return _test_fallback_providers(model_name, verbose)
 
         providers = list(info.inference_provider_mapping.keys())
         if not providers:
             if verbose:
                 logger.info(f"Empty list of providers for {model_name}")
-            return None
+            # Try with the default providers list
+            return _test_fallback_providers(model_name, verbose)
     except Exception as e:
         if verbose:
             logger.error(f"Error retrieving model info for {model_name}: {str(e)}")
-        return None
+        # Try with the default providers list
+        return _test_fallback_providers(model_name, verbose)
 
     # Prioritize providers
     prioritized_providers = prioritize_providers(providers)
@@ -179,7 +268,39 @@
         if verbose:
             logger.error(f"Error in get_available_model_provider: {str(e)}")
         return None
-
+
+def _test_fallback_providers(model_name, verbose=False):
+    """
+    Fallback helper that tests a list of common providers without going through the API
+
+    Args:
+        model_name: Name of the model
+        verbose: Display detailed logs
+
+    Returns:
+        The first available provider, or None
+    """
+    # List of providers to test directly
+    default_providers = ["huggingface", "sambanova", "novita", "fireworks-ai", "together", "openai", "anthropic"]
+
+    if verbose:
+        logger.warning(f"Using fallback providers list for {model_name}: {', '.join(default_providers)}")
+
+    # Test each provider directly
+    for provider in default_providers:
+        if verbose:
+            logger.info(f"Testing fallback provider {provider} for {model_name}")
+        try:
+            if test_provider(model_name, provider, verbose):
+                if verbose:
+                    logger.info(f"FALLBACK: Provider {provider} is available for {model_name}")
+                return provider
+        except Exception as e:
+            if verbose:
+                logger.warning(f"FALLBACK: Error testing provider {provider} for {model_name}: {str(e)}")
+
+    return None
+
 def test_models(verbose=True):
     """
     Test the default model and the alternative models, then return a summary of the results.
@@ -199,6 +320,20 @@
         "unavailable_models": []
     }
 
+    # Get the HF token
+    hf_token = os.environ.get("HF_TOKEN")
+    if hf_token:
+        print("HF_TOKEN is available")
+    else:
+        print("HF_TOKEN is missing")
+
+    # Get the HF organization
+    hf_organization = os.environ.get("HF_ORGANIZATION")
+    if hf_organization:
+        print(f"HF_ORGANIZATION is available: {hf_organization}")
+    else:
+        print("HF_ORGANIZATION is missing")
+
     if verbose:
         print(f"Testing main default model: {DEFAULT_BENCHMARK_MODEL}")
 
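Taken together, test_provider, _test_provider_without_token, _test_fallback_providers and test_models now degrade gracefully when HF_TOKEN is missing or rejected. A hypothetical usage sketch, assuming backend/ is on PYTHONPATH (as when running main.py from that directory); the exact shape of the summary dict beyond the "unavailable_models" key is not shown in this diff:

```python
# Sketch only: find a working provider for one model, then run the full startup check.
from tasks.get_available_model_provider import get_available_model_provider, test_models

provider = get_available_model_provider("HuggingFaceH4/zephyr-7b-beta", verbose=True)
print(f"First available provider: {provider}")  # None if even the fallback list fails

summary = test_models(verbose=True)
print(summary.get("unavailable_models", []))  # models for which no provider responded
```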