Spaces: Running on CPU Upgrade
update model testing at server startup
Browse files
- backend/config/models_config.py +4 -0
- backend/main.py +30 -3
- backend/tasks/get_available_model_provider.py +163 -28
backend/config/models_config.py
CHANGED
@@ -25,6 +25,10 @@ ALTERNATIVE_BENCHMARK_MODELS = [
|
|
25 |
"meta-llama/Llama-3.1-8B-Instruct",
|
26 |
"Qwen/Qwen2.5-72B-Instruct",
|
27 |
"mistralai/Mistral-Small-24B-Instruct-2501",
|
|
|
|
|
|
|
|
|
28 |
]
|
29 |
|
30 |
# Required model for create_bench_config_file.py (only one default model)
|
|
|
25 |
"meta-llama/Llama-3.1-8B-Instruct",
|
26 |
"Qwen/Qwen2.5-72B-Instruct",
|
27 |
"mistralai/Mistral-Small-24B-Instruct-2501",
|
28 |
+
# Modèles open-source qui peuvent fonctionner sans authentification
|
29 |
+
"HuggingFaceH4/zephyr-7b-beta",
|
30 |
+
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
31 |
+
"microsoft/phi-2",
|
32 |
]
|
33 |
|
34 |
# Required model for create_bench_config_file.py (only one default model)
|
backend/main.py
CHANGED
@@ -4,6 +4,7 @@ import os
|
|
4 |
from dotenv import load_dotenv
|
5 |
from routes import routers, session_files, active_tasks, benchmark
|
6 |
from tasks.get_available_model_provider import test_models
|
|
|
7 |
|
8 |
# Load environment variables from .env file
|
9 |
load_dotenv()
|
@@ -11,11 +12,15 @@ load_dotenv()
|
|
11 |
# Verify environment variables are loaded
|
12 |
hf_token = os.getenv("HF_TOKEN")
|
13 |
if not hf_token:
|
14 |
-
print("
|
|
|
|
|
15 |
|
16 |
hf_organization = os.getenv("HF_ORGANIZATION")
|
17 |
if not hf_organization:
|
18 |
-
print("
|
|
|
|
|
19 |
|
20 |
app = FastAPI(title="Yourbench API")
|
21 |
|
@@ -31,9 +36,31 @@ app.add_middleware(
|
|
31 |
# Ajouter un gestionnaire d'événements pour afficher les session_files au démarrage
|
32 |
@app.on_event("startup")
|
33 |
async def startup_event():
|
34 |
-
print("Application
|
35 |
print(f"Initial session_files: {session_files}")
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
# Tester les modèles au démarrage et afficher les résultats
|
38 |
print("===== Testing model availability at startup =====")
|
39 |
test_results = test_models(verbose=True)
|
|
|
4 |
from dotenv import load_dotenv
|
5 |
from routes import routers, session_files, active_tasks, benchmark
|
6 |
from tasks.get_available_model_provider import test_models
|
7 |
+
from datetime import datetime
|
8 |
|
9 |
# Load environment variables from .env file
|
10 |
load_dotenv()
|
|
|
12 |
# Verify environment variables are loaded
|
13 |
hf_token = os.getenv("HF_TOKEN")
|
14 |
if not hf_token:
|
15 |
+
print("⚠️ WARNING: HF_TOKEN environment variable is not set.")
|
16 |
+
else:
|
17 |
+
print("ℹ️ HF_TOKEN found in environment variables")
|
18 |
|
19 |
hf_organization = os.getenv("HF_ORGANIZATION")
|
20 |
if not hf_organization:
|
21 |
+
print("⚠️ WARNING: HF_ORGANIZATION environment variable is not set.")
|
22 |
+
else:
|
23 |
+
print(f"ℹ️ HF_ORGANIZATION found: {hf_organization}")
|
24 |
|
25 |
app = FastAPI(title="Yourbench API")
|
26 |
|
|
|
36 |
# Ajouter un gestionnaire d'événements pour afficher les session_files au démarrage
|
37 |
@app.on_event("startup")
|
38 |
async def startup_event():
|
39 |
+
print("\n===== Application Startup at", datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "=====\n")
|
40 |
print(f"Initial session_files: {session_files}")
|
41 |
|
42 |
+
# Afficher des informations détaillées sur les variables d'environnement
|
43 |
+
print("\n===== Environment Variables Check =====")
|
44 |
+
hf_token = os.environ.get("HF_TOKEN")
|
45 |
+
if hf_token:
|
46 |
+
print("✅ HF_TOKEN AVAILABLE")
|
47 |
+
else:
|
48 |
+
print("❌ HF_TOKEN MISSING - HuggingFace models will not work correctly")
|
49 |
+
|
50 |
+
hf_organization = os.environ.get("HF_ORGANIZATION")
|
51 |
+
if hf_organization:
|
52 |
+
print(f"✅ HF_ORGANIZATION: {hf_organization}")
|
53 |
+
else:
|
54 |
+
print("❌ HF_ORGANIZATION MISSING")
|
55 |
+
|
56 |
+
print("\n===== Additional Environment Variables =====")
|
57 |
+
# Afficher d'autres variables utiles
|
58 |
+
for env_var in ["PORT", "DEBUG", "PYTHONPATH", "VIRTUAL_ENV"]:
|
59 |
+
value = os.environ.get(env_var)
|
60 |
+
if value:
|
61 |
+
print(f"ℹ️ {env_var}: {value}")
|
62 |
+
print("=======================================\n")
|
63 |
+
|
64 |
# Tester les modèles au démarrage et afficher les résultats
|
65 |
print("===== Testing model availability at startup =====")
|
66 |
test_results = test_models(verbose=True)
|
backend/tasks/get_available_model_provider.py
CHANGED
@@ -30,31 +30,104 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
|
|
30 |
"""
|
31 |
|
32 |
try:
|
33 |
-
|
34 |
load_dotenv()
|
35 |
|
36 |
# Get HF token from environment
|
37 |
hf_token = os.environ.get("HF_TOKEN")
|
38 |
if not hf_token:
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
41 |
hf_organization = os.environ.get("HF_ORGANIZATION")
|
42 |
if not hf_organization:
|
43 |
-
|
44 |
-
|
45 |
|
46 |
if verbose:
|
47 |
logger.info(f"Testing provider {provider} for model {model_name}")
|
48 |
|
49 |
# Initialize the InferenceClient with the specific provider
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
client = InferenceClient(
|
51 |
model=model_name,
|
52 |
-
token=hf_token,
|
53 |
provider=provider,
|
54 |
-
|
55 |
-
timeout=3 # Increased timeout to allow model loading
|
56 |
)
|
57 |
-
|
58 |
try:
|
59 |
# Use the chat completions method for testing
|
60 |
response = client.chat_completion(
|
@@ -63,28 +136,17 @@ def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool
|
|
63 |
)
|
64 |
|
65 |
if verbose:
|
66 |
-
logger.info(f"Provider {provider} is available for {model_name}")
|
67 |
return True
|
68 |
|
69 |
except Exception as e:
|
70 |
if verbose:
|
71 |
-
|
72 |
-
logger.warning(f"Error with provider {provider}: {error_message}")
|
73 |
-
|
74 |
-
# Log specific error types if we can identify them
|
75 |
-
if "status_code=429" in error_message:
|
76 |
-
logger.warning(f"Provider {provider} rate limited. You may need to wait or upgrade your plan.")
|
77 |
-
elif "status_code=401" in error_message:
|
78 |
-
logger.warning(f"Authentication failed for provider {provider}. Check your token.")
|
79 |
-
elif "status_code=503" in error_message:
|
80 |
-
logger.warning(f"Provider {provider} service unavailable. Model may be loading or provider is down.")
|
81 |
-
elif "timed out" in error_message.lower():
|
82 |
-
logger.warning(f"Timeout error with provider {provider} - request timed out after 10 seconds")
|
83 |
return False
|
84 |
|
85 |
except Exception as e:
|
86 |
if verbose:
|
87 |
-
logger.warning(f"Error in
|
88 |
return False
|
89 |
|
90 |
def get_available_model_provider(model_name, verbose=False):
|
@@ -108,21 +170,48 @@ def get_available_model_provider(model_name, verbose=False):
|
|
108 |
|
109 |
# Get providers for the model and prioritize them
|
110 |
try:
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
if not hasattr(info, "inference_provider_mapping"):
|
113 |
if verbose:
|
114 |
logger.info(f"No inference providers found for {model_name}")
|
115 |
-
|
|
|
116 |
|
117 |
providers = list(info.inference_provider_mapping.keys())
|
118 |
if not providers:
|
119 |
if verbose:
|
120 |
logger.info(f"Empty list of providers for {model_name}")
|
121 |
-
|
|
|
122 |
except Exception as e:
|
123 |
if verbose:
|
124 |
logger.error(f"Error retrieving model info for {model_name}: {str(e)}")
|
125 |
-
|
|
|
126 |
|
127 |
# Prioritize providers
|
128 |
prioritized_providers = prioritize_providers(providers)
|
@@ -179,7 +268,39 @@ def get_available_model_provider(model_name, verbose=False):
|
|
179 |
if verbose:
|
180 |
logger.error(f"Error in get_available_model_provider: {str(e)}")
|
181 |
return None
|
182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
def test_models(verbose=True):
|
184 |
"""
|
185 |
Test le modèle par défaut et les modèles alternatifs, puis retourne un résumé des résultats.
|
@@ -199,6 +320,20 @@ def test_models(verbose=True):
|
|
199 |
"unavailable_models": []
|
200 |
}
|
201 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
if verbose:
|
203 |
print(f"Testing main default model: {DEFAULT_BENCHMARK_MODEL}")
|
204 |
|
|
|
30 |
"""
|
31 |
|
32 |
try:
|
|
|
33 |
load_dotenv()
|
34 |
|
35 |
# Get HF token from environment
|
36 |
hf_token = os.environ.get("HF_TOKEN")
|
37 |
if not hf_token:
|
38 |
+
if verbose:
|
39 |
+
logger.warning("HF_TOKEN not defined in environment, trying without token")
|
40 |
+
# Essayer sans token (pour certains providers qui acceptent des requêtes anonymes)
|
41 |
+
return _test_provider_without_token(model_name, provider, verbose)
|
42 |
+
|
43 |
+
# Get HF organization from environment
|
44 |
hf_organization = os.environ.get("HF_ORGANIZATION")
|
45 |
if not hf_organization:
|
46 |
+
if verbose:
|
47 |
+
logger.warning("HF_ORGANIZATION not defined in environment")
|
48 |
|
49 |
if verbose:
|
50 |
logger.info(f"Testing provider {provider} for model {model_name}")
|
51 |
|
52 |
# Initialize the InferenceClient with the specific provider
|
53 |
+
try:
|
54 |
+
client = InferenceClient(
|
55 |
+
model=model_name,
|
56 |
+
token=hf_token,
|
57 |
+
provider=provider,
|
58 |
+
# bill_to=hf_organization if hf_organization else None,
|
59 |
+
timeout=3 # Increased timeout to allow model loading
|
60 |
+
)
|
61 |
+
|
62 |
+
try:
|
63 |
+
# Use the chat completions method for testing
|
64 |
+
response = client.chat_completion(
|
65 |
+
messages=[{"role": "user", "content": "Hello"}],
|
66 |
+
max_tokens=5
|
67 |
+
)
|
68 |
+
|
69 |
+
if verbose:
|
70 |
+
logger.info(f"Provider {provider} is available for {model_name}")
|
71 |
+
return True
|
72 |
+
|
73 |
+
except Exception as e:
|
74 |
+
if verbose:
|
75 |
+
error_message = str(e)
|
76 |
+
logger.warning(f"Error with provider {provider}: {error_message}")
|
77 |
+
|
78 |
+
# Log specific error types if we can identify them
|
79 |
+
if "status_code=429" in error_message:
|
80 |
+
logger.warning(f"Provider {provider} rate limited. You may need to wait or upgrade your plan.")
|
81 |
+
elif "status_code=401" in error_message or "status_code=403" in error_message:
|
82 |
+
logger.warning(f"Authentication failed for provider {provider}. Check your token.")
|
83 |
+
# Essayer sans token
|
84 |
+
if verbose:
|
85 |
+
logger.info(f"Trying provider {provider} without authentication")
|
86 |
+
return _test_provider_without_token(model_name, provider, verbose)
|
87 |
+
elif "status_code=503" in error_message:
|
88 |
+
logger.warning(f"Provider {provider} service unavailable. Model may be loading or provider is down.")
|
89 |
+
elif "timed out" in error_message.lower():
|
90 |
+
logger.warning(f"Timeout error with provider {provider} - request timed out after 10 seconds")
|
91 |
+
return False
|
92 |
+
except Exception as auth_error:
|
93 |
+
if "401" in str(auth_error) or "Unauthorized" in str(auth_error):
|
94 |
+
# En cas d'erreur d'authentification, essayer sans token
|
95 |
+
if verbose:
|
96 |
+
logger.warning(f"Authentication error with {provider}: {str(auth_error)}. Trying without token.")
|
97 |
+
return _test_provider_without_token(model_name, provider, verbose)
|
98 |
+
else:
|
99 |
+
if verbose:
|
100 |
+
logger.warning(f"Error creating client for {provider}: {str(auth_error)}")
|
101 |
+
return False
|
102 |
+
|
103 |
+
except Exception as e:
|
104 |
+
if verbose:
|
105 |
+
logger.warning(f"Error in test_provider: {str(e)}")
|
106 |
+
return False
|
107 |
+
|
108 |
+
def _test_provider_without_token(model_name: str, provider: str, verbose: bool = False) -> bool:
|
109 |
+
"""
|
110 |
+
Essaye de tester un provider sans token d'authentification
|
111 |
+
|
112 |
+
Args:
|
113 |
+
model_name: Nom du modèle
|
114 |
+
provider: Provider à tester
|
115 |
+
verbose: Afficher les logs détaillés
|
116 |
+
|
117 |
+
Returns:
|
118 |
+
True si le provider est disponible, False sinon
|
119 |
+
"""
|
120 |
+
try:
|
121 |
+
if verbose:
|
122 |
+
logger.info(f"Testing provider {provider} for model {model_name} without authentication")
|
123 |
+
|
124 |
+
# Initialize without token
|
125 |
client = InferenceClient(
|
126 |
model=model_name,
|
|
|
127 |
provider=provider,
|
128 |
+
timeout=3
|
|
|
129 |
)
|
130 |
+
|
131 |
try:
|
132 |
# Use the chat completions method for testing
|
133 |
response = client.chat_completion(
|
|
|
136 |
)
|
137 |
|
138 |
if verbose:
|
139 |
+
logger.info(f"Provider {provider} is available for {model_name} without authentication")
|
140 |
return True
|
141 |
|
142 |
except Exception as e:
|
143 |
if verbose:
|
144 |
+
logger.warning(f"Error with provider {provider} without authentication: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
return False
|
146 |
|
147 |
except Exception as e:
|
148 |
if verbose:
|
149 |
+
logger.warning(f"Error in _test_provider_without_token: {str(e)}")
|
150 |
return False
|
151 |
|
152 |
def get_available_model_provider(model_name, verbose=False):
|
|
|
170 |
|
171 |
# Get providers for the model and prioritize them
|
172 |
try:
|
173 |
+
# Essayer avec le token
|
174 |
+
try:
|
175 |
+
if verbose:
|
176 |
+
logger.info(f"Trying to get model info for {model_name} with auth token")
|
177 |
+
info = model_info(model_name, token=hf_token, expand="inferenceProviderMapping")
|
178 |
+
except Exception as auth_error:
|
179 |
+
# Si l'authentification échoue, essayer sans token (pour les modèles publics)
|
180 |
+
if "401" in str(auth_error) or "Unauthorized" in str(auth_error):
|
181 |
+
if verbose:
|
182 |
+
logger.warning(f"Authentication failed for {model_name}, trying without token")
|
183 |
+
# Essayer de récupérer les infos sans token
|
184 |
+
try:
|
185 |
+
info = model_info(model_name, expand="inferenceProviderMapping")
|
186 |
+
except Exception as e:
|
187 |
+
if verbose:
|
188 |
+
logger.error(f"Failed to get model info without token: {str(e)}")
|
189 |
+
# Comme dernier recours, retourner la liste des providers par défaut pour tester
|
190 |
+
if verbose:
|
191 |
+
logger.warning(f"Using default providers list as fallback for {model_name}")
|
192 |
+
# Fournir une liste de providers de secours pour tester directement
|
193 |
+
return _test_fallback_providers(model_name, verbose)
|
194 |
+
else:
|
195 |
+
# Autre erreur, la relancer
|
196 |
+
raise auth_error
|
197 |
+
|
198 |
if not hasattr(info, "inference_provider_mapping"):
|
199 |
if verbose:
|
200 |
logger.info(f"No inference providers found for {model_name}")
|
201 |
+
# Essayer avec la liste de providers par défaut
|
202 |
+
return _test_fallback_providers(model_name, verbose)
|
203 |
|
204 |
providers = list(info.inference_provider_mapping.keys())
|
205 |
if not providers:
|
206 |
if verbose:
|
207 |
logger.info(f"Empty list of providers for {model_name}")
|
208 |
+
# Essayer avec la liste de providers par défaut
|
209 |
+
return _test_fallback_providers(model_name, verbose)
|
210 |
except Exception as e:
|
211 |
if verbose:
|
212 |
logger.error(f"Error retrieving model info for {model_name}: {str(e)}")
|
213 |
+
# Essayer avec la liste de providers par défaut
|
214 |
+
return _test_fallback_providers(model_name, verbose)
|
215 |
|
216 |
# Prioritize providers
|
217 |
prioritized_providers = prioritize_providers(providers)
|
|
|
268 |
if verbose:
|
269 |
logger.error(f"Error in get_available_model_provider: {str(e)}")
|
270 |
return None
|
271 |
+
|
272 |
+
def _test_fallback_providers(model_name, verbose=False):
|
273 |
+
"""
|
274 |
+
Fonction de secours qui teste une liste de providers communs sans passer par l'API
|
275 |
+
|
276 |
+
Args:
|
277 |
+
model_name: Nom du modèle
|
278 |
+
verbose: Afficher les logs détaillés
|
279 |
+
|
280 |
+
Returns:
|
281 |
+
Le premier provider disponible ou None
|
282 |
+
"""
|
283 |
+
# Liste de providers à tester en direct
|
284 |
+
default_providers = ["huggingface", "sambanova", "novita", "fireworks-ai", "together", "openai", "anthropic"]
|
285 |
+
|
286 |
+
if verbose:
|
287 |
+
logger.warning(f"Using fallback providers list for {model_name}: {', '.join(default_providers)}")
|
288 |
+
|
289 |
+
# Tester chaque provider directement
|
290 |
+
for provider in default_providers:
|
291 |
+
if verbose:
|
292 |
+
logger.info(f"Testing fallback provider {provider} for {model_name}")
|
293 |
+
try:
|
294 |
+
if test_provider(model_name, provider, verbose):
|
295 |
+
if verbose:
|
296 |
+
logger.info(f"FALLBACK: Provider {provider} is available for {model_name}")
|
297 |
+
return provider
|
298 |
+
except Exception as e:
|
299 |
+
if verbose:
|
300 |
+
logger.warning(f"FALLBACK: Error testing provider {provider} for {model_name}: {str(e)}")
|
301 |
+
|
302 |
+
return None
|
303 |
+
|
304 |
def test_models(verbose=True):
|
305 |
"""
|
306 |
Test le modèle par défaut et les modèles alternatifs, puis retourne un résumé des résultats.
|
|
|
320 |
"unavailable_models": []
|
321 |
}
|
322 |
|
323 |
+
# Obtenez le jeton HF
|
324 |
+
hf_token = os.environ.get("HF_TOKEN")
|
325 |
+
if hf_token:
|
326 |
+
print("HF_TOKEN is available")
|
327 |
+
else:
|
328 |
+
print("HF_TOKEN is missing")
|
329 |
+
|
330 |
+
# Obtenez l'organisation HF
|
331 |
+
hf_organization = os.environ.get("HF_ORGANIZATION")
|
332 |
+
if hf_organization:
|
333 |
+
print(f"HF_ORGANIZATION is available: {hf_organization}")
|
334 |
+
else:
|
335 |
+
print("HF_ORGANIZATION is missing")
|
336 |
+
|
337 |
if verbose:
|
338 |
print(f"Testing main default model: {DEFAULT_BENCHMARK_MODEL}")
|
339 |
|