add get available model provider to benchmark generation
backend/results.json  ADDED
(The diff for this file is too large to render.)
backend/routes/benchmark.py  CHANGED

@@ -162,40 +162,53 @@ class UnifiedBenchmarkTask:
             self.config_task = CreateBenchConfigTask(session_uid=self.session_uid)
 
             # Execute the configuration task
-            config_path = self.config_task.run(file_path=file_path)
-
-            # Get configuration logs
-            config_logs = self.config_task.get_logs()
-            for log in config_logs:
-                self._add_log(log)
-
-            # Mark configuration step as completed
-            if "[SUCCESS] Stage completed: config_generation" not in self.logs:
-                self._add_log("[SUCCESS] Stage completed: configuration")
-
-            # Step 2: Benchmark
-            self._add_log("[INFO] Starting benchmark process")
-            self.bench_task = CreateBenchTask(session_uid=self.session_uid, config_path=config_path)
-
-            # Run the benchmark task
-            self.bench_task.run()
-
-            # Wait for the benchmark task to complete
-            while not self.bench_task.is_task_completed():
-                # Get new logs and add them
-                bench_logs = self.bench_task.get_logs()
-                for log in bench_logs:
-                    self._add_log(log)
-                time.sleep(1)
-
-            # Get final logs
-            final_logs = self.bench_task.get_logs()
-            for log in final_logs:
-                self._add_log(log)
-
-            # Mark as completed
-            self.is_completed = True
-            self._add_log("[SUCCESS] Benchmark process completed successfully")
+            try:
+                config_path = self.config_task.run(file_path=file_path)
+
+                # Get configuration logs
+                config_logs = self.config_task.get_logs()
+                for log in config_logs:
+                    self._add_log(log)
+
+                # Mark configuration step as completed
+                if "[SUCCESS] Stage completed: config_generation" not in self.logs:
+                    self._add_log("[SUCCESS] Stage completed: configuration")
+
+                # Step 2: Benchmark
+                self._add_log("[INFO] Starting benchmark process")
+                self.bench_task = CreateBenchTask(session_uid=self.session_uid, config_path=config_path)
+
+                # Run the benchmark task
+                self.bench_task.run()
+
+                # Wait for the benchmark task to complete
+                while not self.bench_task.is_task_completed():
+                    # Get new logs and add them
+                    bench_logs = self.bench_task.get_logs()
+                    for log in bench_logs:
+                        self._add_log(log)
+                    time.sleep(1)
+
+                # Get final logs
+                final_logs = self.bench_task.get_logs()
+                for log in final_logs:
+                    self._add_log(log)
+
+                # Mark as completed
+                self.is_completed = True
+                self._add_log("[SUCCESS] Benchmark process completed successfully")
+
+            except Exception as config_error:
+                error_msg = str(config_error)
+                # Log detailed error
+                self._add_log(f"[ERROR] Configuration failed: {error_msg}")
+
+                # Check if it's a provider error and provide a more user-friendly message
+                if "Required models not available" in error_msg:
+                    self._add_log("[ERROR] Some required models are not available at the moment. Please try again later.")
+
+                # Mark as completed with error
+                self.is_completed = True
 
         except Exception as e:
             self._add_log(f"[ERROR] Benchmark process failed: {str(e)}")
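The configuration step is now wrapped in its own try/except so that a missing provider surfaces as a readable log entry instead of a generic failure. A minimal sketch of exercising that failure path outside the route, using CreateBenchConfigTask from the file below (the session id and file path are placeholders):

from tasks.create_bench_config_file import CreateBenchConfigTask

task = CreateBenchConfigTask(session_uid="demo-session")  # placeholder session id
try:
    config_path = task.run(file_path="uploaded_files/demo/example.md")  # placeholder path
except RuntimeError as e:
    # generate_base_config raises RuntimeError when no provider is available for a
    # required model; the route above maps it onto "[ERROR] Configuration failed: ...".
    print(f"Configuration failed: {e}")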
backend/tasks/create_bench_config_file.py  CHANGED

@@ -13,6 +13,8 @@ from typing import Optional, Dict, Any, List, Tuple
 from loguru import logger
 from huggingface_hub import HfApi
 
+from tasks.get_available_model_provider import get_available_model_provider
+
 
 class CreateBenchConfigTask:
     """

@@ -76,6 +78,26 @@ class CreateBenchConfigTask:
             self._add_log(f"[ERROR] {error_msg}")
             raise RuntimeError(error_msg)
 
+    def get_model_provider(self, model_name: str) -> Optional[str]:
+        """
+        Get the available provider for a model
+
+        Args:
+            model_name: Name of the model to check
+
+        Returns:
+            Available provider or None if none found
+        """
+        self._add_log(f"[INFO] Finding available provider for {model_name}")
+        provider = get_available_model_provider(model_name, verbose=True)
+
+        if provider:
+            self._add_log(f"[INFO] Found provider for {model_name}: {provider}")
+            return provider
+        else:
+            self._add_log(f"[WARNING] No available provider found for {model_name}")
+            return None
+
     def generate_base_config(self, hf_org: str, hf_dataset_name: str) -> Dict[str, Any]:
         """
         Create the base configuration dictionary

@@ -94,6 +116,39 @@ class CreateBenchConfigTask:
         if not hf_token:
             raise RuntimeError("HF_TOKEN environment variable is not defined")
 
+        # Get providers for models
+        model_list = []
+
+        # Define required models
+        required_models = [
+            "Qwen/Qwen2.5-72B-Instruct"
+        ]
+
+        # Track found models
+        found_models = set()
+
+        for model_name in required_models:
+            provider = self.get_model_provider(model_name)
+            if provider:
+                model_list.append({
+                    "model_name": model_name,
+                    "provider": provider,
+                    "api_key": "$HF_TOKEN",
+                    "max_concurrent_requests": 32,
+                })
+                found_models.add(model_name)
+
+        # Check if all required models are available
+        if len(found_models) < len(required_models):
+            missing_models = set(required_models) - found_models
+            missing_models_str = ", ".join(missing_models)
+            error_msg = f"Required models not available: {missing_models_str}. Cannot proceed with benchmark."
+            self._add_log(f"[ERROR] {error_msg}")
+            raise RuntimeError(error_msg)
+
+        # Mark provider check stage as completed
+        self._add_log("[SUCCESS] Stage completed: provider_check")
+
         return {
             "hf_configuration": {
                 "token": "$HF_TOKEN",  # Use the token directly from the environment

@@ -102,23 +157,10 @@ class CreateBenchConfigTask:
                 "hf_dataset_name": hf_dataset_name,
                 "concat_if_exist": False,
             },
-            "model_list": [
-                {
-                    "model_name": "Qwen/Qwen2.5-VL-72B-Instruct",
-                    "provider": "novita",
-                    "api_key": "$HF_TOKEN",
-                    "max_concurrent_requests": 32,
-                },
-                {
-                    "model_name": "Qwen/Qwen2.5-72B-Instruct",
-                    "provider": "novita",
-                    "api_key": "$HF_TOKEN",
-                    "max_concurrent_requests": 32,
-                }
-            ],
+            "model_list": model_list,
 
             "model_roles": {
-                "ingestion": ["Qwen/Qwen2.5-
+                "ingestion": ["Qwen/Qwen2.5-72B-Instruct"],
                 "summarization": ["Qwen/Qwen2.5-72B-Instruct"],
                 "chunking": ["intfloat/multilingual-e5-large-instruct"],
                 "single_shot_question_generation": ["Qwen/Qwen2.5-72B-Instruct"],

@@ -229,6 +271,9 @@ class CreateBenchConfigTask:
 
         time.sleep(0.8)  # Simulate delay
 
+        # Log the start of finding providers
+        self._add_log("[INFO] Finding available providers for models...")
+
         # Generate and save the configuration
         config = self.generate_base_config(org_name, dataset_name)
 
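For reference, when a provider is found for every required model, the loop above appends one entry per model, so the generated configuration ends up with a model_list of roughly this shape (the provider value depends on what get_available_model_provider returns at run time; "novita" is only an illustration):

model_list = [
    {
        "model_name": "Qwen/Qwen2.5-72B-Instruct",
        "provider": "novita",  # whichever provider passed the availability check
        "api_key": "$HF_TOKEN",
        "max_concurrent_requests": 32,
    },
]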
backend/tasks/get_available_model_provider.py  ADDED

@@ -0,0 +1,208 @@
import os
import logging
import json
from huggingface_hub import model_info, InferenceClient
from dotenv import load_dotenv

# Define preferred providers
PREFERRED_PROVIDERS = ["sambanova", "novita"]

def filter_providers(providers):
    """Filter providers to only include preferred ones."""
    return [provider for provider in providers if provider in PREFERRED_PROVIDERS]

def prioritize_providers(providers):
    """Prioritize preferred providers, keeping all others."""
    preferred = [provider for provider in providers if provider in PREFERRED_PROVIDERS]
    non_preferred = [provider for provider in providers if provider not in PREFERRED_PROVIDERS]
    return preferred + non_preferred

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

def is_vision_model(model_name: str) -> bool:
    """
    Check if the model is a vision model based on its name

    Args:
        model_name: Name of the model

    Returns:
        True if it's a vision model, False otherwise
    """
    vision_indicators = ["-VL-", "vision", "clip", "image"]
    return any(indicator in model_name.lower() for indicator in vision_indicators)

def get_test_payload(model_name: str) -> dict:
    """
    Get the appropriate test payload based on model type

    Args:
        model_name: Name of the model

    Returns:
        Dictionary containing the test payload
    """
    # We're only testing text models now
    return {
        "inputs": "Hello",
        "parameters": {
            "max_new_tokens": 5
        }
    }

def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool:
    """
    Test if a specific provider is available for a model using InferenceClient

    Args:
        model_name: Name of the model
        provider: Provider to test
        verbose: Whether to log detailed information

    Returns:
        True if the provider is available, False otherwise
    """
    try:
        # Load environment variables
        load_dotenv()

        # Get HF token from environment
        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            raise ValueError("HF_TOKEN not defined in environment")

        if verbose:
            logger.info(f"Testing provider {provider} for model {model_name}")

        # Initialize the InferenceClient with the specific provider
        client = InferenceClient(
            model=model_name,
            token=hf_token,
            provider=provider,
            timeout=10  # Increased timeout to allow model loading
        )

        try:
            # Use the chat completions method for testing
            response = client.chat_completion(
                messages=[{"role": "user", "content": "Hello"}],
                max_tokens=5
            )

            if verbose:
                logger.info(f"Provider {provider} is available for {model_name}")
            return True

        except Exception as e:
            if verbose:
                error_message = str(e)
                logger.error(f"Error with provider {provider}: {error_message}")

                # Log specific error types if we can identify them
                if "status_code=429" in error_message:
                    logger.warning(f"Provider {provider} rate limited. You may need to wait or upgrade your plan.")
                elif "status_code=401" in error_message:
                    logger.warning(f"Authentication failed for provider {provider}. Check your token.")
                elif "status_code=503" in error_message:
                    logger.warning(f"Provider {provider} service unavailable. Model may be loading or provider is down.")
                elif "timed out" in error_message.lower():
                    logger.error(f"Timeout error with provider {provider} - request timed out after 10 seconds")
            return False

    except Exception as e:
        if verbose:
            logger.error(f"Error in test_provider: {str(e)}")
        return False

def get_available_model_provider(model_name, verbose=False):
    """
    Get the first available provider for a given model.

    Args:
        model_name: Name of the model on the Hub
        verbose: Whether to log detailed information

    Returns:
        First available provider or None if none are available
    """
    try:
        # Load environment variables
        load_dotenv()

        # Get HF token from environment
        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            raise ValueError("HF_TOKEN not defined in environment")

        # Get providers for the model and prioritize them
        info = model_info(model_name, expand="inferenceProviderMapping")
        if not hasattr(info, "inference_provider_mapping"):
            if verbose:
                logger.info(f"No inference providers found for {model_name}")
            return None

        providers = list(info.inference_provider_mapping.keys())
        if not providers:
            if verbose:
                logger.info(f"Empty list of providers for {model_name}")
            return None

        # Prioritize providers
        providers = prioritize_providers(providers)

        if verbose:
            logger.info(f"Available providers for {model_name}: {', '.join(providers)}")

        # Test each provider
        for provider in providers:
            if test_provider(model_name, provider, verbose):
                return provider

        return None

    except Exception as e:
        if verbose:
            logger.error(f"Error in get_available_model_provider: {str(e)}")
        return None

if __name__ == "__main__":
    # # Example usage with verbose mode enabled
    # model = "Qwen/Qwen2.5-72B-Instruct"

    # # Test sambanova provider
    # print("\nTesting sambanova provider:")
    # sambanova_available = test_provider(model, "sambanova", verbose=True)
    # print(f"sambanova available: {sambanova_available}")

    # # Test novita provider
    # print("\nTesting novita provider:")
    # novita_available = test_provider(model, "novita", verbose=True)
    # print(f"novita available: {novita_available}")

    # # Test automatic provider selection
    # print("\nTesting automatic provider selection:")
    # provider = get_available_model_provider(model, verbose=True)
    # print(f"Selected provider: {provider}")

    models = [
        "Qwen/QwQ-32B",
        "Qwen/Qwen2.5-72B-Instruct",
        "meta-llama/Llama-3.3-70B-Instruct",
        "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
        "mistralai/Mistral-Small-24B-Instruct-2501",
    ]

    providers = []

    for model in models:
        provider = get_available_model_provider(model, verbose=True)
        providers.append(provider)

    print(f"Providers {len(providers)}: {providers}")

    # print("\nTesting novita provider:")
    # novita_available = test_provider("deepseek-ai/DeepSeek-V3-0324", "novita", verbose=True)
    # print(f"novita available: {novita_available}")
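A short usage sketch for the module above, assuming HF_TOKEN is set in the environment and the backend directory is on the import path (the model name is just an example taken from the __main__ block):

from tasks.get_available_model_provider import get_available_model_provider, test_provider

# Query the Hub's inference provider mapping and return the first provider that
# answers a small chat-completion probe, preferring sambanova and novita.
provider = get_available_model_provider("Qwen/Qwen2.5-72B-Instruct", verbose=True)
print(f"Selected provider: {provider}")

# A single provider can also be probed directly.
if provider is not None:
    available = test_provider("Qwen/Qwen2.5-72B-Instruct", provider, verbose=True)
    print(f"{provider} available: {available}")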
backend/tests/test_provider_rate_limits.py  ADDED

@@ -0,0 +1,272 @@
#!/usr/bin/env python
"""
Script to test rate limits of Hugging Face Inference API providers.
Spams requests to a model/provider and collects error messages.

Usage: python test_provider_rate_limits.py --model "model_name" --provider "provider_name" --requests 50
"""

import argparse
import json
import time
import os
import requests
import sys
import logging
from concurrent.futures import ThreadPoolExecutor
from collections import Counter
from typing import Dict, List, Tuple
from dotenv import load_dotenv

# Add parent directory to path to import from tasks
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from tasks.get_available_model_provider import prioritize_providers

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger("rate_limit_test")

# Default model to test
DEFAULT_MODEL = "meta-llama/Llama-3.3-70B-Instruct"

def send_request(model: str, provider: str, token: str, request_id: int) -> Dict:
    """
    Send a single request to the model with the given provider.

    Args:
        model: Model name
        provider: Provider name
        token: HF token
        request_id: ID for this request

    Returns:
        Dictionary with request info and result
    """
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json"
    }

    payload = {
        "inputs": f"Request {request_id}: Hello, what do you thing about the future of AI? And divide me 10 by {request_id}",
        "parameters": {
            "max_new_tokens": 10000,
            "provider": provider
        }
    }

    api_url = f"https://api-inference.huggingface.co/models/{model}"

    start_time = time.time()
    try:
        response = requests.post(api_url, headers=headers, json=payload, timeout=15)
        end_time = time.time()

        result = {
            "request_id": request_id,
            "status_code": response.status_code,
            "time_taken": end_time - start_time,
            "headers": dict(response.headers),
            "success": response.status_code == 200,
        }

        if response.status_code != 200:
            try:
                error_data = response.json()
                if isinstance(error_data, dict) and "error" in error_data:
                    result["error_message"] = error_data["error"]
                else:
                    result["error_message"] = str(error_data)
            except:
                result["error_message"] = response.text

        return result

    except Exception as e:
        end_time = time.time()
        return {
            "request_id": request_id,
            "status_code": 0,
            "time_taken": end_time - start_time,
            "success": False,
            "error_message": str(e)
        }

def run_rate_limit_test(model: str, provider: str = None, num_requests: int = 50,
                        max_workers: int = 10, delay: float = 0.1) -> List[Dict]:
    """
    Run a rate limit test by sending multiple requests to the specified model/provider.

    Args:
        model: Model to test
        provider: Provider to test (if None, will use first available)
        num_requests: Number of requests to send
        max_workers: Maximum number of concurrent workers
        delay: Delay between batches of requests

    Returns:
        List of results for each request
    """
    # Load environment variables
    load_dotenv()

    # Get HF token
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        logger.error("HF_TOKEN not defined in environment")
        return []

    # If provider not specified, get first available
    if not provider:
        from tasks.get_available_model_provider import get_available_model_provider
        provider = get_available_model_provider(model)
        if not provider:
            logger.error(f"No available provider found for {model}")
            return []

    logger.info(f"Testing rate limits for {model} with provider: {provider}")
    logger.info(f"Sending {num_requests} requests with {max_workers} concurrent workers")

    # Send requests in parallel
    results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_id = {
            executor.submit(send_request, model, provider, hf_token, i): i
            for i in range(num_requests)
        }

        completed = 0
        for future in future_to_id:
            result = future.result()
            results.append(result)

            completed += 1
            if completed % 10 == 0:
                logger.info(f"Completed {completed}/{num_requests} requests")

            # Add a small delay periodically to avoid overwhelming the API
            if completed % max_workers == 0:
                time.sleep(delay)

    return results

def analyze_results(results: List[Dict]) -> Dict:
    """
    Analyze the results of the rate limit test.

    Args:
        results: List of request results

    Returns:
        Dictionary with analysis
    """
    total_requests = len(results)
    successful = sum(1 for r in results if r["success"])
    failed = total_requests - successful

    # Count different error messages
    error_messages = Counter(r.get("error_message") for r in results if not r["success"])

    # Calculate timing statistics
    times = [r["time_taken"] for r in results]
    avg_time = sum(times) / len(times) if times else 0

    # Check for rate limiting headers
    rate_limit_headers = set()
    for r in results:
        if "headers" in r:
            for header in r["headers"]:
                if "rate" in header.lower() or "limit" in header.lower():
                    rate_limit_headers.add(header)

    return {
        "total_requests": total_requests,
        "successful_requests": successful,
        "failed_requests": failed,
        "success_rate": successful / total_requests if total_requests > 0 else 0,
        "average_time": avg_time,
        "error_messages": dict(error_messages),
        "rate_limit_headers": list(rate_limit_headers)
    }

def display_results(results: List[Dict], analysis: Dict) -> None:
    """
    Display the results of the rate limit test.

    Args:
        results: List of request results
        analysis: Analysis of results
    """
    print("\n" + "="*80)
    print(f"RATE LIMIT TEST RESULTS")
    print("="*80)

    print(f"\nTotal Requests: {analysis['total_requests']}")
    print(f"Successful: {analysis['successful_requests']} ({analysis['success_rate']*100:.1f}%)")
    print(f"Failed: {analysis['failed_requests']}")
    print(f"Average Time: {analysis['average_time']:.3f} seconds")

    if analysis["rate_limit_headers"]:
        print("\nRate Limit Headers Found:")
        for header in analysis["rate_limit_headers"]:
            print(f"  - {header}")

    if analysis["error_messages"]:
        print("\nError Messages:")
        for msg, count in analysis["error_messages"].items():
            print(f"  - [{count} occurrences] {msg}")

    # Print sample of headers from a failed request
    failed_requests = [r for r in results if not r["success"]]
    if failed_requests:
        print("\nSample Headers from a Failed Request:")
        for header, value in failed_requests[0].get("headers", {}).items():
            print(f"  {header}: {value}")

def main():
    """
    Main entry point for the script.
    """
    parser = argparse.ArgumentParser(description="Test rate limits of Hugging Face Inference API providers.")
    parser.add_argument("--model", type=str, default=DEFAULT_MODEL, help="Name of the model to test")
    parser.add_argument("--provider", type=str, help="Name of the provider to test (if not specified, will use first available)")
    parser.add_argument("--requests", type=int, default=50, help="Number of requests to send")
    parser.add_argument("--workers", type=int, default=10, help="Maximum number of concurrent workers")
    parser.add_argument("--delay", type=float, default=0.1, help="Delay between batches of requests")
    parser.add_argument("--output", type=str, help="Path to save results as JSON (optional)")

    args = parser.parse_args()

    # Run the test
    results = run_rate_limit_test(
        model=args.model,
        provider=args.provider,
        num_requests=args.requests,
        max_workers=args.workers,
        delay=args.delay
    )

    if not results:
        logger.error("Test failed to run properly")
        return

    # Analyze the results
    analysis = analyze_results(results)

    # Display the results
    display_results(results, analysis)

    # Save results if requested
    if args.output:
        with open(args.output, "w") as f:
            json.dump({
                "results": results,
                "analysis": analysis
            }, f, indent=2)
        logger.info(f"Results saved to {args.output}")

if __name__ == "__main__":
    main()
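Besides the command line shown in the docstring, the script's functions can be driven programmatically; a small sketch, assuming backend/tests is on the import path and HF_TOKEN is set (the model and request count are arbitrary choices):

from test_provider_rate_limits import run_rate_limit_test, analyze_results, display_results

# Send a small burst of requests and summarize status codes, timings and error messages.
results = run_rate_limit_test(
    model="meta-llama/Llama-3.3-70B-Instruct",  # DEFAULT_MODEL in the script
    provider=None,        # let the script pick the first available provider
    num_requests=20,
    max_workers=5,
)
if results:
    analysis = analyze_results(results)
    display_results(results, analysis)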
frontend/src/components/BenchmarkGenerator.jsx  CHANGED

@@ -12,21 +12,29 @@ const SIMULATION_DURATION = 120000; // 20 secondes
 // Define all benchmark steps in sequence
 const BENCHMARK_STEPS = [
   "configuration",
+  "provider_check",
   "ingestion",
   "upload_ingest_to_hub",
   "summarization",
   "chunking",
   "single_shot_question_generation",
+  "evaluation_provider_check",
+  "evaluation",
+  "evaluation_saving_results",
 ];
 
 // Step labels for display (more user-friendly names)
 const STEP_LABELS = {
   configuration: "Configuration",
+  provider_check: "Finding providers",
   ingestion: "Ingestion",
   upload_ingest_to_hub: "Upload to Hub",
   summarization: "Summarization",
   chunking: "Chunking",
   single_shot_question_generation: "Question generation",
+  evaluation_provider_check: "Checking evaluation providers",
+  evaluation: "Running evaluations",
+  evaluation_saving_results: "Saving evaluation results",
 };
 
 // Simulated log messages for pre-calculated documents

@@ -34,6 +42,8 @@ const SIMULATED_LOGS = [
   "[INFO] Initializing benchmark generation...",
   "[INFO] Generating base configuration file...",
   "[SUCCESS] Stage completed: configuration",
+  "[INFO] Finding available providers for models...",
+  "[SUCCESS] Stage completed: provider_check",
   "[INFO] Starting ingestion process...",
   "[SUCCESS] Stage completed: ingestion",
   "[INFO] Processing document content for upload...",