tfrere committed · Commit 0e34dc4 · 1 parent: 2484201

add get available model provider to benchmark generation
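
In short, the generated config no longer hard-codes an inference provider: the configuration task now asks the Hub which provider can actually serve each required model and aborts early when none can. A minimal sketch of the flow this commit introduces (names taken from the diffs below; session handling and config writing omitted; assumes HF_TOKEN is set and backend/ is on the import path):

    from tasks.get_available_model_provider import get_available_model_provider

    required_models = ["Qwen/Qwen2.5-72B-Instruct"]
    model_list = []

    for model_name in required_models:
        # Probes the model's Hub inference providers and returns the first one that answers.
        provider = get_available_model_provider(model_name, verbose=True)
        if provider is None:
            # The real task raises RuntimeError("Required models not available: ..."),
            # which the benchmark route surfaces as a user-facing error log.
            raise RuntimeError(f"Required models not available: {model_name}")
        model_list.append({
            "model_name": model_name,
            "provider": provider,  # e.g. "sambanova" or "novita"
            "api_key": "$HF_TOKEN",
            "max_concurrent_requests": 32,
        })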

backend/results.json ADDED
The diff for this file is too large to render. See raw diff
 
backend/routes/benchmark.py CHANGED
@@ -162,40 +162,53 @@ class UnifiedBenchmarkTask:
             self.config_task = CreateBenchConfigTask(session_uid=self.session_uid)
 
             # Execute the configuration task
-            config_path = self.config_task.run(file_path=file_path)
-
-            # Get configuration logs
-            config_logs = self.config_task.get_logs()
-            for log in config_logs:
-                self._add_log(log)
-
-            # Mark configuration step as completed
-            if "[SUCCESS] Stage completed: config_generation" not in self.logs:
-                self._add_log("[SUCCESS] Stage completed: configuration")
-
-            # Step 2: Benchmark
-            self._add_log("[INFO] Starting benchmark process")
-            self.bench_task = CreateBenchTask(session_uid=self.session_uid, config_path=config_path)
-
-            # Run the benchmark task
-            self.bench_task.run()
-
-            # Wait for the benchmark task to complete
-            while not self.bench_task.is_task_completed():
-                # Get new logs and add them
-                bench_logs = self.bench_task.get_logs()
-                for log in bench_logs:
-                    self._add_log(log)
-                time.sleep(1)
-
-            # Get final logs
-            final_logs = self.bench_task.get_logs()
-            for log in final_logs:
-                self._add_log(log)
-
-            # Mark as completed
-            self.is_completed = True
-            self._add_log("[SUCCESS] Benchmark process completed successfully")
+            try:
+                config_path = self.config_task.run(file_path=file_path)
+
+                # Get configuration logs
+                config_logs = self.config_task.get_logs()
+                for log in config_logs:
+                    self._add_log(log)
+
+                # Mark configuration step as completed
+                if "[SUCCESS] Stage completed: config_generation" not in self.logs:
+                    self._add_log("[SUCCESS] Stage completed: configuration")
+
+                # Step 2: Benchmark
+                self._add_log("[INFO] Starting benchmark process")
+                self.bench_task = CreateBenchTask(session_uid=self.session_uid, config_path=config_path)
+
+                # Run the benchmark task
+                self.bench_task.run()
+
+                # Wait for the benchmark task to complete
+                while not self.bench_task.is_task_completed():
+                    # Get new logs and add them
+                    bench_logs = self.bench_task.get_logs()
+                    for log in bench_logs:
+                        self._add_log(log)
+                    time.sleep(1)
+
+                # Get final logs
+                final_logs = self.bench_task.get_logs()
+                for log in final_logs:
+                    self._add_log(log)
+
+                # Mark as completed
+                self.is_completed = True
+                self._add_log("[SUCCESS] Benchmark process completed successfully")
+
+            except Exception as config_error:
+                error_msg = str(config_error)
+                # Log detailed error
+                self._add_log(f"[ERROR] Configuration failed: {error_msg}")
+
+                # Check if it's a provider error and provide a more user-friendly message
+                if "Required models not available" in error_msg:
+                    self._add_log("[ERROR] Some required models are not available at the moment. Please try again later.")
+
+                # Mark as completed with error
+                self.is_completed = True
 
         except Exception as e:
             self._add_log(f"[ERROR] Benchmark process failed: {str(e)}")
backend/tasks/create_bench_config_file.py CHANGED
@@ -13,6 +13,8 @@ from typing import Optional, Dict, Any, List, Tuple
 from loguru import logger
 from huggingface_hub import HfApi
 
+from tasks.get_available_model_provider import get_available_model_provider
+
 
 class CreateBenchConfigTask:
     """
@@ -76,6 +78,26 @@ class CreateBenchConfigTask:
             self._add_log(f"[ERROR] {error_msg}")
             raise RuntimeError(error_msg)
 
+    def get_model_provider(self, model_name: str) -> Optional[str]:
+        """
+        Get the available provider for a model
+
+        Args:
+            model_name: Name of the model to check
+
+        Returns:
+            Available provider or None if none found
+        """
+        self._add_log(f"[INFO] Finding available provider for {model_name}")
+        provider = get_available_model_provider(model_name, verbose=True)
+
+        if provider:
+            self._add_log(f"[INFO] Found provider for {model_name}: {provider}")
+            return provider
+        else:
+            self._add_log(f"[WARNING] No available provider found for {model_name}")
+            return None
+
     def generate_base_config(self, hf_org: str, hf_dataset_name: str) -> Dict[str, Any]:
         """
         Create the base configuration dictionary
@@ -94,6 +116,39 @@ class CreateBenchConfigTask:
         if not hf_token:
             raise RuntimeError("HF_TOKEN environment variable is not defined")
 
+        # Get providers for models
+        model_list = []
+
+        # Define required models
+        required_models = [
+            "Qwen/Qwen2.5-72B-Instruct"
+        ]
+
+        # Track found models
+        found_models = set()
+
+        for model_name in required_models:
+            provider = self.get_model_provider(model_name)
+            if provider:
+                model_list.append({
+                    "model_name": model_name,
+                    "provider": provider,
+                    "api_key": "$HF_TOKEN",
+                    "max_concurrent_requests": 32,
+                })
+                found_models.add(model_name)
+
+        # Check that all required models are available
+        if len(found_models) < len(required_models):
+            missing_models = set(required_models) - found_models
+            missing_models_str = ", ".join(missing_models)
+            error_msg = f"Required models not available: {missing_models_str}. Cannot proceed with benchmark."
+            self._add_log(f"[ERROR] {error_msg}")
+            raise RuntimeError(error_msg)
+
+        # Mark provider check stage as completed
+        self._add_log("[SUCCESS] Stage completed: provider_check")
+
         return {
             "hf_configuration": {
                 "token": "$HF_TOKEN",  # Use the token from the environment directly
@@ -102,23 +157,10 @@ class CreateBenchConfigTask:
                 "hf_dataset_name": hf_dataset_name,
                 "concat_if_exist": False,
             },
-            "model_list": [
-                {
-                    "model_name": "Qwen/Qwen2.5-VL-72B-Instruct",
-                    "provider": "novita",
-                    "api_key": "$HF_TOKEN",
-                    "max_concurrent_requests": 32,
-                },
-                {
-                    "model_name": "Qwen/Qwen2.5-72B-Instruct",
-                    "provider": "novita",
-                    "api_key": "$HF_TOKEN",
-                    "max_concurrent_requests": 32,
-                }
-            ],
+            "model_list": model_list,
 
             "model_roles": {
-                "ingestion": ["Qwen/Qwen2.5-VL-72B-Instruct"],
+                "ingestion": ["Qwen/Qwen2.5-72B-Instruct"],
                 "summarization": ["Qwen/Qwen2.5-72B-Instruct"],
                 "chunking": ["intfloat/multilingual-e5-large-instruct"],
                 "single_shot_question_generation": ["Qwen/Qwen2.5-72B-Instruct"],
@@ -229,6 +271,9 @@ class CreateBenchConfigTask:
 
         time.sleep(0.8)  # Simulate delay
 
+        # Log the start of finding providers
+        self._add_log("[INFO] Finding available providers for models...")
+
         # Generate and save the configuration
         config = self.generate_base_config(org_name, dataset_name)
 
backend/tasks/get_available_model_provider.py ADDED
@@ -0,0 +1,208 @@
+import os
+import logging
+import json
+from huggingface_hub import model_info, InferenceClient
+from dotenv import load_dotenv
+
+# Define preferred providers
+PREFERRED_PROVIDERS = ["sambanova", "novita"]
+
+def filter_providers(providers):
+    """Filter providers to only include preferred ones."""
+    return [provider for provider in providers if provider in PREFERRED_PROVIDERS]
+
+def prioritize_providers(providers):
+    """Prioritize preferred providers, keeping all others."""
+    preferred = [provider for provider in providers if provider in PREFERRED_PROVIDERS]
+    non_preferred = [provider for provider in providers if provider not in PREFERRED_PROVIDERS]
+    return preferred + non_preferred
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(__name__)
+
+def is_vision_model(model_name: str) -> bool:
+    """
+    Check if the model is a vision model based on its name
+
+    Args:
+        model_name: Name of the model
+
+    Returns:
+        True if it's a vision model, False otherwise
+    """
+    vision_indicators = ["-VL-", "vision", "clip", "image"]
+    return any(indicator in model_name.lower() for indicator in vision_indicators)
+
+def get_test_payload(model_name: str) -> dict:
+    """
+    Get the appropriate test payload based on model type
+
+    Args:
+        model_name: Name of the model
+
+    Returns:
+        Dictionary containing the test payload
+    """
+    # We're only testing text models now
+    return {
+        "inputs": "Hello",
+        "parameters": {
+            "max_new_tokens": 5
+        }
+    }
+
+def test_provider(model_name: str, provider: str, verbose: bool = False) -> bool:
+    """
+    Test if a specific provider is available for a model using InferenceClient
+
+    Args:
+        model_name: Name of the model
+        provider: Provider to test
+        verbose: Whether to log detailed information
+
+    Returns:
+        True if the provider is available, False otherwise
+    """
+    try:
+        # Load environment variables
+        load_dotenv()
+
+        # Get HF token from environment
+        hf_token = os.environ.get("HF_TOKEN")
+        if not hf_token:
+            raise ValueError("HF_TOKEN not defined in environment")
+
+        if verbose:
+            logger.info(f"Testing provider {provider} for model {model_name}")
+
+        # Initialize the InferenceClient with the specific provider
+        client = InferenceClient(
+            model=model_name,
+            token=hf_token,
+            provider=provider,
+            timeout=10  # Increased timeout to allow model loading
+        )
+
+        try:
+            # Use the chat completions method for testing
+            response = client.chat_completion(
+                messages=[{"role": "user", "content": "Hello"}],
+                max_tokens=5
+            )
+
+            if verbose:
+                logger.info(f"Provider {provider} is available for {model_name}")
+            return True
+
+        except Exception as e:
+            if verbose:
+                error_message = str(e)
+                logger.error(f"Error with provider {provider}: {error_message}")
+
+                # Log specific error types if we can identify them
+                if "status_code=429" in error_message:
+                    logger.warning(f"Provider {provider} rate limited. You may need to wait or upgrade your plan.")
+                elif "status_code=401" in error_message:
+                    logger.warning(f"Authentication failed for provider {provider}. Check your token.")
+                elif "status_code=503" in error_message:
+                    logger.warning(f"Provider {provider} service unavailable. Model may be loading or provider is down.")
+                elif "timed out" in error_message.lower():
+                    logger.error(f"Timeout error with provider {provider} - request timed out after 10 seconds")
+            return False
+
+    except Exception as e:
+        if verbose:
+            logger.error(f"Error in test_provider: {str(e)}")
+        return False
+
+def get_available_model_provider(model_name, verbose=False):
+    """
+    Get the first available provider for a given model.
+
+    Args:
+        model_name: Name of the model on the Hub
+        verbose: Whether to log detailed information
+
+    Returns:
+        First available provider or None if none are available
+    """
+    try:
+        # Load environment variables
+        load_dotenv()
+
+        # Get HF token from environment
+        hf_token = os.environ.get("HF_TOKEN")
+        if not hf_token:
+            raise ValueError("HF_TOKEN not defined in environment")
+
+        # Get providers for the model and prioritize them
+        info = model_info(model_name, expand="inferenceProviderMapping")
+        if not hasattr(info, "inference_provider_mapping"):
+            if verbose:
+                logger.info(f"No inference providers found for {model_name}")
+            return None
+
+        providers = list(info.inference_provider_mapping.keys())
+        if not providers:
+            if verbose:
+                logger.info(f"Empty list of providers for {model_name}")
+            return None
+
+        # Prioritize providers
+        providers = prioritize_providers(providers)
+
+        if verbose:
+            logger.info(f"Available providers for {model_name}: {', '.join(providers)}")
+
+        # Test each provider
+        for provider in providers:
+            if test_provider(model_name, provider, verbose):
+                return provider
+
+        return None
+
+    except Exception as e:
+        if verbose:
+            logger.error(f"Error in get_available_model_provider: {str(e)}")
+        return None
+
+if __name__ == "__main__":
+    # # Example usage with verbose mode enabled
+    # model = "Qwen/Qwen2.5-72B-Instruct"
+
+    # # Test sambanova provider
+    # print("\nTesting sambanova provider:")
+    # sambanova_available = test_provider(model, "sambanova", verbose=True)
+    # print(f"sambanova available: {sambanova_available}")
+
+    # # Test novita provider
+    # print("\nTesting novita provider:")
+    # novita_available = test_provider(model, "novita", verbose=True)
+    # print(f"novita available: {novita_available}")
+
+    # # Test automatic provider selection
+    # print("\nTesting automatic provider selection:")
+    # provider = get_available_model_provider(model, verbose=True)
+    # print(f"Selected provider: {provider}")
+
+    models = [
+        "Qwen/QwQ-32B",
+        "Qwen/Qwen2.5-72B-Instruct",
+        "meta-llama/Llama-3.3-70B-Instruct",
+        "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
+        "mistralai/Mistral-Small-24B-Instruct-2501",
+    ]
+
+    providers = []
+
+    for model in models:
+        provider = get_available_model_provider(model, verbose=True)
+        providers.append(provider)
+
+    print(f"Providers {len(providers)}: {providers}")
+
+
+    # print("\nTesting novita provider:")
+    # novita_available = test_provider("deepseek-ai/DeepSeek-V3-0324", "novita", verbose=True)
+    # print(f"novita available: {novita_available}")
backend/tests/test_provider_rate_limits.py ADDED
@@ -0,0 +1,272 @@
+#!/usr/bin/env python
+"""
+Script to test rate limits of Hugging Face Inference API providers.
+Spams requests to a model/provider and collects error messages.
+
+Usage: python test_provider_rate_limits.py --model "model_name" --provider "provider_name" --requests 50
+"""
+
+import argparse
+import json
+import time
+import os
+import requests
+import sys
+import logging
+from concurrent.futures import ThreadPoolExecutor
+from collections import Counter
+from typing import Dict, List, Tuple
+from dotenv import load_dotenv
+
+# Add parent directory to path to import from tasks
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from tasks.get_available_model_provider import prioritize_providers
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger("rate_limit_test")
+
+# Default model to test
+DEFAULT_MODEL = "meta-llama/Llama-3.3-70B-Instruct"
+
+def send_request(model: str, provider: str, token: str, request_id: int) -> Dict:
+    """
+    Send a single request to the model with the given provider.
+
+    Args:
+        model: Model name
+        provider: Provider name
+        token: HF token
+        request_id: ID for this request
+
+    Returns:
+        Dictionary with request info and result
+    """
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json"
+    }
+
+    payload = {
+        "inputs": f"Request {request_id}: Hello, what do you think about the future of AI? And divide 10 by {request_id}",
+        "parameters": {
+            "max_new_tokens": 10000,
+            "provider": provider
+        }
+    }
+
+    api_url = f"https://api-inference.huggingface.co/models/{model}"
+
+    start_time = time.time()
+    try:
+        response = requests.post(api_url, headers=headers, json=payload, timeout=15)
+        end_time = time.time()
+
+        result = {
+            "request_id": request_id,
+            "status_code": response.status_code,
+            "time_taken": end_time - start_time,
+            "headers": dict(response.headers),
+            "success": response.status_code == 200,
+        }
+
+        if response.status_code != 200:
+            try:
+                error_data = response.json()
+                if isinstance(error_data, dict) and "error" in error_data:
+                    result["error_message"] = error_data["error"]
+                else:
+                    result["error_message"] = str(error_data)
+            except:
+                result["error_message"] = response.text
+
+        return result
+
+    except Exception as e:
+        end_time = time.time()
+        return {
+            "request_id": request_id,
+            "status_code": 0,
+            "time_taken": end_time - start_time,
+            "success": False,
+            "error_message": str(e)
+        }
+
+def run_rate_limit_test(model: str, provider: str = None, num_requests: int = 50,
+                        max_workers: int = 10, delay: float = 0.1) -> List[Dict]:
+    """
+    Run a rate limit test by sending multiple requests to the specified model/provider.
+
+    Args:
+        model: Model to test
+        provider: Provider to test (if None, will use first available)
+        num_requests: Number of requests to send
+        max_workers: Maximum number of concurrent workers
+        delay: Delay between batches of requests
+
+    Returns:
+        List of results for each request
+    """
+    # Load environment variables
+    load_dotenv()
+
+    # Get HF token
+    hf_token = os.environ.get("HF_TOKEN")
+    if not hf_token:
+        logger.error("HF_TOKEN not defined in environment")
+        return []
+
+    # If provider not specified, get first available
+    if not provider:
+        from tasks.get_available_model_provider import get_available_model_provider
+        provider = get_available_model_provider(model)
+        if not provider:
+            logger.error(f"No available provider found for {model}")
+            return []
+
+    logger.info(f"Testing rate limits for {model} with provider: {provider}")
+    logger.info(f"Sending {num_requests} requests with {max_workers} concurrent workers")
+
+    # Send requests in parallel
+    results = []
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        future_to_id = {
+            executor.submit(send_request, model, provider, hf_token, i): i
+            for i in range(num_requests)
+        }
+
+        completed = 0
+        for future in future_to_id:
+            result = future.result()
+            results.append(result)
+
+            completed += 1
+            if completed % 10 == 0:
+                logger.info(f"Completed {completed}/{num_requests} requests")
+
+            # Add a small delay periodically to avoid overwhelming the API
+            if completed % max_workers == 0:
+                time.sleep(delay)
+
+    return results
+
+def analyze_results(results: List[Dict]) -> Dict:
+    """
+    Analyze the results of the rate limit test.
+
+    Args:
+        results: List of request results
+
+    Returns:
+        Dictionary with analysis
+    """
+    total_requests = len(results)
+    successful = sum(1 for r in results if r["success"])
+    failed = total_requests - successful
+
+    # Count different error messages
+    error_messages = Counter(r.get("error_message") for r in results if not r["success"])
+
+    # Calculate timing statistics
+    times = [r["time_taken"] for r in results]
+    avg_time = sum(times) / len(times) if times else 0
+
+    # Check for rate limiting headers
+    rate_limit_headers = set()
+    for r in results:
+        if "headers" in r:
+            for header in r["headers"]:
+                if "rate" in header.lower() or "limit" in header.lower():
+                    rate_limit_headers.add(header)
+
+    return {
+        "total_requests": total_requests,
+        "successful_requests": successful,
+        "failed_requests": failed,
+        "success_rate": successful / total_requests if total_requests > 0 else 0,
+        "average_time": avg_time,
+        "error_messages": dict(error_messages),
+        "rate_limit_headers": list(rate_limit_headers)
+    }
+
+def display_results(results: List[Dict], analysis: Dict) -> None:
+    """
+    Display the results of the rate limit test.
+
+    Args:
+        results: List of request results
+        analysis: Analysis of results
+    """
+    print("\n" + "="*80)
+    print(f"RATE LIMIT TEST RESULTS")
+    print("="*80)
+
+    print(f"\nTotal Requests: {analysis['total_requests']}")
+    print(f"Successful: {analysis['successful_requests']} ({analysis['success_rate']*100:.1f}%)")
+    print(f"Failed: {analysis['failed_requests']}")
+    print(f"Average Time: {analysis['average_time']:.3f} seconds")
+
+    if analysis["rate_limit_headers"]:
+        print("\nRate Limit Headers Found:")
+        for header in analysis["rate_limit_headers"]:
+            print(f"  - {header}")
+
+    if analysis["error_messages"]:
+        print("\nError Messages:")
+        for msg, count in analysis["error_messages"].items():
+            print(f"  - [{count} occurrences] {msg}")
+
+    # Print sample of headers from a failed request
+    failed_requests = [r for r in results if not r["success"]]
+    if failed_requests:
+        print("\nSample Headers from a Failed Request:")
+        for header, value in failed_requests[0].get("headers", {}).items():
+            print(f"  {header}: {value}")
+
+def main():
+    """
+    Main entry point for the script.
+    """
+    parser = argparse.ArgumentParser(description="Test rate limits of Hugging Face Inference API providers.")
+    parser.add_argument("--model", type=str, default=DEFAULT_MODEL, help="Name of the model to test")
+    parser.add_argument("--provider", type=str, help="Name of the provider to test (if not specified, will use first available)")
+    parser.add_argument("--requests", type=int, default=50, help="Number of requests to send")
+    parser.add_argument("--workers", type=int, default=10, help="Maximum number of concurrent workers")
+    parser.add_argument("--delay", type=float, default=0.1, help="Delay between batches of requests")
+    parser.add_argument("--output", type=str, help="Path to save results as JSON (optional)")
+
+    args = parser.parse_args()
+
+    # Run the test
+    results = run_rate_limit_test(
+        model=args.model,
+        provider=args.provider,
+        num_requests=args.requests,
+        max_workers=args.workers,
+        delay=args.delay
+    )
+
+    if not results:
+        logger.error("Test failed to run properly")
+        return
+
+    # Analyze the results
+    analysis = analyze_results(results)
+
+    # Display the results
+    display_results(results, analysis)
+
+    # Save results if requested
+    if args.output:
+        with open(args.output, "w") as f:
+            json.dump({
+                "results": results,
+                "analysis": analysis
+            }, f, indent=2)
+        logger.info(f"Results saved to {args.output}")
+
+if __name__ == "__main__":
+    main()
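
The script is meant to be driven from the CLI shown in its docstring, but it can also be called programmatically; a sketch, assuming backend/ is the working directory so the tests and tasks packages resolve and HF_TOKEN is set:

    from tests.test_provider_rate_limits import run_rate_limit_test, analyze_results, display_results

    # Fire a small burst of requests at one model/provider and summarize the errors.
    results = run_rate_limit_test(
        model="Qwen/Qwen2.5-72B-Instruct",
        provider="novita",  # or None to auto-select the first available provider
        num_requests=20,
        max_workers=5,
    )
    display_results(results, analyze_results(results))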
frontend/src/components/BenchmarkGenerator.jsx CHANGED
@@ -12,21 +12,29 @@ const SIMULATION_DURATION = 120000; // 20 secondes
 // Define all benchmark steps in sequence
 const BENCHMARK_STEPS = [
   "configuration",
+  "provider_check",
   "ingestion",
   "upload_ingest_to_hub",
   "summarization",
   "chunking",
   "single_shot_question_generation",
+  "evaluation_provider_check",
+  "evaluation",
+  "evaluation_saving_results",
 ];
 
 // Step labels for display (more user-friendly names)
 const STEP_LABELS = {
   configuration: "Configuration",
+  provider_check: "Finding providers",
   ingestion: "Ingestion",
   upload_ingest_to_hub: "Upload to Hub",
   summarization: "Summarization",
   chunking: "Chunking",
   single_shot_question_generation: "Question generation",
+  evaluation_provider_check: "Checking evaluation providers",
+  evaluation: "Running evaluations",
+  evaluation_saving_results: "Saving evaluation results",
 };
 
 // Simulated log messages for pre-calculated documents
@@ -34,6 +42,8 @@ const SIMULATED_LOGS = [
   "[INFO] Initializing benchmark generation...",
   "[INFO] Generating base configuration file...",
   "[SUCCESS] Stage completed: configuration",
+  "[INFO] Finding available providers for models...",
+  "[SUCCESS] Stage completed: provider_check",
   "[INFO] Starting ingestion process...",
   "[SUCCESS] Stage completed: ingestion",
   "[INFO] Processing document content for upload...",