add config file for models | fix github link in intro
backend/config/__init__.py
ADDED
@@ -0,0 +1,3 @@
+"""
+Configuration module for the application
+"""
backend/config/models_config.py
ADDED
@@ -0,0 +1,38 @@
+"""
+Central configuration for models and providers
+
+This file centralizes all configurations related to models and providers used in the application.
+"""
+
+# Definition of preferred providers, used in get_available_model_provider.py
+PREFERRED_PROVIDERS = ["fireworks-ai", "sambanova", "novita"]
+
+# Default models to evaluate for evaluation
+DEFAULT_EVALUATION_MODELS = [
+    "Qwen/QwQ-32B",
+    "Qwen/Qwen2.5-72B-Instruct",
+    "Qwen/Qwen2.5-32B-Instruct",
+    "meta-llama/Llama-3.1-8B-Instruct",
+    "meta-llama/Llama-3.3-70B-Instruct",
+    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
+    "mistralai/Mistral-Small-24B-Instruct-2501",
+]
+
+# Required model for create_bench_config_file.py (only one default model)
+DEFAULT_MODEL = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
+
+# Models by roles for benchmark configuration
+# All roles use the default model except chunking
+MODEL_ROLES = {
+    "ingestion": [DEFAULT_MODEL],
+    "summarization": [DEFAULT_MODEL],
+    "chunking": ["intfloat/multilingual-e5-large-instruct"],
+    "single_shot_question_generation": [DEFAULT_MODEL],
+    "multi_hop_question_generation": [DEFAULT_MODEL],
+}
+
+# Default evaluation timeout (in seconds)
+DEFAULT_EVALUATION_TIMEOUT = 60.0
+
+# Default benchmark timeout (in seconds)
+DEFAULT_BENCHMARK_TIMEOUT = 300.0
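For orientation, here is a minimal sketch (not part of this commit) of how a backend module can consume these constants; build_minimal_config is a hypothetical helper, and the import path assumes the backend directory is on the Python path, as it is for the files changed below.

from config.models_config import (
    DEFAULT_MODEL,
    MODEL_ROLES,
    DEFAULT_BENCHMARK_TIMEOUT,
)

def build_minimal_config() -> dict:
    # Hypothetical consumer: reuse the shared constants instead of hard-coding model names.
    return {
        "model_list": [{"model_name": DEFAULT_MODEL, "api_key": "$HF_TOKEN"}],
        "model_roles": MODEL_ROLES,
        "timeout": DEFAULT_BENCHMARK_TIMEOUT,
    }

if __name__ == "__main__":
    print(build_minimal_config())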
backend/routes/benchmark.py
CHANGED
@@ -177,7 +177,9 @@ class UnifiedBenchmarkTask:
         try:
             # Step 1: Configuration
             self._add_log("[INFO] Starting configuration process")
-            self.config_task = CreateBenchConfigTask(session_uid=self.session_uid)
+            # Import and use DEFAULT_BENCHMARK_TIMEOUT
+            from config.models_config import DEFAULT_BENCHMARK_TIMEOUT
+            self.config_task = CreateBenchConfigTask(session_uid=self.session_uid, timeout=DEFAULT_BENCHMARK_TIMEOUT)
 
             # Execute the configuration task
             try:
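The route now constructs the configuration task with the shared timeout. A standalone sketch of that wiring (not from the commit), assuming it runs from the backend directory; the session id below is a placeholder value:

from config.models_config import DEFAULT_BENCHMARK_TIMEOUT
from tasks.create_bench_config_file import CreateBenchConfigTask

config_task = CreateBenchConfigTask(
    session_uid="example-session",      # placeholder; the route passes its own session_uid
    timeout=DEFAULT_BENCHMARK_TIMEOUT,  # shared default from the new config module
)
print(config_task.timeout)  # 300.0 unless DEFAULT_BENCHMARK_TIMEOUT is changed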
backend/tasks/create_bench_config_file.py
CHANGED
@@ -14,6 +14,11 @@ from loguru import logger
 from huggingface_hub import HfApi
 
 from tasks.get_available_model_provider import get_available_model_provider
+from config.models_config import (
+    DEFAULT_MODEL,
+    MODEL_ROLES,
+    DEFAULT_BENCHMARK_TIMEOUT,
+)
 
 
 class CreateBenchConfigTask:
@@ -21,18 +26,20 @@ class CreateBenchConfigTask:
     Task to create and save a configuration file for YourbenchSimpleDemo
     """
 
-    def __init__(self, session_uid: Optional[str] = None):
+    def __init__(self, session_uid: Optional[str] = None, timeout: float = None):
         """
         Initialize the task with a session ID
 
         Args:
             session_uid: Optional session ID, will be generated if None
+            timeout: Timeout in seconds for benchmark operations (if None, uses default)
         """
         self.session_uid = session_uid or str(uuid.uuid4())
         self.logs: List[str] = []
         self.is_completed = False
         self.is_running_flag = threading.Event()
         self.thread = None
+        self.timeout = timeout if timeout is not None else DEFAULT_BENCHMARK_TIMEOUT
         self._add_log("[INFO] Initializing configuration creation task")
 
     def _add_log(self, message: str) -> None:
@@ -116,40 +123,22 @@ class CreateBenchConfigTask:
         if not hf_token:
             raise RuntimeError("HF_TOKEN environment variable is not defined")
 
-        # Get
-
-
-
-        required_models = [
-            # "Qwen/Qwen2.5-72B-Instruct"
-            # "meta-llama/Llama-3.1-8B-Instruct"
-            # "Qwen/Qwen2.5-32B-Instruct",
-            "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
-        ]
-
-        # Track found models
-        found_models = set()
-
-        for model_name in required_models:
-            provider = self.get_model_provider(model_name)
-            if provider:
-                model_list.append({
-                    "model_name": model_name,
-                    "provider": provider,
-                    "api_key": "$HF_TOKEN",
-                    "max_concurrent_requests": 32,
-                })
-                found_models.add(model_name)
-
-        # # Check if both required models are available
-        if len(found_models) < len(required_models):
-            missing_models = set(required_models) - found_models
-            missing_models_str = ", ".join(missing_models)
-            error_msg = f"Required models not available: {missing_models_str}. Cannot proceed with benchmark."
+        # Get provider for the default model
+        provider = self.get_model_provider(DEFAULT_MODEL)
+        if not provider:
+            error_msg = f"Required model not available: {DEFAULT_MODEL}. Cannot proceed with benchmark."
             self._add_log(f"[ERROR] {error_msg}")
             raise RuntimeError(error_msg)
+
+        # Create model configuration
+        model_list = [{
+            "model_name": DEFAULT_MODEL,
+            "provider": provider,
+            "api_key": "$HF_TOKEN",
+            "max_concurrent_requests": 32,
+        }]
 
-        #
+        # Add minimum delay of 2 seconds for provider_check stage
         self._add_log("[INFO] Finalizing provider check...")
         time.sleep(2)
 
@@ -158,36 +147,35 @@ class CreateBenchConfigTask:
 
         return {
             "hf_configuration": {
-                "token": "$HF_TOKEN",
+                "token": "$HF_TOKEN",
                 "hf_organization": "$HF_ORGANIZATION",
                 "private": True,
                 "hf_dataset_name": hf_dataset_name,
                 "concat_if_exist": False,
+                "timeout": self.timeout,  # Add timeout to configuration
             },
             "model_list": model_list,
 
-            "model_roles": {
-                "ingestion": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
-                "summarization": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
-                "chunking": ["intfloat/multilingual-e5-large-instruct"],
-                "single_shot_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
-                "multi_hop_question_generation": ["deepseek-ai/DeepSeek-R1-Distill-Llama-70B"],
-            },
+            "model_roles": MODEL_ROLES,
             "pipeline": {
                 "ingestion": {
                     "source_documents_dir": f"uploaded_files/{self.session_uid}/uploaded_files/",
                     "output_dir": f"uploaded_files/{self.session_uid}/ingested",
                     "run": True,
+                    "timeout": self.timeout,  # Add timeout to ingestion
                 },
                 "upload_ingest_to_hub": {
                     "source_documents_dir": f"uploaded_files/{self.session_uid}/ingested",
-                    "run": True,
+                    "run": True,
+                    "timeout": self.timeout,  # Add timeout to upload
                 },
                 "summarization": {
                     "run": True,
+                    "timeout": self.timeout,  # Add timeout to summarization
                 },
                 "chunking": {
                     "run": True,
+                    "timeout": self.timeout,  # Add timeout to chunking
                     "chunking_configuration": {
                         "l_min_tokens": 64,
                         "l_max_tokens": 128,
@@ -199,6 +187,7 @@ class CreateBenchConfigTask:
                 },
                 "single_shot_question_generation": {
                     "run": True,
+                    "timeout": self.timeout,  # Add timeout to question generation
                     "additional_instructions": "Generate rich and creative questions to test a curious adult",
                     "chunk_sampling": {
                         "mode": "count",
@@ -208,9 +197,11 @@ class CreateBenchConfigTask:
                 },
                 "multi_hop_question_generation": {
                     "run": False,
+                    "timeout": self.timeout,  # Add timeout to multi-hop question generation
                 },
                 "lighteval": {
                     "run": False,
+                    "timeout": self.timeout,  # Add timeout to lighteval
                 },
             },
         }
@@ -310,17 +301,22 @@ class CreateBenchConfigTask:
             self.mark_task_completed()
             raise RuntimeError(error_msg)
 
-    def run(self, file_path: str, token: Optional[str] = None) -> str:
+    def run(self, file_path: str, token: Optional[str] = None, timeout: Optional[float] = None) -> str:
         """
         Run the task to create and save the configuration file asynchronously
 
         Args:
             file_path: Path to the uploaded file
             token: Hugging Face token (not used, using HF_TOKEN from environment)
+            timeout: Timeout in seconds for benchmark operations (if None, uses default)
 
         Returns:
             Path to the configuration file
         """
+        # Update timeout if provided
+        if timeout is not None:
+            self.timeout = timeout
+
         # Mark the task as running
         self.is_running_flag.set()
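Taken together, the constructor and run() changes give two override points for the timeout. A small sketch of the resulting behaviour, with illustrative values (not part of the commit):

from config.models_config import DEFAULT_BENCHMARK_TIMEOUT
from tasks.create_bench_config_file import CreateBenchConfigTask

task = CreateBenchConfigTask()                    # no timeout given
assert task.timeout == DEFAULT_BENCHMARK_TIMEOUT  # falls back to the shared default (300.0)

task = CreateBenchConfigTask(timeout=120.0)       # constructor override
assert task.timeout == 120.0

# run() accepts a per-call override as well; the path below is a placeholder:
# task.run("uploaded_files/<session_uid>/uploaded_files/document.pdf", timeout=600.0)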
backend/tasks/evaluation_task.py
CHANGED
@@ -16,19 +16,7 @@ from tasks.get_available_model_provider import get_available_model_provider
 from huggingface_hub import HfApi
 import asyncio
 from datasets import load_dataset
-
-DEFAULT_EVALUATION_TIMEOUT = 60.0  # 1 minute by default
-
-# Models to evaluate - only accessible models
-DEFAULT_EVALUATION_MODELS = [
-    "Qwen/QwQ-32B",
-    "Qwen/Qwen2.5-72B-Instruct",
-    "Qwen/Qwen2.5-32B-Instruct",
-    "meta-llama/Llama-3.1-8B-Instruct",
-    "meta-llama/Llama-3.3-70B-Instruct",
-    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
-    "mistralai/Mistral-Small-24B-Instruct-2501",
-]
+from config.models_config import DEFAULT_EVALUATION_MODELS, DEFAULT_EVALUATION_TIMEOUT
 
 class EvaluationTask:
     """
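The body of EvaluationTask is untouched by this hunk; it now simply reads the model list and timeout from the shared config. As an illustration of how those constants can be consumed, here is a minimal, self-contained loop in which evaluate_model is a stand-in coroutine, not the task's real evaluation call:

import asyncio
from config.models_config import DEFAULT_EVALUATION_MODELS, DEFAULT_EVALUATION_TIMEOUT

async def evaluate_model(model_name: str) -> str:
    # Stand-in for a real inference/evaluation call.
    await asyncio.sleep(0.1)
    return f"{model_name}: ok"

async def main() -> None:
    for model_name in DEFAULT_EVALUATION_MODELS:
        try:
            result = await asyncio.wait_for(evaluate_model(model_name), timeout=DEFAULT_EVALUATION_TIMEOUT)
            print(result)
        except asyncio.TimeoutError:
            print(f"{model_name}: timed out after {DEFAULT_EVALUATION_TIMEOUT}s")

if __name__ == "__main__":
    asyncio.run(main())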
backend/tasks/get_available_model_provider.py
CHANGED
@@ -3,13 +3,11 @@ import logging
 import json
 from huggingface_hub import model_info, InferenceClient
 from dotenv import load_dotenv
+from config.models_config import PREFERRED_PROVIDERS
 
 # Load environment variables once at the module level
 load_dotenv()
 
-# Define preferred providers
-PREFERRED_PROVIDERS = ["fireworks-ai","sambanova", "novita"]
-
 # Configure logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger(__name__)
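The provider-selection logic itself is unchanged; it now just imports PREFERRED_PROVIDERS from the config module. A minimal sketch of the preference idea, with the available providers supplied by the caller (the real module derives them via huggingface_hub; pick_provider is a hypothetical helper):

from typing import List, Optional
from config.models_config import PREFERRED_PROVIDERS

def pick_provider(available_providers: List[str]) -> Optional[str]:
    # First preferred provider that is actually available wins; otherwise fall back to any available one.
    for provider in PREFERRED_PROVIDERS:
        if provider in available_providers:
            return provider
    return available_providers[0] if available_providers else None

print(pick_provider(["novita", "sambanova"]))  # -> "sambanova" (listed earlier in PREFERRED_PROVIDERS)
print(pick_provider(["some-other-provider"]))  # -> "some-other-provider" (fallback)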