Quazim0t0 committed
Commit ecb6742 · verified · 1 Parent(s): bc94cad

Update evaluation_queue.py

Files changed (1):
  evaluation_queue.py (+39 −87)
evaluation_queue.py CHANGED
@@ -9,7 +9,7 @@ import os
 import json
 import time
 import threading
-import queue
+import queue as queue_module
 from datetime import datetime, timedelta
 import gradio as gr
 from huggingface_hub import HfApi, hf_hub_download, snapshot_download
@@ -31,14 +31,13 @@ class EvaluationQueue:
         self.db_manager = db_manager
         self.auth_manager = auth_manager
         self.hf_api = HfApi()
-        self.queue = queue.Queue()
+        self.queue = queue_module.Queue()
         self.is_processing = False
         self.worker_thread = None
         self.model_tags = ["Merge", "Agent", "Reasoning", "Coding", "General", "Specialized", "Instruction", "Chat"]
         self.current_evaluation = None
         self.progress = 0
         self.progress_lock = threading.Lock()
-        self.db_path = db_manager.db_path  # Store the path to create new connections in worker thread
 
     def start_worker(self):
         """Start the worker thread for processing the evaluation queue."""
@@ -56,36 +55,17 @@
 
     def _process_queue(self):
         """Process the evaluation queue in a separate thread."""
-        # Create a new database connection for this thread
-        thread_db = sqlite3.connect(self.db_path)
-        thread_db.row_factory = sqlite3.Row
-
         while self.is_processing:
             try:
-                # Get the next evaluation from the database using thread-local connection
-                cursor = thread_db.cursor()
-                cursor.execute("""
-                    SELECT e.id as evaluation_id, e.model_id, e.benchmark_id, m.hf_model_id, b.dataset_id
-                    FROM queue q
-                    JOIN evaluations e ON q.evaluation_id = e.id
-                    JOIN models m ON e.model_id = m.id
-                    JOIN benchmarks b ON e.benchmark_id = b.id
-                    WHERE e.status = 'pending'
-                    ORDER BY q.priority DESC, q.added_at ASC
-                    LIMIT 1
-                """)
-                row = cursor.fetchone()
+                # Get the next evaluation from the database
+                pending_evals = self.db_manager.get_evaluation_results(status="pending")
 
-                if row:
-                    next_eval = dict(row)
+                if pending_evals:
+                    # Sort by priority and added_at
+                    next_eval = pending_evals[0]
 
                     # Update status to running
-                    cursor.execute("""
-                        UPDATE evaluations
-                        SET status = 'running', started_at = datetime('now')
-                        WHERE id = ?
-                    """, (next_eval['evaluation_id'],))
-                    thread_db.commit()
+                    self.db_manager.update_evaluation_status(next_eval['id'], 'running')
 
                     # Set current evaluation and reset progress
                     with self.progress_lock:
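The worker now delegates queue ordering to `DatabaseManager`: the `# Sort by priority and added_at` comment above `pending_evals[0]` only holds if `get_evaluation_results` returns rows already ordered the way the deleted SQL did (`ORDER BY q.priority DESC, q.added_at ASC`). That helper is not shown in this diff; a minimal sketch of the assumed contract, carrying over the removed query's ordering, might look like:

```python
import sqlite3

class DatabaseManager:
    # Hypothetical helper assumed by the new worker loop; not part of this
    # commit. A fresh connection per call is usable from any thread, which is
    # what lets the worker drop its dedicated thread_db connection.
    def get_evaluation_results(self, status=None, limit=None):
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        try:
            query = """
                SELECT e.*, q.priority, q.added_at
                FROM evaluations e
                LEFT JOIN queue q ON q.evaluation_id = e.id
            """
            params = []
            if status is not None:
                query += " WHERE e.status = ?"
                params.append(status)
            query += " ORDER BY q.priority DESC, q.added_at ASC"
            if limit is not None:
                query += " LIMIT ?"
                params.append(limit)
            return [dict(row) for row in conn.execute(query, params).fetchall()]
        finally:
            conn.close()
```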
@@ -93,34 +73,33 @@
                         self.progress = 0
 
                     try:
-                        # Run the evaluation
-                        results = self._run_evaluation(
-                            next_eval['hf_model_id'],
-                            next_eval['dataset_id']
-                        )
-
-                        # Calculate overall score
-                        score = self._calculate_overall_score(results)
+                        # Get model and benchmark details
+                        model_info = self.db_manager.get_model(next_eval['model_id'])
+                        benchmark_info = self.db_manager.get_benchmark(next_eval['benchmark_id'])
 
-                        # Update status to completed with results
-                        cursor.execute("""
-                            UPDATE evaluations
-                            SET status = 'completed',
-                                completed_at = datetime('now'),
-                                results = ?,
-                                score = ?
-                            WHERE id = ?
-                        """, (json.dumps(results), score, next_eval['evaluation_id']))
-                        thread_db.commit()
+                        if model_info and benchmark_info:
+                            # Run the evaluation
+                            results = self._run_evaluation(
+                                model_info['hf_model_id'],
+                                benchmark_info['dataset_id']
+                            )
+
+                            # Calculate overall score
+                            score = self._calculate_overall_score(results)
+
+                            # Update status to completed with results
+                            self.db_manager.update_evaluation_status(
+                                next_eval['id'],
+                                'completed',
+                                results=results,
+                                score=score
+                            )
+                        else:
+                            raise Exception("Model or benchmark not found")
                     except Exception as e:
                         print(f"Evaluation error: {e}")
                         # Update status to failed
-                        cursor.execute("""
-                            UPDATE evaluations
-                            SET status = 'failed', completed_at = datetime('now')
-                            WHERE id = ?
-                        """, (next_eval['evaluation_id'],))
-                        thread_db.commit()
+                        self.db_manager.update_evaluation_status(next_eval['id'], 'failed')
 
                     # Clear current evaluation
                     with self.progress_lock:
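Both status transitions now route through a single `update_evaluation_status` helper. The motivation is SQLite's thread affinity: by default a `sqlite3` connection refuses to be used outside the thread that created it, which is why the old worker carried its own `thread_db`. A helper that opens a short-lived connection per call avoids the issue entirely. A sketch consistent with the columns in the removed `UPDATE` statements (hypothetical; the real method lives in the database module, not this diff):

```python
import json
import sqlite3

class DatabaseManager:
    # Hypothetical helper matching the calls introduced above; not part of
    # this commit. Column names mirror the UPDATE statements this diff removes.
    def update_evaluation_status(self, evaluation_id, status, results=None, score=None):
        conn = sqlite3.connect(self.db_path)
        try:
            if status == 'running':
                conn.execute(
                    "UPDATE evaluations SET status = ?, started_at = datetime('now') WHERE id = ?",
                    (status, evaluation_id),
                )
            else:  # 'completed' or 'failed'
                conn.execute(
                    """UPDATE evaluations
                       SET status = ?, completed_at = datetime('now'),
                           results = COALESCE(?, results), score = COALESCE(?, score)
                       WHERE id = ?""",
                    (status,
                     json.dumps(results) if results is not None else None,
                     score, evaluation_id),
                )
            conn.commit()
        finally:
            conn.close()
```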
@@ -132,9 +111,6 @@
             except Exception as e:
                 print(f"Queue processing error: {e}")
                 time.sleep(5)
-
-        # Close the thread-local database connection
-        thread_db.close()
 
     def _run_evaluation(self, model_id, dataset_id):
         """Run an evaluation for a model on a benchmark.
@@ -211,7 +187,8 @@
             # Clean up to free memory
             del model
             del tokenizer
-            torch.cuda.empty_cache()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
 
             # Update progress
             with self.progress_lock:
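Guarding the cache flush makes the cleanup path safe on CPU-only hosts, where calling into `torch.cuda` can fail depending on the build. Since the same cleanup recurs across the evaluation paths, a tiny shared helper is one way to keep the guard in one place (a sketch, not part of this commit; callers would still `del model` and `del tokenizer` first, as the hunk above does):

```python
import gc

import torch

def free_accelerator_memory():
    """Hypothetical shared cleanup step, not in this diff."""
    gc.collect()  # release Python-side objects first
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # then return cached CUDA blocks to the driver
```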
@@ -418,7 +395,8 @@
                 # Update progress based on completion percentage
                 with self.progress_lock:
                     self.progress = 40 + int((i / len(dataset)) * 50)
-
+
+
                 text = example.get("text", example.get("sentence", ""))
                 label = str(example.get("label", example.get("class", "")))
 
@@ -669,7 +647,7 @@
             priority: Queue priority (higher = higher priority)
 
         Returns:
-            int: Evaluation ID if successful, None otherwise
+            tuple: (evaluation_id, message)
         """
         # Check if user can submit today
         if not self.auth_manager.can_submit_benchmark(user_id):
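The docstring edit records a change in the return contract: callers that previously truthiness-checked a single id now receive an `(evaluation_id, message)` pair and must unpack two values. A hypothetical call site (the submitting method's name and arguments are assumed, not shown in this hunk):

```python
# Names assumed for illustration; only the tuple return is taken from the diff.
eval_id, message = evaluation_queue.submit_evaluation(model_id, benchmark_id, user_id)
if eval_id is None:
    print(f"Submission rejected: {message}")
else:
    print(f"Queued evaluation {eval_id}: {message}")
```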
@@ -806,28 +784,13 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
             current_eval_info = gr.Markdown("No evaluation currently running")
             # Use a simple text display for progress instead of Progress component
             progress_display = gr.Markdown("Progress: 0%")
-
-    # Function to update progress display
-    def update_progress_display():
-        current_eval, progress = evaluation_queue.get_current_progress()
-
-        if current_eval:
-            model_info = db_manager.get_model(current_eval['model_id'])
-            benchmark_info = db_manager.get_benchmark(current_eval['benchmark_id'])
-
-            if model_info and benchmark_info:
-                eval_info = f"**Currently Evaluating:** {model_info['name']} on {benchmark_info['name']}"
-                progress_text = f"Progress: {progress}%"
-                return eval_info, progress_text
-
-        return "No evaluation currently running", "Progress: 0%"
 
     # Event handlers
     def refresh_benchmarks_handler():
         benchmarks = db_manager.get_benchmarks()
 
         # Format for dropdown
-        choices = [(b["id"], b["name"]) for b in benchmarks]
+        choices = [(str(b["id"]), b["name"]) for b in benchmarks]
 
         return gr.update(choices=choices)
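The only functional change in this hunk is `str(b["id"])`, presumably because Gradio expects string-typed entries in dropdown choice tuples, and a bare integer id can trip validation or serialize inconsistently through the browser. Any handler that later needs the numeric id then converts back; a sketch (handler name and wiring assumed, not part of the diff):

```python
def on_benchmark_selected(benchmark_choice):
    # The dropdown now carries the id as a string (see the str() cast above);
    # convert back before the integer-keyed lookup. Hypothetical handler.
    benchmark = db_manager.get_benchmark(int(benchmark_choice))
    return benchmark["name"] if benchmark else "Unknown benchmark"
```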
 
@@ -873,7 +836,7 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
         # Get queue statistics
         stats = evaluation_queue.get_queue_status()
 
-        # Get recent evaluations
+        # Get recent evaluations (all statuses, limited to 20)
         evals = db_manager.get_evaluation_results(limit=20)
 
         # Format for dataframe
@@ -932,16 +895,5 @@
         inputs=[],
         outputs=[benchmark_dropdown]
     )
-
-    submission_ui.load(
-        fn=refresh_queue_handler,
-        inputs=[],
-        outputs=[queue_stats, queue_status, current_eval_info, progress_display]
-    )
-
-    # Manual refresh button with instructions
-    gr.Markdown("""
-    **Note:** Click the 'Refresh Queue' button periodically to update the progress display.
-    """)
 
-    return submission_ui
+    return submission_ui
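With the `submission_ui.load(...)` hook and the refresh note removed, the queue display now updates only when the user clicks the existing Refresh button. If periodic polling is wanted back, one option on Gradio versions that support it is the `every=` argument to `Blocks.load` (a sketch, not part of this commit; all names come from the removed code):

```python
# Sketch only: restores the auto-refresh this commit removes, via polling.
# `every=` availability depends on the installed Gradio version.
submission_ui.load(
    fn=refresh_queue_handler,
    inputs=[],
    outputs=[queue_stats, queue_status, current_eval_info, progress_display],
    every=10,  # re-run the handler every 10 seconds
)
```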
 