Update evaluation_queue.py
evaluation_queue.py (+39, -87)
```diff
@@ -9,7 +9,7 @@ import os
 import json
 import time
 import threading
-import queue
+import queue as queue_module
 from datetime import datetime, timedelta
 import gradio as gr
 from huggingface_hub import HfApi, hf_hub_download, snapshot_download
```
```diff
@@ -31,14 +31,13 @@ class EvaluationQueue:
         self.db_manager = db_manager
         self.auth_manager = auth_manager
         self.hf_api = HfApi()
-        self.queue = queue.Queue()
+        self.queue = queue_module.Queue()
         self.is_processing = False
         self.worker_thread = None
         self.model_tags = ["Merge", "Agent", "Reasoning", "Coding", "General", "Specialized", "Instruction", "Chat"]
         self.current_evaluation = None
         self.progress = 0
         self.progress_lock = threading.Lock()
-        self.db_path = db_manager.db_path # Store the path to create new connections in worker thread
 
     def start_worker(self):
         """Start the worker thread for processing the evaluation queue."""
```
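The stdlib `queue` module is now imported under the alias `queue_module`, presumably to keep the bare name `queue` free for queue-related variables and the `queue` table referenced by the old SQL below. The object stored on `self.queue` is still the ordinary thread-safe FIFO; a minimal sketch of the aliased API, with a hypothetical payload:

```python
import queue as queue_module

# Same thread-safe FIFO as before, just referenced through the alias.
q = queue_module.Queue()
q.put({"evaluation_id": 42})   # producer side (hypothetical payload)
item = q.get()                 # blocks until an item is available
print(item)
q.task_done()
```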
```diff
@@ -56,36 +55,17 @@ class EvaluationQueue:
 
     def _process_queue(self):
         """Process the evaluation queue in a separate thread."""
-        # Create a new database connection for this thread
-        thread_db = sqlite3.connect(self.db_path)
-        thread_db.row_factory = sqlite3.Row
-
         while self.is_processing:
             try:
-                # Get the next evaluation from the database
-                cursor = thread_db.cursor()
-                cursor.execute("""
-                    SELECT e.id as evaluation_id, e.model_id, e.benchmark_id, m.hf_model_id, b.dataset_id
-                    FROM queue q
-                    JOIN evaluations e ON q.evaluation_id = e.id
-                    JOIN models m ON e.model_id = m.id
-                    JOIN benchmarks b ON e.benchmark_id = b.id
-                    WHERE e.status = 'pending'
-                    ORDER BY q.priority DESC, q.added_at ASC
-                    LIMIT 1
-                """)
-                row = cursor.fetchone()
+                # Get the next evaluation from the database
+                pending_evals = self.db_manager.get_evaluation_results(status="pending")
 
-                if row:
-                    next_eval = dict(row)
+                if pending_evals:
+                    # Sort by priority and added_at
+                    next_eval = pending_evals[0]
 
                     # Update status to running
-                    cursor.execute("""
-                        UPDATE evaluations
-                        SET status = 'running', started_at = datetime('now')
-                        WHERE id = ?
-                    """, (next_eval['evaluation_id'],))
-                    thread_db.commit()
+                    self.db_manager.update_evaluation_status(next_eval['id'], 'running')
 
                     # Set current evaluation and reset progress
                     with self.progress_lock:
```
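The deleted `thread_db` setup existed because a `sqlite3` connection may, by default, only be used on the thread that created it, so the worker had to open its own connection from `self.db_path`. With every query now routed through `db_manager`, that plumbing (and the matching `thread_db.close()` removed further down) goes away. A short, self-contained illustration of the constraint the old code was working around; the database filename is hypothetical:

```python
import sqlite3
import threading

conn = sqlite3.connect("leaderboard.db")  # hypothetical path, opened in the main thread

def worker():
    try:
        conn.execute("SELECT 1")  # default check_same_thread=True forbids cross-thread use
    except sqlite3.ProgrammingError as err:
        print(f"Worker thread rejected: {err}")

t = threading.Thread(target=worker)
t.start()
t.join()
```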
```diff
@@ -93,34 +73,33 @@ class EvaluationQueue:
                         self.progress = 0
 
                     try:
-                        # Run the evaluation
-                        results = self._run_evaluation(
-                            next_eval['hf_model_id'],
-                            next_eval['dataset_id']
-                        )
-
-                        # Calculate overall score
-                        score = self._calculate_overall_score(results)
+                        # Get model and benchmark details
+                        model_info = self.db_manager.get_model(next_eval['model_id'])
+                        benchmark_info = self.db_manager.get_benchmark(next_eval['benchmark_id'])
 
-                        # [ten removed lines: the old direct-SQL update that stored the completed status, results, and score; not recoverable from this view]
+                        if model_info and benchmark_info:
+                            # Run the evaluation
+                            results = self._run_evaluation(
+                                model_info['hf_model_id'],
+                                benchmark_info['dataset_id']
+                            )
+
+                            # Calculate overall score
+                            score = self._calculate_overall_score(results)
+
+                            # Update status to completed with results
+                            self.db_manager.update_evaluation_status(
+                                next_eval['id'],
+                                'completed',
+                                results=results,
+                                score=score
+                            )
+                        else:
+                            raise Exception("Model or benchmark not found")
                     except Exception as e:
                         print(f"Evaluation error: {e}")
                         # Update status to failed
-                        cursor.execute("""
-                            UPDATE evaluations
-                            SET status = 'failed', completed_at = datetime('now')
-                            WHERE id = ?
-                        """, (next_eval['evaluation_id'],))
-                        thread_db.commit()
+                        self.db_manager.update_evaluation_status(next_eval['id'], 'failed')
 
                         # Clear current evaluation
                         with self.progress_lock:
```
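All of the worker's persistence now goes through a handful of `db_manager` calls whose signatures can be read off the call sites above: `get_evaluation_results(status=...)`, `get_model(...)`, `get_benchmark(...)`, and `update_evaluation_status(id, status, results=None, score=None)`. The new loop also takes `pending_evals[0]` directly, so it implicitly assumes the manager already returns pending rows ordered by priority and submission time. The manager itself is not part of this diff; the sketch below is hypothetical, with only the method names and keyword arguments taken from the call sites, while the table and column names and the bodies are illustrative:

```python
import json
import sqlite3

class DatabaseManager:
    """Hypothetical sketch of the surface the worker relies on."""

    def __init__(self, db_path):
        self.db_path = db_path

    def _connect(self):
        # A fresh connection per call keeps the manager safe to use from the worker thread.
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def get_evaluation_results(self, status=None, limit=None):
        # Assumed to return rows already ordered by priority, then submission time.
        query = "SELECT * FROM evaluations"
        params = []
        if status is not None:
            query += " WHERE status = ?"
            params.append(status)
        query += " ORDER BY priority DESC, submitted_at ASC"
        if limit is not None:
            query += " LIMIT ?"
            params.append(limit)
        conn = self._connect()
        try:
            return [dict(row) for row in conn.execute(query, params)]
        finally:
            conn.close()

    def update_evaluation_status(self, evaluation_id, status, results=None, score=None):
        conn = self._connect()
        try:
            conn.execute(
                "UPDATE evaluations SET status = ?, results = ?, score = ? WHERE id = ?",
                (status, json.dumps(results) if results is not None else None, score, evaluation_id),
            )
            conn.commit()
        finally:
            conn.close()
```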
```diff
@@ -132,9 +111,6 @@ class EvaluationQueue:
             except Exception as e:
                 print(f"Queue processing error: {e}")
                 time.sleep(5)
-
-        # Close the thread-local database connection
-        thread_db.close()
 
     def _run_evaluation(self, model_id, dataset_id):
         """Run an evaluation for a model on a benchmark.
```
```diff
@@ -211,7 +187,8 @@ class EvaluationQueue:
         # Clean up to free memory
         del model
         del tokenizer
-        torch.cuda.empty_cache()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
 
         # Update progress
         with self.progress_lock:
```
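Guarding the cache flush behind `torch.cuda.is_available()` keeps the cleanup path from touching the CUDA runtime on CPU-only hosts. A self-contained sketch of the pattern; the model and tokenizer here are stand-ins, and the `gc.collect()` call is an extra illustration rather than part of the patch:

```python
import gc
import torch

model = torch.nn.Linear(8, 8)   # stand-in for the evaluated model
tokenizer = object()            # stand-in for the tokenizer

# Clean up to free memory (mirrors the patched block)
del model
del tokenizer
gc.collect()                    # illustrative extra step: reclaim host memory promptly

if torch.cuda.is_available():   # only touch the CUDA allocator when a GPU is present
    torch.cuda.empty_cache()    # return cached blocks to the driver
```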
```diff
@@ -418,7 +395,8 @@ class EvaluationQueue:
                 # Update progress based on completion percentage
                 with self.progress_lock:
                     self.progress = 40 + int((i / len(dataset)) * 50)
-
+
+
                 text = example.get("text", example.get("sentence", ""))
                 label = str(example.get("label", example.get("class", "")))
 
```
```diff
@@ -669,7 +647,7 @@ class EvaluationQueue:
             priority: Queue priority (higher = higher priority)
 
         Returns:
-
+            tuple: (evaluation_id, message)
         """
         # Check if user can submit today
         if not self.auth_manager.can_submit_benchmark(user_id):
```
```diff
@@ -806,28 +784,13 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
         current_eval_info = gr.Markdown("No evaluation currently running")
         # Use a simple text display for progress instead of Progress component
         progress_display = gr.Markdown("Progress: 0%")
-
-        # Function to update progress display
-        def update_progress_display():
-            current_eval, progress = evaluation_queue.get_current_progress()
-
-            if current_eval:
-                model_info = db_manager.get_model(current_eval['model_id'])
-                benchmark_info = db_manager.get_benchmark(current_eval['benchmark_id'])
-
-                if model_info and benchmark_info:
-                    eval_info = f"**Currently Evaluating:** {model_info['name']} on {benchmark_info['name']}"
-                    progress_text = f"Progress: {progress}%"
-                    return eval_info, progress_text
-
-            return "No evaluation currently running", "Progress: 0%"
 
         # Event handlers
         def refresh_benchmarks_handler():
             benchmarks = db_manager.get_benchmarks()
 
             # Format for dropdown
-            choices = [(b["id"], b["name"]) for b in benchmarks]
+            choices = [(str(b["id"]), b["name"]) for b in benchmarks]
 
             return gr.update(choices=choices)
 
```
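The only functional change in the handler is casting the benchmark id to a string before building the dropdown choices. In recent Gradio releases a choice tuple is interpreted as (displayed name, value), so as written the dropdown displays the id and passes the name to whatever consumes the selection; whether that is intended depends on the submit handler, which is outside this hunk. A hypothetical, self-contained version of the refresh pattern, keeping the existing `gr.update(...)` style and using stand-in benchmark rows:

```python
import gradio as gr

# Stand-in for db_manager.get_benchmarks(); field names follow the handler above.
benchmarks = [
    {"id": 1, "name": "Example benchmark A"},
    {"id": 2, "name": "Example benchmark B"},
]

def refresh_benchmarks():
    # Cast the id to str so the component sees consistent value types across refreshes.
    choices = [(str(b["id"]), b["name"]) for b in benchmarks]
    return gr.update(choices=choices)

with gr.Blocks() as demo:
    dropdown = gr.Dropdown(label="Benchmark", choices=[])
    refresh = gr.Button("Refresh Benchmarks")
    refresh.click(fn=refresh_benchmarks, inputs=[], outputs=[dropdown])
```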
```diff
@@ -873,7 +836,7 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
         # Get queue statistics
         stats = evaluation_queue.get_queue_status()
 
-        # Get recent evaluations
+        # Get recent evaluations (all statuses, limited to 20)
        evals = db_manager.get_evaluation_results(limit=20)
 
         # Format for dataframe
```
```diff
@@ -932,16 +895,5 @@ def create_model_submission_ui(evaluation_queue, auth_manager, db_manager):
             inputs=[],
             outputs=[benchmark_dropdown]
         )
-
-        submission_ui.load(
-            fn=refresh_queue_handler,
-            inputs=[],
-            outputs=[queue_stats, queue_status, current_eval_info, progress_display]
-        )
-
-        # Manual refresh button with instructions
-        gr.Markdown("""
-        **Note:** Click the 'Refresh Queue' button periodically to update the progress display.
-        """)
 
-    return submission_ui
+    return submission_ui
```