frugal-ai-submission-template

Sleeping

App Files Community

Tonic committed on Feb 10

Commit

08f1c39

unverified ·

1 Parent(s): 7eb6153

switch model loading technique

Browse files

Files changed (1) hide show

tasks/text.py +9 -25

tasks/text.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from fastapi import APIRouter
 from datetime import datetime
 import time
@@ -134,21 +135,8 @@ async def evaluate_text(request: TextEvaluationRequest):
     }
     try:
-        # Load and prepare the dataset with retry mechanism
-        max_retries = 3
-        for attempt in range(max_retries):
-            try:
-                dataset = load_dataset(
-                    "QuotaClimat/frugalaichallenge-text-train",
-                    token=HF_TOKEN,
-                    trust_remote_code=True
-                )
-                break
-            except Exception as e:
-                if attempt == max_retries - 1:
-                    raise Exception(f"Failed to load dataset after {max_retries} attempts: {str(e)}")
-                print(f"Dataset loading attempt {attempt + 1} failed, retrying... Error: {str(e)}")
-                time.sleep(2)
         # Convert string labels to integers
         dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
@@ -165,8 +153,8 @@ async def evaluate_text(request: TextEvaluationRequest):
         # Initialize the model once
         classifier = TextClassifier()
-        # Prepare batches with smaller batch size
-        batch_size = 16  # Reduced batch size
         quotes = test_dataset["quote"]
         num_batches = len(quotes) // batch_size + (1 if len(quotes) % batch_size != 0 else 0)
         batches = [
@@ -177,8 +165,8 @@ async def evaluate_text(request: TextEvaluationRequest):
         # Initialize batch_results
         batch_results = [[] for _ in range(num_batches)]
-        # Process batches in parallel with fewer workers
-        max_workers = min(os.cpu_count(), 2)  # Reduced number of workers
         print(f"Processing with {max_workers} workers")
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
@@ -211,11 +199,6 @@ async def evaluate_text(request: TextEvaluationRequest):
         accuracy = accuracy_score(true_labels, predictions)
         print("accuracy:", accuracy)
-        # Clean up
-        del classifier
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
         # Prepare results
         results = {
             "username": username,
@@ -239,4 +222,5 @@ async def evaluate_text(request: TextEvaluationRequest):
     except Exception as e:
         print(f"Error in evaluate_text: {str(e)}")
-        raise Exception(f"Failed to process request: {str(e)}")

 from fastapi import APIRouter
 from datetime import datetime
 import time
     }
     try:
+        # Load and prepare the dataset
+        dataset = load_dataset("QuotaClimat/frugalaichallenge-text-train", token=HF_TOKEN)
         # Convert string labels to integers
         dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
         # Initialize the model once
         classifier = TextClassifier()
+        # Prepare batches
+        batch_size = 24
         quotes = test_dataset["quote"]
         num_batches = len(quotes) // batch_size + (1 if len(quotes) % batch_size != 0 else 0)
         batches = [
         # Initialize batch_results
         batch_results = [[] for _ in range(num_batches)]
+        # Process batches in parallel
+        max_workers = min(os.cpu_count(), 4)
         print(f"Processing with {max_workers} workers")
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
         accuracy = accuracy_score(true_labels, predictions)
         print("accuracy:", accuracy)
         # Prepare results
         results = {
             "username": username,
     except Exception as e:
         print(f"Error in evaluate_text: {str(e)}")
+        raise Exception(f"Failed to process request: {str(e)}")