Commit 7abed63 by Tonic · unverified · 1 Parent(s): 08e3356

fix model loading

Files changed (2)
  1. requirements.txt +1 -1
  2. tasks/text.py +54 -59
requirements.txt CHANGED
@@ -1,6 +1,6 @@
 fastapi==0.103.2
 uvicorn==0.23.2
-transformers==4.34.0
+transformers #==4.34.0
 torch==2.0.1
 datasets==2.14.5
 scikit-learn==1.3.1
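
Relaxing the pin (the version is commented out rather than deleted) lets pip resolve whatever transformers release fits the rest of the environment. A minimal sketch, not part of the commit, for checking which version was actually installed:

# Illustrative only, not from this commit: print the transformers
# version pip resolved after the ==4.34.0 pin was relaxed.
import transformers
print(transformers.__version__)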
tasks/text.py CHANGED
@@ -7,7 +7,7 @@ import os
 from concurrent.futures import ThreadPoolExecutor
 from typing import List, Dict, Tuple
 import torch
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig

 from .utils.evaluation import TextEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info, start_tracking, stop_tracking
@@ -24,67 +24,85 @@ class TextClassifier:
     def __init__(self):
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         max_retries = 3
+        model_name = "Tonic/climate-guard-toxic-agent"
+
         for attempt in range(max_retries):
             try:
-                # Initialize tokenizer and model separately
-                self.tokenizer = AutoTokenizer.from_pretrained("Tonic/climate-guard-toxic-agent")
-                self.model = AutoModelForSequenceClassification.from_pretrained("Tonic/climate-guard-toxic-agent")
+                # Load config first
+                config = AutoConfig.from_pretrained(model_name)
+
+                # Initialize tokenizer with specific model type
+                self.tokenizer = AutoTokenizer.from_pretrained(
+                    model_name,
+                    model_max_length=512,
+                    padding_side='right',
+                    truncation_side='right'
+                )
+
+                # Initialize model with config
+                self.model = AutoModelForSequenceClassification.from_pretrained(
+                    model_name,
+                    config=config,
+                    torch_dtype=torch.float32
+                )
+
                 self.model.to(self.device)
                 self.model.eval()
                 print("Model initialized successfully")
                 break
+
             except Exception as e:
                 if attempt == max_retries - 1:
                     raise Exception(f"Failed to initialize model after {max_retries} attempts: {str(e)}")
-                print(f"Attempt {attempt + 1} failed, retrying...")
+                print(f"Attempt {attempt + 1} failed, retrying... Error: {str(e)}")
                 time.sleep(1)

     def predict_single(self, text: str) -> int:
         """Predict single text instance"""
         try:
-            # Tokenize and prepare input
+            # Tokenize with explicit padding and truncation
             inputs = self.tokenizer(
                 text,
                 return_tensors="pt",
                 truncation=True,
                 max_length=512,
-                padding=True
+                padding='max_length'
             ).to(self.device)

             # Get prediction
             with torch.no_grad():
                 outputs = self.model(**inputs)
-                predictions = outputs.logits.argmax(-1)
+                predictions = torch.argmax(outputs.logits, dim=-1)
             return predictions.item()
+
         except Exception as e:
             print(f"Error in single prediction: {str(e)}")
             return 0 # Return default prediction on error

     def process_batch(self, batch: List[str], batch_idx: int) -> Tuple[List[int], int]:
         """Process a batch of texts and return their predictions"""
-        max_retries = 3
-        for attempt in range(max_retries):
-            try:
-                print(f"Processing batch {batch_idx} with {len(batch)} items (attempt {attempt + 1})")
-                predictions = []
-
-                # Process texts one by one for better error handling
-                for text in batch:
-                    pred = self.predict_single(text)
-                    predictions.append(pred)
-
-                if not predictions:
-                    raise Exception("No predictions generated for batch")
-
-                print(f"Completed batch {batch_idx} with {len(predictions)} predictions")
-                return predictions, batch_idx
+        try:
+            print(f"Processing batch {batch_idx} with {len(batch)} items")

-            except Exception as e:
-                if attempt == max_retries - 1:
-                    print(f"Final error in batch {batch_idx}: {str(e)}")
-                    return [0] * len(batch), batch_idx
-                print(f"Error in batch {batch_idx} (attempt {attempt + 1}): {str(e)}")
-                time.sleep(1)
+            # Process entire batch at once
+            inputs = self.tokenizer(
+                batch,
+                return_tensors="pt",
+                truncation=True,
+                max_length=512,
+                padding='max_length'
+            ).to(self.device)
+
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+                predictions = torch.argmax(outputs.logits, dim=-1).tolist()
+
+            print(f"Completed batch {batch_idx} with {len(predictions)} predictions")
+            return predictions, batch_idx
+
+        except Exception as e:
+            print(f"Error in batch {batch_idx}: {str(e)}")
+            return [0] * len(batch), batch_idx

 @router.post(ROUTE, tags=["Text Task"], description=DESCRIPTION)
 async def evaluate_text(request: TextEvaluationRequest):
@@ -119,7 +137,7 @@ async def evaluate_text(request: TextEvaluationRequest):
     classifier = TextClassifier()

     # Prepare batches
-    batch_size = 16
+    batch_size = 32 # Increased batch size for efficiency
     quotes = test_dataset["quote"]
     num_batches = len(quotes) // batch_size + (1 if len(quotes) % batch_size != 0 else 0)
     batches = [
@@ -127,35 +145,12 @@ async def evaluate_text(request: TextEvaluationRequest):
         for i in range(num_batches)
     ]

-    # Initialize batch_results
-    batch_results = [[] for _ in range(num_batches)]
-
-    # Process batches in parallel
-    max_workers = min(os.cpu_count(), 4)
-    print(f"Processing with {max_workers} workers")
-
-    with ThreadPoolExecutor(max_workers=max_workers) as executor:
-        future_to_batch = {
-            executor.submit(classifier.process_batch, batch, idx): idx
-            for idx, batch in enumerate(batches)
-        }
-
-        for future in future_to_batch:
-            batch_idx = future_to_batch[future]
-            try:
-                predictions, idx = future.result()
-                if predictions:
-                    batch_results[idx] = predictions
-                    print(f"Stored results for batch {idx} ({len(predictions)} predictions)")
-            except Exception as e:
-                print(f"Failed to get results for batch {batch_idx}: {e}")
-                batch_results[batch_idx] = [0] * len(batches[batch_idx])
-
-    # Flatten predictions
+    # Process batches sequentially to avoid memory issues
     predictions = []
-    for batch_preds in batch_results:
-        if batch_preds is not None:
-            predictions.extend(batch_preds)
+    for idx, batch in enumerate(batches):
+        batch_preds, _ = classifier.process_batch(batch, idx)
+        predictions.extend(batch_preds)
+        print(f"Processed batch {idx + 1}/{num_batches}")

     # Stop tracking emissions
     emissions_data = stop_tracking()
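
For context, a minimal sketch, not part of the commit, of how the reworked classifier might be exercised on its own; the sample quotes and the clf variable are illustrative, and it assumes the Tonic/climate-guard-toxic-agent checkpoint is reachable:

# Illustrative usage of the committed TextClassifier (assumes the
# tasks package is importable and the model repo can be downloaded).
from tasks.text import TextClassifier

clf = TextClassifier()  # retries model loading up to 3 times before raising
preds, idx = clf.process_batch(
    ["sample quote one", "sample quote two"],  # placeholder inputs
    batch_idx=0,
)
print(idx, preds)  # batch index plus one predicted class id per quote

Note that padding='max_length' pads every sequence to 512 tokens, which keeps tensor shapes static but spends compute on short quotes; dynamic padding (padding=True, as in the removed code) is the usual lighter-weight alternative.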