update imports, textclassifier
tasks/text.py  +16 -42

tasks/text.py  CHANGED
@@ -8,7 +8,7 @@ from concurrent.futures import ThreadPoolExecutor
 from typing import List, Dict, Tuple
 import torch
 import torch.nn as nn
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, …
+from transformers import AutoTokenizer, pipeline
 from huggingface_hub import login
 from dotenv import load_dotenv
 
@@ -42,34 +42,22 @@ class TextClassifier:
                 # Initialize tokenizer first
                 self.tokenizer = AutoTokenizer.from_pretrained(
                     model_name,
-                    model_max_length=8192,
+                    model_max_length=512,  # Reduced from 8192
                     padding_side='right',
                     truncation_side='right'
                 )
 
-                # …
-                self.…
-                …
-                self.config.num_attention_heads = 12
-                self.config.num_hidden_layers = 12
-                self.config.norm_eps = 1e-5
-
-                # Initialize model with basic config
-                self.model = AutoModelForSequenceClassification.from_pretrained(
-                    model_name,
-                    config=self.config,
-                    ignore_mismatched_sizes=True
+                # Use pipeline for simpler initialization
+                self.classifier = pipeline(
+                    "text-classification",
+                    model=model_name,
+                    tokenizer=self.tokenizer,
+                    device=self.device,
+                    max_length=512,
+                    truncation=True,
+                    batch_size=32
                 )
 
-                # Move model to appropriate device
-                self.model = self.model.to(self.device)
-                self.model.eval()
                 print("Model initialized successfully")
                 break
 
@@ -84,22 +72,9 @@ class TextClassifier:
         try:
             print(f"Processing batch {batch_idx} with {len(batch)} items")
 
-            # …
-            inputs = self.tokenizer(
-                batch,
-                return_tensors="pt",
-                truncation=True,
-                max_length=512,
-                padding=True
-            )
-
-            # Move inputs to device
-            inputs = {k: v.to(self.device) for k, v in inputs.items()}
-
-            # Get predictions
-            with torch.no_grad():
-                outputs = self.model(**inputs)
-                predictions = torch.argmax(outputs.logits, dim=-1).cpu().tolist()
+            # Use pipeline for prediction
+            outputs = self.classifier(batch)
+            predictions = [int(output['label'].split('_')[0]) for output in outputs]
 
             print(f"Completed batch {batch_idx} with {len(predictions)} predictions")
             return predictions, batch_idx
@@ -110,11 +85,10 @@ class TextClassifier:
 
     def __del__(self):
         # Clean up CUDA memory
-        if hasattr(self, 'model'):
-            del self.model
+        if hasattr(self, 'classifier'):
+            del self.classifier
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
-
 
 @router.post(ROUTE, tags=["Text Task"], description=DESCRIPTION)
 async def evaluate_text(request: TextEvaluationRequest):
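For context, the flow this commit moves to can be exercised standalone. Below is a minimal sketch, assuming a placeholder model name and a CUDA-or-CPU device index (neither is this Space's actual configuration): the text-classification pipeline takes over the tokenization, padding, device placement, and torch.no_grad() inference that the removed code did by hand.

# Minimal sketch of the pipeline-based flow; model name is a placeholder.
import torch
from transformers import AutoTokenizer, pipeline

model_name = "distilbert-base-uncased-finetuned-sst-2-english"  # placeholder
device = 0 if torch.cuda.is_available() else -1  # pipeline accepts a device index

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    model_max_length=512,
    padding_side="right",
    truncation_side="right",
)

classifier = pipeline(
    "text-classification",
    model=model_name,
    tokenizer=tokenizer,
    device=device,
    max_length=512,
    truncation=True,
    batch_size=32,
)

# Each input yields a dict like {'label': 'POSITIVE', 'score': 0.99}.
outputs = classifier(["great movie", "terrible service"])
print(outputs)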
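One caveat on the new parsing line, predictions = [int(output['label'].split('_')[0]) for output in outputs]: it assumes the model's id2label names lead with the numeric class id (e.g. '0_certain'). Models that keep the default 'LABEL_0'-style names put the id last, so int() would fail on 'LABEL'. A defensive variant (label_to_id is an illustrative helper, not part of this commit):

def label_to_id(label: str) -> int:
    # Accept both '0_foo' (id first) and 'LABEL_0' (id last) label formats.
    head, tail = label.split("_")[0], label.split("_")[-1]
    return int(head) if head.isdigit() else int(tail)

assert label_to_id("3_news") == 3
assert label_to_id("LABEL_3") == 3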