submission-template

Sleeping

App Files Files Community

hbanduk committed on Jan 31

Commit

bc7edfa

verified ·

1 Parent(s): 371a733

Update tasks/text.py

Browse files

Files changed (1) hide show

tasks/text.py +22 -45

tasks/text.py CHANGED Viewed

@@ -60,60 +60,37 @@ async def evaluate_text(request: TextEvaluationRequest):
     #true_labels = test_dataset["label"]
     #predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
-    from transformers import DistilBertTokenizer
-    import numpy as np
-    import onnxruntime as ort
-    from huggingface_hub import hf_hub_download
-    # Load the ONNX model and tokenizer
-    MODEL_REPO = "ClimateDebunk/Quantized_DistilBertForSequenceClassification"
-    MODEL_FILENAME = "distilbert_quantized_dynamic.onnx"
-    try:
-        MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
-        print(f"Model successfully downloaded at: {MODEL_PATH}")
-        tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
-        print("Tokenizer loaded successfully!")
-        ort_session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])
-        print("ONNX session initialized successfully!")
-    except Exception as e:
-        print(f"Error loading ONNX model: {e}")
-    # Preprocess the text data
     def preprocess(texts):
-        print(f"📌 Preprocessing {len(texts)} text samples...")
-        inputs = tokenizer(
-            texts,
-            padding='max_length',
-            truncation=True,
-            max_length=365,
-            return_tensors="np"
-        )
-        print(f"Tokenized input_ids shape: {inputs['input_ids'].shape}")
-        print(f"Tokenized attention_mask shape: {inputs['attention_mask'].shape}")
-        return inputs
-    # Run inference
     def predict(texts):
-        print(f"📌 Running inference on {len(texts)} samples...")
         inputs = preprocess(texts)
-        ort_inputs = {
-            "input_ids": inputs["input_ids"].astype(np.int64),
-            "attention_mask": inputs["attention_mask"].astype(np.int64)
-        }
-        ort_outputs = ort_session.run(None, ort_inputs)
-        logits = ort_outputs[0]
-        predictions = np.argmax(logits, axis=1)
         return predictions
-     # Replace the random predictions with actual model predictions
     texts = test_dataset["quote"]
     predictions = predict(texts)
     true_labels = test_dataset["label"]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE

     #true_labels = test_dataset["label"]
     #predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
+    from transformers import AutoModelForSequenceClassification, AutoTokenizer
+    import torch
+    # Load model and tokenizer from Hugging Face Hub
+    MODEL_REPO = "ClimateDebunk/FineTunedDistilBert4SeqClass"
+    MODEL_FILENAME = "distilbert_trained.pth"
+    model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
+    model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO)
+    model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
+    model.eval()  # Set to evaluation mode
     def preprocess(texts):
+        """ Tokenize text inputs for DistilBERT """
+        return tokenizer(texts, padding='max_length', truncation=True, max_length=365, return_tensors="pt")
     def predict(texts):
+        """ Run inference using the fine-tuned DistilBERT model """
         inputs = preprocess(texts)
+        with torch.no_grad():
+            outputs = model(**inputs)
+            predictions = torch.argmax(outputs.logits, dim=1).tolist()
         return predictions
+    # Run inference
     texts = test_dataset["quote"]
     predictions = predict(texts)
     true_labels = test_dataset["label"]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE