submission-template

Sleeping

App Files Files Community

hbanduk committed on Jan 31

Commit

9086772

verified ·

1 Parent(s): 7804b0f

Update tasks/text.py

Browse files

Files changed (1) hide show

tasks/text.py +18 -32

tasks/text.py CHANGED Viewed

@@ -60,45 +60,31 @@ async def evaluate_text(request: TextEvaluationRequest):
     #true_labels = test_dataset["label"]
     #predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
-    from transformers import DistilBertTokenizer
-    import numpy as np
-    import onnxruntime as ort
-    from huggingface_hub import hf_hub_download
-    # Load the ONNX model and tokenizer
-    MODEL_REPO = "ClimateDebunk/Quantized_DistilBertForSequenceClassification"
-    MODEL_FILENAME = "distilbert_quantized_dynamic.onnx"
-    MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
-    tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
-    ort_session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])
-    # Preprocess the text data
     def preprocess(texts):
-        return tokenizer(
-            texts,
-            padding='max_length',
-            truncation=True,
-            max_length=365,
-            return_tensors="np"
-        )
-    # Run inference
     def predict(texts):
         inputs = preprocess(texts)
-        ort_inputs = {
-            "input_ids": inputs["input_ids"].astype(np.int64),
-            "attention_mask": inputs["attention_mask"].astype(np.int64)
-        }
-        ort_outputs = ort_session.run(None, ort_inputs)
-        logits = ort_outputs[0]
-        predictions = np.argmax(logits, axis=1)
         return predictions
     texts = test_dataset["quote"]
     predictions = predict(texts)
     true_labels = test_dataset["label"]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE

     #true_labels = test_dataset["label"]
     #predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
+    from transformers import AutoModelForSequenceClassification, AutoTokenizer
+    import torch
+    # Load model and tokenizer from Hugging Face Hub
+    MODEL_REPO = "ClimateDebunk/FineTunedDistilBert4SeqClass"
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
+    model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO)
+    model.eval()  # Set to evaluation mode
     def preprocess(texts):
+        """ Tokenize text inputs for DistilBERT """
+        return tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
     def predict(texts):
+        """ Run inference using the fine-tuned DistilBERT model """
         inputs = preprocess(texts)
+        with torch.no_grad():
+            outputs = model(**inputs)
+            predictions = torch.argmax(outputs.logits, dim=1).tolist()
         return predictions
+    # Run inference
     texts = test_dataset["quote"]
     predictions = predict(texts)
     true_labels = test_dataset["label"]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE