submission-template

Sleeping

App Files Files Community

hbanduk committed on Jan 31

Commit

e740326

verified ·

1 Parent(s): 12f942f

Update tasks/text.py

Browse files

Files changed (1) hide show

tasks/text.py +32 -19

tasks/text.py CHANGED Viewed

@@ -60,33 +60,46 @@ async def evaluate_text(request: TextEvaluationRequest):
     #true_labels = test_dataset["label"]
     #predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
-    from transformers import AutoModelForSequenceClassification, AutoTokenizer
-    import torch
-    # Load model and tokenizer from Hugging Face Hub
-    MODEL_REPO = "ClimateDebunk/FineTunedDistilBert4SeqClass"
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
-    model = AutoModelForSequenceClassification.from_pretrained(MODEL_REPO)
-    model.eval()  # Set to evaluation mode
     def preprocess(texts):
-        """ Tokenize text inputs for DistilBERT """
-        return tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
     def predict(texts):
-        """ Run inference using the fine-tuned DistilBERT model """
         inputs = preprocess(texts)
-        with torch.no_grad():
-            outputs = model(**inputs)
-            predictions = torch.argmax(outputs.logits, dim=1).tolist()
         return predictions
-    # Run inference
     texts = test_dataset["text"]
     predictions = predict(texts)
-    true_labels = test_dataset["label"]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------

     #true_labels = test_dataset["label"]
     #predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
+    from transformers import DistilBertTokenizer
+    import numpy as np
+    import onnxruntime as ort
+    from huggingface_hub import hf_hub_download
+    # Load the ONNX model and tokenizer
+    MODEL_REPO = "ClimateDebunk/Quantized_DistilBertForSequenceClassification"
+    MODEL_FILENAME = "distilbert_quantized_dynamic.onnx"
+    MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
+    tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
+    ort_session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])
+    # Preprocess the text data
     def preprocess(texts):
+        return tokenizer(
+            texts,
+            padding=True,
+            truncation=True,
+            max_length=365,
+            return_tensors="np"
+        )
+    # Run inference
     def predict(texts):
         inputs = preprocess(texts)
+        ort_inputs = {
+            "input_ids": inputs["input_ids"].astype(np.int64),
+            "attention_mask": inputs["attention_mask"].astype(np.int64)
+        }
+        ort_outputs = ort_session.run(None, ort_inputs)
+        logits = ort_outputs[0]
+        predictions = np.argmax(logits, axis=1)
         return predictions
     texts = test_dataset["text"]
     predictions = predict(texts)
+    true_labels = test_dataset["label"]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------