text-jennasparks

Sleeping

App Files Community

jennasparks committed on Jan 31

Commit

4827d61

verified ·

1 Parent(s): bee8fd0

added tokenization as preprocessing

Browse files

Files changed (1) hide show

tasks/text.py +31 -8

tasks/text.py CHANGED Viewed

@@ -10,6 +10,9 @@ from .utils.emissions import tracker, clean_emissions_data, get_space_info
 import tensorflow as tf
 from huggingface_hub import hf_hub_download
 router = APIRouter()
 DESCRIPTION = "Electra"
@@ -40,6 +43,13 @@ async def evaluate_text(request: TextEvaluationRequest):
         "7_fossil_fuels_needed": 7
     }
     # Download our pre-trained model from Hugging Face
     model_path = hf_hub_download(repo_id="jennasparks/frugal-ai-text-electra-base", filename="checkpoint_epoch_5.weights.h5")
@@ -49,12 +59,19 @@ async def evaluate_text(request: TextEvaluationRequest):
     # Load and prepare the dataset
     dataset = load_dataset(request.dataset_name)
-    # Convert string labels to integers
     dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
     # Split dataset
-    train_test = dataset["train"]
-    test_dataset = dataset["test"]
     # Start tracking emissions
     tracker.start()
@@ -64,12 +81,18 @@ async def evaluate_text(request: TextEvaluationRequest):
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
-    # Make predictions
-    predictions = model.predict(test_dataset)
-    # Get true labels
-    true_labels = test_dataset["label"]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE

 import tensorflow as tf
 from huggingface_hub import hf_hub_download
+from transformers import AutoTokenizer
+import numpy as np
 router = APIRouter()
 DESCRIPTION = "Electra"
         "7_fossil_fuels_needed": 7
     }
+    # Initialize tokenizer
+    tokenizer = AutoTokenizer.from_pretrained("google/electra-small-discriminator")
+    def preprocess_function(examples):
+        return tokenizer(examples["text"],
+                         truncation=True, padding="max_length")
     # Download our pre-trained model from Hugging Face
     model_path = hf_hub_download(repo_id="jennasparks/frugal-ai-text-electra-base", filename="checkpoint_epoch_5.weights.h5")
     # Load and prepare the dataset
     dataset = load_dataset(request.dataset_name)
+    # # Convert string labels to integers
+    # dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
+    # # Split dataset
+    # train_test = dataset["train"]
+    # test_dataset = dataset["test"]
+    # Convert string labels to integers and tokenize
     dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
+    tokenized_dataset = dataset.map(preprocess_function, batched=True)
     # Split dataset
+    test_dataset = tokenized_dataset["test"]
     # Start tracking emissions
     tracker.start()
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
+    # Added error handling
+    try:
+        # Make predictions
+        predictions = model.predict(test_dataset["input_ids"])
+        predictions = np.argmax(predictions, axis=1)
+        # Get true labels
+        true_labels = test_dataset["label"]
+    except Exception as e:
+        print(f"An error occurred during prediction: {str(e)}")
+        raise
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE