jennasparks committed
Commit 54b86de · verified · 1 Parent(s): 19efa32

changed editing style

Files changed (1)
  1. tasks/text.py  +18 -62
tasks/text.py CHANGED
@@ -18,30 +18,15 @@ router = APIRouter()
 DESCRIPTION = "Electra_Base"
 ROUTE = "/text"
 
-class CustomTFDataset(tf.data.Dataset):
-    def __init__(self, texts, labels, tokenizer, max_length=128):
-        self.texts = texts
-        self.labels = labels
-        self.tokenizer = tokenizer
-        self.max_length = max_length
-
-    def __len__(self):
-        return len(self.texts)
-
-    def __iter__(self):
-        for text, label in zip(self.texts, self.labels):
-            encoding = self.tokenizer(
-                text,
-                truncation=True,
-                padding='max_length',
-                max_length=self.max_length,
-                return_tensors='tf'
-            )
-            yield {
-                'input_ids': encoding['input_ids'][0],
-                'attention_mask': encoding['attention_mask'][0],
-                'label': tf.constant(label, dtype=tf.int32)
-            }
+# Load model and tokenizer
+model_weights_path = hf_hub_download(repo_id="jennasparks/electra-tf", filename="tf_model.h5")
+model_config_path = hf_hub_download(repo_id="jennasparks/electra-tf", filename="config.json")
+
+config = ElectraConfig.from_json_file(model_config_path)
+model = TFElectraForSequenceClassification(config)
+model.load_weights(model_weights_path)
+tokenizer = ElectraTokenizer.from_pretrained("google/electra-base-discriminator")
+
 
 @router.post(ROUTE, tags=["Text Task"],
              description=DESCRIPTION)
@@ -66,41 +51,15 @@ async def evaluate_text(request: TextEvaluationRequest):
         "7_fossil_fuels_needed": 7
     }
 
-    # Download pre-trained model weights and config from Hugging Face
-    model_weights_path = hf_hub_download(repo_id="jennasparks/electra-tf", filename="tf_model.h5")
-    model_config_path = hf_hub_download(repo_id="jennasparks/electra-tf", filename="config.json")
-
-    # Load the configuration
-    config = ElectraConfig.from_json_file(model_config_path)
-
-    # Create the model with the loaded configuration
-    model = TFElectraForSequenceClassification(config)
-
-    # Load the weights
-    model.load_weights(model_weights_path)
-
-    # Load the tokenizer
-    tokenizer = ElectraTokenizer.from_pretrained("google/electra-base-discriminator")
-
-    # Compile the model (if needed for inference)
-    model.compile(optimizer='adam',
-                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
-                  metrics=['accuracy'])
-
     # Load and prepare the dataset
     dataset = load_dataset(request.dataset_name)
 
     # Convert string labels to integers
     dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
 
-    # Tokenize the dataset
-    def tokenize_function(examples):
-        return tokenizer(examples["text"], padding="max_length", truncation=True)
-
-    tokenized_dataset = dataset.map(tokenize_function, batched=True)
-
-    # Get the test dataset
-    test_dataset = tokenized_dataset["test"]
+    # Split dataset
+    train_test = dataset["train"]
+    test_dataset = dataset["test"]
 
     # Start tracking emissions
     tracker.start()
@@ -111,17 +70,14 @@ async def evaluate_text(request: TextEvaluationRequest):
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
 
-    # Add error handling
     try:
-        # Prepare input for the model
-        input_ids = tf.convert_to_tensor(test_dataset["input_ids"])
-        attention_mask = tf.convert_to_tensor(test_dataset["attention_mask"])
-
+        # Tokenize the input texts
+        encoded_input = tokenizer(test_dataset["text"], truncation=True, padding=True, return_tensors="tf")
+
         # Make predictions
-        predictions = model(input_ids, attention_mask=attention_mask, training=False)
-        predictions = tf.nn.softmax(predictions.logits, axis=-1)
-        predictions = tf.argmax(predictions, axis=-1).numpy()
-
+        outputs = model(encoded_input["input_ids"], attention_mask=encoded_input["attention_mask"], training=False)
+        predictions = tf.argmax(outputs.logits, axis=1).numpy()
+
         # Get true labels
         true_labels = test_dataset["label"]
     except Exception as e:
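
Taken together, the hunks move model setup out of the request handler. As a reading aid, the following is a minimal sketch of how the changed pieces of tasks/text.py could fit together after this commit. The import block and the predict_texts helper are assumptions added for illustration (the diff does not show the file's imports, and the real inference code lives inline inside evaluate_text); only the loading and prediction lines are taken from the added hunks.

# Sketch only: the imports below are assumed, since the diff does not show them.
import tensorflow as tf
from huggingface_hub import hf_hub_download
from transformers import (
    ElectraConfig,
    ElectraTokenizer,
    TFElectraForSequenceClassification,
)

# Module-level setup (added by this commit): weights, config, and tokenizer are
# downloaded and built once at import time rather than on every request.
model_weights_path = hf_hub_download(repo_id="jennasparks/electra-tf", filename="tf_model.h5")
model_config_path = hf_hub_download(repo_id="jennasparks/electra-tf", filename="config.json")

config = ElectraConfig.from_json_file(model_config_path)
model = TFElectraForSequenceClassification(config)
model.load_weights(model_weights_path)
tokenizer = ElectraTokenizer.from_pretrained("google/electra-base-discriminator")


def predict_texts(texts):
    """Hypothetical helper mirroring the inference block inside evaluate_text."""
    # Tokenize the whole batch, padding to the longest example in it.
    encoded_input = tokenizer(texts, truncation=True, padding=True, return_tensors="tf")
    # Forward pass with dropout disabled; argmax over the logits gives label ids.
    outputs = model(
        encoded_input["input_ids"],
        attention_mask=encoded_input["attention_mask"],
        training=False,
    )
    return tf.argmax(outputs.logits, axis=1).numpy()

The design choice here is that the per-request work inside evaluate_text shrinks to tokenize-plus-forward-pass, while the CustomTFDataset class and the model.compile call, neither of which is needed for plain inference, are dropped.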