Spaces:
Sleeping
Sleeping
changed editing style
Browse files - tasks/text.py +18 -62
tasks/text.py
CHANGED
|
@@ -18,30 +18,15 @@ router = APIRouter()
|
|
| 18 |
DESCRIPTION = "Electra_Base"
|
| 19 |
ROUTE = "/text"
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
def __iter__(self):
|
| 32 |
-
for text, label in zip(self.texts, self.labels):
|
| 33 |
-
encoding = self.tokenizer(
|
| 34 |
-
text,
|
| 35 |
-
truncation=True,
|
| 36 |
-
padding='max_length',
|
| 37 |
-
max_length=self.max_length,
|
| 38 |
-
return_tensors='tf'
|
| 39 |
-
)
|
| 40 |
-
yield {
|
| 41 |
-
'input_ids': encoding['input_ids'][0],
|
| 42 |
-
'attention_mask': encoding['attention_mask'][0],
|
| 43 |
-
'label': tf.constant(label, dtype=tf.int32)
|
| 44 |
-
}
|
| 45 |
|
| 46 |
@router.post(ROUTE, tags=["Text Task"],
|
| 47 |
description=DESCRIPTION)
|
|
@@ -66,41 +51,15 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
| 66 |
"7_fossil_fuels_needed": 7
|
| 67 |
}
|
| 68 |
|
| 69 |
-
# Download pre-trained model weights and config from Hugging Face
|
| 70 |
-
model_weights_path = hf_hub_download(repo_id="jennasparks/electra-tf", filename="tf_model.h5")
|
| 71 |
-
model_config_path = hf_hub_download(repo_id="jennasparks/electra-tf", filename="config.json")
|
| 72 |
-
|
| 73 |
-
# Load the configuration
|
| 74 |
-
config = ElectraConfig.from_json_file(model_config_path)
|
| 75 |
-
|
| 76 |
-
# Create the model with the loaded configuration
|
| 77 |
-
model = TFElectraForSequenceClassification(config)
|
| 78 |
-
|
| 79 |
-
# Load the weights
|
| 80 |
-
model.load_weights(model_weights_path)
|
| 81 |
-
|
| 82 |
-
# Load the tokenizer
|
| 83 |
-
tokenizer = ElectraTokenizer.from_pretrained("google/electra-base-discriminator")
|
| 84 |
-
|
| 85 |
-
# Compile the model (if needed for inference)
|
| 86 |
-
model.compile(optimizer='adam',
|
| 87 |
-
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
|
| 88 |
-
metrics=['accuracy'])
|
| 89 |
-
|
| 90 |
# Load and prepare the dataset
|
| 91 |
dataset = load_dataset(request.dataset_name)
|
| 92 |
|
| 93 |
# Convert string labels to integers
|
| 94 |
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
|
| 95 |
|
| 96 |
-
#
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
tokenized_dataset = dataset.map(tokenize_function, batched=True)
|
| 101 |
-
|
| 102 |
-
# Get the test dataset
|
| 103 |
-
test_dataset = tokenized_dataset["test"]
|
| 104 |
|
| 105 |
# Start tracking emissions
|
| 106 |
tracker.start()
|
|
@@ -111,17 +70,14 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
| 111 |
# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
|
| 112 |
#--------------------------------------------------------------------------------------------
|
| 113 |
|
| 114 |
-
# Add error handling
|
| 115 |
try:
|
| 116 |
-
#
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
# Make predictions
|
| 121 |
-
|
| 122 |
-
predictions = tf.
|
| 123 |
-
|
| 124 |
-
|
| 125 |
# Get true labels
|
| 126 |
true_labels = test_dataset["label"]
|
| 127 |
except Exception as e:
|
|
|
|
| 18 |
DESCRIPTION = "Electra_Base"
|
| 19 |
ROUTE = "/text"
|
| 20 |
|
| 21 |
+
# Load model and tokenizer
|
| 22 |
+
model_weights_path = hf_hub_download(repo_id="jennasparks/electra-tf", filename="tf_model.h5")
|
| 23 |
+
model_config_path = hf_hub_download(repo_id="jennasparks/electra-tf", filename="config.json")
|
| 24 |
+
|
| 25 |
+
config = ElectraConfig.from_json_file(model_config_path)
|
| 26 |
+
model = TFElectraForSequenceClassification(config)
|
| 27 |
+
model.load_weights(model_weights_path)
|
| 28 |
+
tokenizer = ElectraTokenizer.from_pretrained("google/electra-base-discriminator")
|
| 29 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
@router.post(ROUTE, tags=["Text Task"],
|
| 32 |
description=DESCRIPTION)
|
|
|
|
| 51 |
"7_fossil_fuels_needed": 7
|
| 52 |
}
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
# Load and prepare the dataset
|
| 55 |
dataset = load_dataset(request.dataset_name)
|
| 56 |
|
| 57 |
# Convert string labels to integers
|
| 58 |
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
|
| 59 |
|
| 60 |
+
# Split dataset
|
| 61 |
+
train_test = dataset["train"]
|
| 62 |
+
test_dataset = dataset["test"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
# Start tracking emissions
|
| 65 |
tracker.start()
|
|
|
|
| 70 |
# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
|
| 71 |
#--------------------------------------------------------------------------------------------
|
| 72 |
|
|
|
|
| 73 |
try:
|
| 74 |
+
# Tokenize the input texts
|
| 75 |
+
encoded_input = tokenizer(test_dataset["text"], truncation=True, padding=True, return_tensors="tf")
|
| 76 |
+
|
|
|
|
| 77 |
# Make predictions
|
| 78 |
+
outputs = model(encoded_input["input_ids"], attention_mask=encoded_input["attention_mask"], training=False)
|
| 79 |
+
predictions = tf.argmax(outputs.logits, axis=1).numpy()
|
| 80 |
+
|
|
|
|
| 81 |
# Get true labels
|
| 82 |
true_labels = test_dataset["label"]
|
| 83 |
except Exception as e:
|