Update tasks/text.py
Browse files - tasks/text.py +16 -15
tasks/text.py
CHANGED
@@ -38,27 +38,13 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
38 |
}
|
39 |
|
40 |
# Load and prepare the dataset
|
41 |
-
dataset = load_dataset(request.dataset_name)
|
42 |
|
43 |
# Convert string labels to integers
|
44 |
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
|
45 |
|
46 |
# Split dataset
|
47 |
-
train_test = dataset["train"]
|
48 |
test_dataset = dataset["test"]
|
49 |
-
|
50 |
-
# Start tracking emissions
|
51 |
-
tracker.start()
|
52 |
-
tracker.start_task("inference")
|
53 |
-
|
54 |
-
#--------------------------------------------------------------------------------------------
|
55 |
-
# YOUR MODEL INFERENCE CODE HERE
|
56 |
-
# Update the code below to replace the random baseline with your model inference, keeping it inside the inference pass where energy consumption and emissions are tracked.
|
57 |
-
#--------------------------------------------------------------------------------------------
|
58 |
-
|
59 |
-
# Make random predictions (placeholder for actual model inference)
|
60 |
-
#true_labels = test_dataset["label"]
|
61 |
-
#predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
|
62 |
|
63 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
64 |
import torch
|
@@ -77,6 +63,21 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
77 |
model.to(device)
|
78 |
model.eval() # Set to evaluation mode
|
79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
# tokenize texts
|
82 |
test_encodings = tokenizer(test_dataset["quote"], padding='max_length', truncation=True, max_length=MAX_LENGTH, return_tensors="pt")
|
|
|
38 |
}
|
39 |
|
40 |
# Load and prepare the dataset
|
41 |
+
dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
|
42 |
|
43 |
# Convert string labels to integers
|
44 |
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
|
45 |
|
46 |
# Split dataset
|
|
|
47 |
test_dataset = dataset["test"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
50 |
import torch
|
|
|
63 |
model.to(device)
|
64 |
model.eval() # Set to evaluation mode
|
65 |
|
66 |
+
|
67 |
+
# Start tracking emissions
|
68 |
+
tracker.start()
|
69 |
+
tracker.start_task("inference")
|
70 |
+
|
71 |
+
#--------------------------------------------------------------------------------------------
|
72 |
+
# YOUR MODEL INFERENCE CODE HERE
|
73 |
+
# Update the code below to replace the random baseline with your model inference, keeping it inside the inference pass where energy consumption and emissions are tracked.
|
74 |
+
#--------------------------------------------------------------------------------------------
|
75 |
+
|
76 |
+
# Make random predictions (placeholder for actual model inference)
|
77 |
+
#true_labels = test_dataset["label"]
|
78 |
+
#predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
|
79 |
+
|
80 |
+
|
81 |
|
82 |
# tokenize texts
|
83 |
test_encodings = tokenizer(test_dataset["quote"], padding='max_length', truncation=True, max_length=MAX_LENGTH, return_tensors="pt")
|