Spaces:
Sleeping
Sleeping
add custom layernorm patch, low mem usage
Browse files- tasks/text.py +5 -4
tasks/text.py
CHANGED
@@ -50,7 +50,7 @@ class TextClassifier:
|
|
50 |
# Initialize tokenizer
|
51 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
52 |
model_name,
|
53 |
- model_max_length=…  [removed line; original value truncated in page extraction — replaced by 2048 in this commit]
|
54 |
padding_side='right',
|
55 |
truncation_side='right'
|
56 |
)
|
@@ -64,7 +64,8 @@ class TextClassifier:
|
|
64 |
self.model = AutoModelForSequenceClassification.from_pretrained(
|
65 |
model_name,
|
66 |
config=self.config,
|
67 |
-
ignore_mismatched_sizes=True
|
|
|
68 |
)
|
69 |
finally:
|
70 |
# Restore original LayerNorm
|
@@ -91,7 +92,7 @@ class TextClassifier:
|
|
91 |
batch,
|
92 |
return_tensors="pt",
|
93 |
truncation=True,
|
94 |
- max_length=…  [removed line; original value truncated in page extraction — replaced by 2048 in this commit]
|
95 |
padding='max_length'
|
96 |
).to(self.device)
|
97 |
|
@@ -146,7 +147,7 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
146 |
classifier = TextClassifier()
|
147 |
|
148 |
# Prepare batches
|
149 |
- batch_size = …  [removed line; original value truncated in page extraction — replaced by 24 in this commit]
|
150 |
quotes = test_dataset["quote"]
|
151 |
num_batches = len(quotes) // batch_size + (1 if len(quotes) % batch_size != 0 else 0)
|
152 |
batches = [
|
|
|
50 |
# Initialize tokenizer
|
51 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
52 |
model_name,
|
53 |
+
model_max_length=2048,
|
54 |
padding_side='right',
|
55 |
truncation_side='right'
|
56 |
)
|
|
|
64 |
self.model = AutoModelForSequenceClassification.from_pretrained(
|
65 |
model_name,
|
66 |
config=self.config,
|
67 |
+
ignore_mismatched_sizes=True,
|
68 |
+
low_cpu_mem_usage=True
|
69 |
)
|
70 |
finally:
|
71 |
# Restore original LayerNorm
|
|
|
92 |
batch,
|
93 |
return_tensors="pt",
|
94 |
truncation=True,
|
95 |
+
max_length=2048,
|
96 |
padding='max_length'
|
97 |
).to(self.device)
|
98 |
|
|
|
147 |
classifier = TextClassifier()
|
148 |
|
149 |
# Prepare batches
|
150 |
+
batch_size = 24
|
151 |
quotes = test_dataset["quote"]
|
152 |
num_batches = len(quotes) // batch_size + (1 if len(quotes) % batch_size != 0 else 0)
|
153 |
batches = [
|