jgauthier committed
Commit af46379 · Parent: 4bd2962

disable truncation. longer sequences are handled just fine, AFAICS

Files changed (1): syntaxgym.py (+0 −2)
syntaxgym.py CHANGED
@@ -123,7 +123,6 @@ def prepare_tokenizer(model, batch_size, add_start_token=True) -> Tuple[PreTrain
     tokenizer_kwargs = {
         "add_special_tokens": False,
         "padding": True,
-        "truncation": True,
         "max_length": max_tokenized_len
     }
     return tokenizer, tokenizer_kwargs
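
For context, a minimal sketch of how these tokenizer_kwargs are typically consumed by a Hugging Face tokenizer call. The model choice ("gpt2"), the pad-token setup, the value of max_tokenized_len, and the sample inputs are all assumptions for illustration, not taken from the repository. With "truncation": True removed, inputs longer than max_length pass through whole instead of being cut off (the tokenizer may warn that max_length is set without truncation); padding=True still pads each batch to its longest member.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # hypothetical model choice
tokenizer.pad_token = tokenizer.eos_token          # GPT-2 ships without a pad token

max_tokenized_len = tokenizer.model_max_length     # assumption: the cap used upstream
tokenizer_kwargs = {
    "add_special_tokens": False,
    "padding": True,               # pad to the longest sequence in the batch
    "max_length": max_tokenized_len,
}

# Without "truncation": True, longer-than-max_length inputs are kept intact.
batch = tokenizer(
    ["The keys to the cabinet are on the table.",
     "The keys to the cabinet is on the table."],
    return_tensors="pt",
    **tokenizer_kwargs,
)
print(batch["input_ids"].shape)  # (2, length of the longer tokenization)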
@@ -186,7 +185,6 @@ class SyntaxGym(evaluate.EvaluationModule):
         assert input_ids.ndim == 2
 
         # Compute sentence level surprisals.
-        # TODO support sentences which exceed truncation length
         with torch.no_grad():
             # Pre-softmax predictive distribution B * T * V
             logits = model(input_ids).logits
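
This hunk ends where the module turns the B * T * V logits into surprisals. As a point of reference, here is a minimal sketch of the standard per-token surprisal computation for a causal LM; the function name, the shift-by-one alignment, and the bits-versus-nats choice are assumptions about what SyntaxGym does internally, not taken from its source.

import torch
import torch.nn.functional as F

def token_surprisals(logits: torch.Tensor, input_ids: torch.Tensor) -> torch.Tensor:
    """Per-token surprisal (in bits) from a causal LM's output.

    logits    -- B * T * V pre-softmax predictive distribution (as in the hunk above)
    input_ids -- B * T token ids fed to the model
    """
    # Logits at position t predict the token at position t + 1, so shift
    # predictions and targets against each other by one step.
    log_probs = F.log_softmax(logits[:, :-1], dim=-1)            # B * (T-1) * V
    targets = input_ids[:, 1:].unsqueeze(-1)                     # B * (T-1) * 1
    nll = -log_probs.gather(dim=-1, index=targets).squeeze(-1)   # B * (T-1)
    return nll / torch.log(torch.tensor(2.0))                    # nats -> bits

Because this operates over the full, untruncated T dimension, it works for sequences of any length the model accepts, which is consistent with the commit message's rationale for dropping truncation.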