jgauthier committed
Commit af46379 · Parent: 4bd2962

disable truncation. longer sequences are handled just fine, AFAICS

Files changed (1): syntaxgym.py (+0 −2)
syntaxgym.py CHANGED
@@ -123,7 +123,6 @@ def prepare_tokenizer(model, batch_size, add_start_token=True) -> Tuple[PreTrain
     tokenizer_kwargs = {
         "add_special_tokens": False,
         "padding": True,
-        "truncation": True,
         "max_length": max_tokenized_len
     }
     return tokenizer, tokenizer_kwargs
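
For context, a minimal sketch of how these tokenizer_kwargs are typically consumed by a Hugging Face tokenizer call. The model choice ("gpt2"), the pad-token setup, the value of max_tokenized_len, and the sample inputs are all assumptions for illustration, not taken from the repository. With "truncation": True removed, inputs longer than max_length pass through whole instead of being cut off (the tokenizer may warn that max_length is set without truncation); padding=True still pads each batch to its longest member.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # hypothetical model choice
tokenizer.pad_token = tokenizer.eos_token          # GPT-2 ships without a pad token

max_tokenized_len = tokenizer.model_max_length     # assumption: the cap used upstream
tokenizer_kwargs = {
    "add_special_tokens": False,
    "padding": True,               # pad to the longest sequence in the batch
    "max_length": max_tokenized_len,
}

# Without "truncation": True, longer-than-max_length inputs are kept intact.
batch = tokenizer(
    ["The keys to the cabinet are on the table.",
     "The keys to the cabinet is on the table."],
    return_tensors="pt",
    **tokenizer_kwargs,
)
print(batch["input_ids"].shape)  # (2, length of the longer tokenization)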
@@ -186,7 +185,6 @@ class SyntaxGym(evaluate.EvaluationModule):
         assert input_ids.ndim == 2
 
         # Compute sentence level surprisals.
-        # TODO support sentences which exceed truncation length
         with torch.no_grad():
             # Pre-softmax predictive distribution B * T * V
             logits = model(input_ids).logits
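
This hunk ends where the module turns the B * T * V logits into surprisals. As a point of reference, here is a minimal sketch of the standard per-token surprisal computation for a causal LM; the function name, the shift-by-one alignment, and the bits-versus-nats choice are assumptions about what SyntaxGym does internally, not taken from its source.

import torch
import torch.nn.functional as F

def token_surprisals(logits: torch.Tensor, input_ids: torch.Tensor) -> torch.Tensor:
    """Per-token surprisal (in bits) from a causal LM's output.

    logits    -- B * T * V pre-softmax predictive distribution (as in the hunk above)
    input_ids -- B * T token ids fed to the model
    """
    # Logits at position t predict the token at position t + 1, so shift
    # predictions and targets against each other by one step.
    log_probs = F.log_softmax(logits[:, :-1], dim=-1)            # B * (T-1) * V
    targets = input_ids[:, 1:].unsqueeze(-1)                     # B * (T-1) * 1
    nll = -log_probs.gather(dim=-1, index=targets).squeeze(-1)   # B * (T-1)
    return nll / torch.log(torch.tensor(2.0))                    # nats -> bits

Because this operates over the full, untruncated T dimension, it works for sequences of any length the model accepts, which is consistent with the commit message's rationale for dropping truncation.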