Commit 0b3d061
Peter committed · 1 parent: 235585a

🐛 fix input len bug

Signed-off-by: Peter <[email protected]>
- app.py +8 -5
- converse.py +12 -8
- grammar_improve.py +5 -3
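
All three files fix the same bug: generation-length budgets were computed from len(text), a character count, where the models need token counts. A quick illustration of the gap, using a gpt2 tokenizer as a stand-in (the diff does not name the Space's actual checkpoint):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # stand-in checkpoint (assumption)

text = "hello world " * 100
print(len(text))                       # 1200 characters
print(len(tokenizer(text).input_ids))  # roughly 200 tokens, several times fewer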
app.py
CHANGED
@@ -101,11 +101,13 @@ def ask_gpt(
     st = time.perf_counter()
     prompt = clean(message)  # clean user input
     prompt = prompt.strip()  # get rid of any extra whitespace
-    in_len = len(prompt)
+    in_len = len(chat_pipe.tokenizer(prompt).input_ids)
     if in_len > 512:
-        …
-        …
-        …
+        # truncate to last 512 tokens
+        tokens = chat_pipe.tokenizer(prompt).input_ids
+        trunc_tokens = tokens[-512:]
+        prompt = chat_pipe.tokenizer.decode(trunc_tokens)
+        print(f"truncated prompt to {len(trunc_tokens)} tokens, input length: {in_len}")

     resp = discussion(
         prompt_text=prompt,
@@ -115,7 +117,8 @@ def ask_gpt(
         top_p=top_p,
         top_k=top_k,
         temperature=temperature,
-        max_length=
+        max_length=max_length,
+        min_length=min_length,
     )
     gpt_et = time.perf_counter()
     gpt_rt = round(gpt_et - st, 2)
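The app.py change measures the prompt in tokens and keeps only the newest 512 of them. A minimal sketch of that round-trip, with a standalone gpt2 tokenizer standing in for chat_pipe.tokenizer (an assumption; the chat model is not named in this diff):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # stand-in for chat_pipe.tokenizer

def truncate_to_last_n_tokens(text: str, n: int = 512) -> str:
    """Keep only the most recent n tokens of text, preserving the end of the chat."""
    token_ids = tokenizer(text).input_ids
    if len(token_ids) <= n:
        return text
    # Slice from the end so the newest context survives, then decode back to a string.
    return tokenizer.decode(token_ids[-n:])

prompt = truncate_to_last_n_tokens("hello world " * 600)
print(len(tokenizer(prompt).input_ids))  # about 512; re-tokenizing decoded text can shift the count slightly

Slicing from the end, tokens[-512:], matters for chat: it drops the oldest turns rather than the user's latest message.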
converse.py
CHANGED
@@ -17,7 +17,8 @@ def discussion(
     responder: str,
     pipeline,
     timeout=45,
-    …
+    min_length=4,
+    max_length=64,
     top_p=0.95,
     top_k=50,
     temperature=0.7,
@@ -104,7 +105,8 @@ def gen_response(
     speaker: str,
     responder: str,
     timeout=45,
-    …
+    min_length=4,
+    max_length=64,
     top_p=0.95,
     top_k=50,
     temperature=0.7,
@@ -125,7 +127,8 @@ def gen_response(
     responder : str, the name of the person who is responding to the prompt
     pipeline : transformers.Pipeline, the pipeline to use for generating the response
     timeout : int, optional, the number of seconds to wait before timing out, by default 45
-    …
+    min_length : int, optional, the minimum number of tokens to generate, defaults to 4
+    max_length : int, optional, the maximum number of tokens to generate, defaults to 64
     top_p : float, optional, the top probability to use for sampling, defaults to 0.95
     top_k : int, optional, the top k to use for sampling, defaults to 50
     temperature : float, optional, the temperature to use for sampling, defaults to 0.7
@@ -139,15 +142,16 @@ def gen_response(
         str, the generated text

     """
-    …
-    if max_length > 1024:
-        max_length = 1024
-        print("max_length
+    input_len = len(pipeline.tokenizer(query).input_ids)
+    if max_length + input_len > 1024:
+        max_length = max(1024 - input_len, 8)
+        print(f"max_length too large, setting to {max_length}")
     st = time.perf_counter()

     response = pipeline(
         query,
-        …
+        min_length=min_length + input_len,
+        max_length=max_length + input_len,
         temperature=temperature,
         top_k=top_k,
         top_p=top_p,
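The converse.py change adds min_length and max_length to both signatures and offsets each by the prompt's token count before calling the pipeline. The offset is needed because, for decoder-only text-generation models, max_length (and min_length) count prompt tokens plus generated tokens. A sketch of the same logic, assuming a gpt2 stand-in model, whose 1024-token context window matches the cap in the diff; the query string is illustrative only:

from transformers import pipeline

pipe = pipeline("text-generation", model="gpt2")  # stand-in model (assumption)

query = "Person Alpha: how was your day?\nPerson Beta:"
input_len = len(pipe.tokenizer(query).input_ids)

min_length, max_length = 4, 64
if max_length + input_len > 1024:  # keep prompt + output inside the context window
    max_length = max(1024 - input_len, 8)

response = pipe(
    query,
    min_length=min_length + input_len,  # at least 4 newly generated tokens
    max_length=max_length + input_len,  # at most 64 newly generated tokens
    do_sample=True,
    top_p=0.95,
    top_k=50,
    temperature=0.7,
)
print(response[0]["generated_text"])

Recent transformers versions accept max_new_tokens / min_new_tokens, which express the same budget without the manual offset.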
grammar_improve.py
CHANGED
@@ -137,10 +137,11 @@ def synthesize_grammar(
     """
     st = time.perf_counter()
     input_text = clean(message, lower=False)
+    input_len = len(corrector.tokenizer(input_text).input_ids)
     results = corrector(
         input_text,
-        max_length=int(1.1 * len(input_text)),
-        min_length=2 if len(input_text) < 64 else int(0.2 * len(input_text)),
+        max_length=int(1.1 * input_len),
+        min_length=2 if input_len < 64 else int(0.2 * input_len),
         num_beams=num_beams,
         repetition_penalty=repetition_penalty,
         length_penalty=length_penalty,
@@ -479,7 +480,8 @@ def correct_grammar(
     """
     st = time.perf_counter()

-    if len(input_text) < 4:
+    if len(tokenizer(input_text).input_ids) < 4:
+        print(f"input text of {input_text} is too short to be corrected")
         return input_text
     max_length = min(int(math.ceil(len(input_text) * 1.2)), 128)
     batch = tokenizer(
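Both grammar_improve.py hunks make the same substitution: budgets and guards move from len(input_text) characters to tokenizer token counts. A sketch of the corrected synthesize_grammar call, assuming a public T5-based grammar model (vennify/t5-base-grammar-correction and its "grammar: " input prefix are assumptions; the Space's checkpoint is not named here):

from transformers import pipeline

# Hypothetical checkpoint; swap in the Space's actual corrector model.
corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")

input_text = "grammar: Their going too the store, because they're car is broke."
input_len = len(corrector.tokenizer(input_text).input_ids)

results = corrector(
    input_text,
    max_length=int(1.1 * input_len),  # output budget now tracks token length, not characters
    min_length=2 if input_len < 64 else int(0.2 * input_len),
    num_beams=4,
)
print(results[0]["generated_text"])

Unlike the decoder-only chat model in converse.py, a T5 corrector is an encoder-decoder, so max_length here counts only output tokens and needs no input_len offset, which is presumably why this file adds none.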