Spaces:

SimpleBerry
/

LLaMA-O1-Supervised-1129-Demo

Running

Di Zhang committed on Dec 2, 2024

Commit

69c6a7a

verified ·

1 Parent(s): c1f3a83

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -37,7 +37,7 @@ def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95)
     input_texts = [input_text.replace('<|end_of_text|>','') for input_text in input_texts]
     #print(f"input_texts[0]: {input_texts[0]}")
     inputs = model.tokenize(input_texts[0].encode('utf-8'))
-    for token in model.generate(inputs, top_p=top_p, temp=temperature,):
         #print(f"token: {token}")
         text = model.detokenize([token])
         #print(f"text detok: {text}")
@@ -57,7 +57,7 @@ with gr.Blocks() as demo:
             ['If Diana needs to bike 10 miles to reach home and she can bike at a speed of 3 mph for two hours before getting tired, and then at a speed of 1 mph until she reaches home, how long will it take her to get home?'],
             ['Find the least odd prime factor of $2019^8+1$.'],
         ],
-        cache_examples=True,
         fill_height=True
     )

     input_texts = [input_text.replace('<|end_of_text|>','') for input_text in input_texts]
     #print(f"input_texts[0]: {input_texts[0]}")
     inputs = model.tokenize(input_texts[0].encode('utf-8'))
+    for token in model(inputs, top_p=top_p, temp=temperature, stream=True):
         #print(f"token: {token}")
         text = model.detokenize([token])
         #print(f"text detok: {text}")
             ['If Diana needs to bike 10 miles to reach home and she can bike at a speed of 3 mph for two hours before getting tired, and then at a speed of 1 mph until she reaches home, how long will it take her to get home?'],
             ['Find the least odd prime factor of $2019^8+1$.'],
         ],
+        cache_examples=False,
         fill_height=True
     )