Spaces:

neuralmagic
/

sparse-llama-gsm8k

Paused

mgoin committed on Nov 17, 2023

Commit

d54ef7f

1 Parent(s): bea8480

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -52,7 +52,7 @@ with gr.Blocks() as demo:
         with gr.Column():
             gr.Markdown(DESCRIPTION)
         with gr.Column():
-            gr.Markdown("""### MPT GSM Sparse Finetuned Demo""")
             with gr.Group():
                 chatbot = gr.Chatbot(label="Chatbot")
@@ -148,6 +148,7 @@ with gr.Blocks() as demo:
                 inference = pipe(sequences=message, streaming=True, **generation_config)
                 history[-1][1] += message
                 for token in inference:
                     history[-1][1] += token.generations[0].text
                     yield history
                 print(pipe.timer_manager)
@@ -193,7 +194,15 @@ with gr.Blocks() as demo:
                 queue=False,
             ).success(
                 generate,
-                inputs=[saved_input, chatbot, max_new_tokens, temperature],
                 outputs=[chatbot],
                 api_name=False,
             )
@@ -212,7 +221,15 @@ with gr.Blocks() as demo:
                 queue=False,
             ).then(
                 generate,
-                inputs=[saved_input, chatbot, max_new_tokens, temperature],
                 outputs=[chatbot],
                 api_name=False,
             )

         with gr.Column():
             gr.Markdown(DESCRIPTION)
         with gr.Column():
+            gr.Markdown("""### Sparse Finetuned Llama Demo""")
             with gr.Group():
                 chatbot = gr.Chatbot(label="Chatbot")
                 inference = pipe(sequences=message, streaming=True, **generation_config)
                 history[-1][1] += message
                 for token in inference:
+                    print(token.generations[0].text)
                     history[-1][1] += token.generations[0].text
                     yield history
                 print(pipe.timer_manager)
                 queue=False,
             ).success(
                 generate,
+                inputs=[
+                    saved_input,
+                    chatbot,
+                    max_new_tokens,
+                    temperature,
+                    top_p,
+                    top_k,
+                    repetition_penalty,
+                ],
                 outputs=[chatbot],
                 api_name=False,
             )
                 queue=False,
             ).then(
                 generate,
+                inputs=[
+                    saved_input,
+                    chatbot,
+                    max_new_tokens,
+                    temperature,
+                    top_p,
+                    top_k,
+                    repetition_penalty,
+                ],
                 outputs=[chatbot],
                 api_name=False,
             )