freddyaboulton HF staff committed on
Commit
94f6ac7
·
1 Parent(s): 1890c65

Use chatinterface

Browse files
Files changed (1) hide show
  1. app.py +4 -7
app.py CHANGED
@@ -47,8 +47,9 @@ def generate(
47
  response = f'The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Please create a new thread.'
48
  else:
49
  response = run(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k)
50
- return response, history + [(message, response)]
51
 
 
52
 
53
  with gr.Blocks() as demo:
54
 
@@ -67,17 +68,13 @@ with gr.Blocks() as demo:
67
  ```python
68
  secrets = {"HUGGING_FACE_HUB_TOKEN": "<your-key-here>",}
69
  client = grc.Client.duplicate("gradio-discord-bots/llama-2-13b-chat-transformers", secrets=secrets, hardware="a10g-small")
70
- client.deploy_discord(api_names=["chat"])
71
  ```
72
  """
73
  )
74
 
75
  gr.Markdown(LICENSE)
76
  with gr.Row(visible=False):
77
- state = gr.State([])
78
- msg = gr.Textbox()
79
- output = gr.Textbox()
80
- btn = gr.Button()
81
- btn.click(generate, [msg, state], [output, state], api_name="chat")
82
 
83
  demo.queue(max_size=20).launch()
 
47
  response = f'The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Please create a new thread.'
48
  else:
49
  response = run(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k)
50
+ return response
51
 
52
+ interface = gr.ChatInterface(generate)
53
 
54
  with gr.Blocks() as demo:
55
 
 
68
  ```python
69
  secrets = {"HUGGING_FACE_HUB_TOKEN": "<your-key-here>",}
70
  client = grc.Client.duplicate("gradio-discord-bots/llama-2-13b-chat-transformers", secrets=secrets, hardware="a10g-small")
71
+ client.deploy_discord(api_names=["chat"], hf_token="<your-key-here>")
72
  ```
73
  """
74
  )
75
 
76
  gr.Markdown(LICENSE)
77
  with gr.Row(visible=False):
78
+ interface.render()
 
 
 
 
79
 
80
  demo.queue(max_size=20).launch()