Spaces:

joaogante
/

assisted_generation_demo

Running on Zero

joaogante HF Staff committed on May 5, 2023

Commit

588b2d4

1 Parent(s): a1a543e

update model to pythia

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,7 +4,8 @@ import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
-model_id = "declare-lab/flan-alpaca-large"
 torch_device = "cuda" if torch.cuda.is_available() else "cpu"
 print("Running on device:", torch_device)
 print("CPU threads:", torch.get_num_threads())
@@ -15,6 +16,7 @@ if torch_device == "cuda":
 else:
     model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 def run_generation(user_text, top_p, temperature, top_k, max_new_tokens):
@@ -49,15 +51,10 @@ def reset_textbox():
 with gr.Blocks() as demo:
-    duplicate_link = "https://huggingface.co/spaces/joaogante/transformers_streaming?duplicate=true"
     gr.Markdown(
-        "# 🤗 Transformers 🔥Streaming🔥 on Gradio\n"
-        "This demo showcases the use of the "
-        "[streaming feature](https://huggingface.co/docs/transformers/main/en/generation_strategies#streaming) "
-        "of 🤗 Transformers with Gradio to generate text in real-time. It uses "
-        f"[{model_id}](https://huggingface.co/{model_id}) and the Spaces free compute tier.\n\n"
-        f"Feel free to [duplicate this Space]({duplicate_link}) to try your own models or use this space as a "
-        "template! 💛"
     )
     with gr.Row():

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
+model_id = "EleutherAI/pythia-6.9b-deduped"
+assistant_id = "EleutherAI/pythia-70m-deduped"
 torch_device = "cuda" if torch.cuda.is_available() else "cpu"
 print("Running on device:", torch_device)
 print("CPU threads:", torch.get_num_threads())
 else:
     model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
+assistant_model = AutoModelForSeq2SeqLM.from_pretrained(assistant_id).to(torch_device)
 def run_generation(user_text, top_p, temperature, top_k, max_new_tokens):
 with gr.Blocks() as demo:
     gr.Markdown(
+        "# 🤗 Assisted Generation Demo\n"
+        f"Model: {model_id}\n"
+        f"Assistant Model: {assistant_id}"
     )
     with gr.Row():