Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -2,6 +2,7 @@ import spaces
 import os
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from transformers import TextStreamer
 import gradio as gr
 
 text_generator = None
@@ -36,7 +37,7 @@ if not is_hugging_face:
     model = AutoModelForCausalLM.from_pretrained(
         model_id, token=huggingface_token ,torch_dtype=dtype,device_map=device
     )
-    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer,torch_dtype=dtype,device_map=device ) #pipeline has not to(device)
+    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer,torch_dtype=dtype,device_map=device,stream=True ) #pipeline has not to(device)
 
     if next(model.parameters()).is_cuda:
         print("The model is on a GPU")
@@ -57,9 +58,10 @@ def generate_text(messages):
     model = AutoModelForCausalLM.from_pretrained(
         model_id, token=huggingface_token ,torch_dtype=dtype,device_map=device
     )
-
+    streamer = TextStreamer(tokenizer, skip_prompt=True)
+    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer,torch_dtype=dtype,device_map=device ,streamer=streamer) #pipeline has not to(device)
     result = text_generator(messages, max_new_tokens=256, do_sample=True, temperature=0.7)
-
+    print(result)
     generated_output = ""
     for token in result:
         generated_output += token["generated_token"]
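Note: two details in this commit are likely behind the "Runtime error" badge. First, stream=True is not a parameter of pipeline(); unrecognized keyword arguments are forwarded to model.generate(), which rejects unknown kwargs at call time. Second, the text-generation pipeline returns a list of dicts keyed "generated_text"; there is no "generated_token" key, so the final loop raises a KeyError. TextStreamer also only prints tokens to stdout, so it cannot feed a Gradio UI by itself.

Below is a minimal sketch of a working streaming setup, not the Space's actual code. It assumes model_id, huggingface_token, dtype, and device are defined as elsewhere in app.py, and it swaps TextStreamer for TextIteratorStreamer, which exposes generated text as an iterator that a Gradio generator callback can consume.

from threading import Thread

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
    pipeline,
)

# model_id, huggingface_token, dtype, and device are assumed from app.py.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
)
# device_map already places the model; the pipeline has no .to(device).
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)


def generate_text(messages):
    # One streamer per call: TextIteratorStreamer is not reusable across runs.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Run generation in a background thread so this function can drain the
    # streamer; generate kwargs pass through the pipeline call.
    thread = Thread(
        target=text_generator,
        args=(messages,),
        kwargs=dict(
            max_new_tokens=256, do_sample=True, temperature=0.7, streamer=streamer
        ),
    )
    thread.start()
    generated_output = ""
    for chunk in streamer:  # yields decoded text pieces, not dicts
        generated_output += chunk
        yield generated_output
    thread.join()

If streaming is dropped instead, the non-streaming result should be read as result[0]["generated_text"] rather than token["generated_token"].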