# NOTE: Hugging Face file-viewer residue (commit hashes and line-number
# gutter) removed from the top of this file — it was not program source.
import spaces
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers import TextIteratorStreamer
from threading import Thread
import gradio as gr
# --- Model / runtime configuration -----------------------------------------
text_generator = None
model_id = "AXCXEPT/phi-4-deepseek-R1K-RL-EZO"
#model_id = "AXCXEPT/phi-4-open-R1-Distill-EZOv1"#not well work with my old code

# Read the access token from the Space secret.  (The original code
# immediately overwrote this with None, which silently disabled the
# HUGGINGFACE_TOKEN secret — public models still load without it.)
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")

# On ZeroGPU Spaces, CUDA is available inside @spaces.GPU-decorated calls.
device = "cuda"
dtype = torch.bfloat16

if not huggingface_token:
    # Non-fatal: only gated/private models require a token.
    print("no HUGGINGFACE_TOKEN if you need set secret ")
    #raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")

tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)

histories = []

model = AutoModelForCausalLM.from_pretrained(
    model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
)
# device_map already placed the weights on CUDA; this .to() is a no-op kept
# for parity with the original behaviour.
model.to(device)
def generate_text(messages):
    """Stream a completion for a chat ``messages`` list.

    Args:
        messages: list of ``{"role": ..., "content": ...}`` dicts.

    Yields:
        The cumulative generated string after each new chunk, suitable for
        a Gradio streaming callback.
    """
    # Render the conversation with the model's chat template, then tokenize.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # skip_prompt=True: emit only newly generated tokens, not the prompt.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1000)

    # model.generate() blocks, so run it on a worker thread and consume
    # the streamer on this one.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    try:
        generated_output = ""
        for new_text in streamer:
            # The model emits a literal end marker; strip it from the output.
            generated_output += new_text.replace("<|im_end|>", "")
            yield generated_output
    finally:
        # Fix: the original never joined the worker, leaking the thread if
        # the consumer abandoned the generator mid-stream.
        thread.join()
# SDK version is very important in README.md
@spaces.GPU(duration=120)
def call_generate_text(message, history):
    """Gradio ChatInterface callback: stream a reply to *message*.

    *history* is a list of ``{"role": ..., "content": ...}`` dicts
    (``type="messages"``); the new user turn is appended before generating.
    """
    conversation = [*history, {"role": "user", "content": message}]
    try:
        yield from generate_text(conversation)
    except RuntimeError as e:
        print(f"An unexpected error occurred: {e}")
        yield ""
# Wire the streaming callback into a chat UI; type="messages" makes history
# a list of role/content dicts, matching call_generate_text's expectation.
demo = gr.ChatInterface(call_generate_text, type="messages")

if __name__ == "__main__":
    demo.queue()   # queuing is required for generator (streaming) callbacks
    demo.launch()  # fix: removed stray trailing "|" residue (SyntaxError)