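# Gradio chat demo for a Hugging Face Space: wraps a Transformers
# text-generation pipeline in gr.ChatInterface and borrows a GPU per request
# via the ZeroGPU @spaces.GPU decorator.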
import spaces
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr

# Populated once by init() and reused by generate_text().
text_generator = None

def init():
    global text_generator
    huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
    if not huggingface_token:
        # Public models load without a token; gated ones (e.g. Gemma) will not.
        print("HUGGINGFACE_TOKEN is not set; add it as a Space secret if you need a gated model")
        # raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")

    # model_id = "google/gemma-2-9b-it"  # gated alternative
    model_id = "microsoft/Phi-3-mini-128k-instruct"
    device = "cuda"  # or "auto" to let accelerate decide the placement
    dtype = torch.bfloat16

    tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
    print(model_id, device, dtype)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
    )
    # pipeline() has no .to(device); placement is controlled by device_map.
    text_generator = pipeline(
        "text-generation", model=model, tokenizer=tokenizer,
        torch_dtype=dtype, device_map=device,
    )
    if next(model.parameters()).is_cuda:
        print("The model is on a GPU")
    else:
        print("The model is on a CPU")

    # Compare device.type, not str(device): the string is "cuda:0", not "cuda".
    if text_generator.device.type == "cuda":
        print("The pipeline is using a GPU")
    else:
        print("The pipeline is using a CPU")

    print("initialized")

@spaces.GPU(duration=120)
def generate_text(messages):
    # On ZeroGPU Spaces the decorator attaches a GPU to this call for up to
    # `duration` seconds; the model and pipeline built in init() are reused.
    result = text_generator(messages, max_new_tokens=256, do_sample=True, temperature=0.7)
    generated_output = result[0]["generated_text"]
    # With list-of-messages input, the pipeline returns the whole conversation;
    # take the content of the last assistant turn.
    if isinstance(generated_output, list):
        for message in reversed(generated_output):
            if message.get("role") == "assistant":
                return message.get("content", "No content found.")
        return "No assistant response found."
    else:
        return "Unexpected output format."

def call_generate_text(message, history):
    # gr.ChatInterface(type="messages") supplies history as a list of
    # {"role": ..., "content": ...} dicts, ready for the pipeline.
    print(message)
    print(history)
    messages = history + [{"role": "user", "content": message}]
    try:
        return generate_text(messages)
    except RuntimeError as e:
        print(f"An unexpected error occurred: {e}")
        return ""

demo = gr.ChatInterface(call_generate_text, type="messages")

if __name__ == "__main__":
    init()
    demo.launch(share=True)
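
# Minimal sketch of exercising the generator outside Gradio (assumes init()
# has run and a GPU/ZeroGPU slot is available):
#   init()
#   print(generate_text([{"role": "user", "content": "Hello!"}]))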