Spaces:
Runtime error
Runtime error
File size: 2,339 Bytes
1dd8d6e 376d532 1dd8d6e 376d532 6adb322 1dd8d6e 376d532 c20ba17 1dd8d6e 376d532 dc37782 376d532 1dd8d6e 8fd5823 1dd8d6e 8fd5823 376d532 1dd8d6e 376d532 1dd8d6e 376d532 1dd8d6e d665e1b 040d697 9c8dc08 040d697 8abaccc 9c8dc08 1dd8d6e ad0fa67 9c8dc08 1dd8d6e dc37782 8fd5823 dc37782 376d532 8fd5823 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import spaces
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr
# Hub access token, taken from the environment (on Spaces: a secret).
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
if not huggingface_token:
    # Deliberately non-fatal: only a notice is printed and loading proceeds
    # with token=None (an earlier ValueError here was disabled).
    print("no HUGGINGFACE_TOKEN if you need set secret ")

# Model selection and loading options shared by every call below.
model_id = "google/gemma-2-9b-it"
device = "auto"  # used as the device_map argument, not as a torch.device
dtype = torch.bfloat16

tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
print(model_id, device, dtype)

# NOTE(review): nothing in the visible code reads this list.
histories = []

# The weights are loaded once, at import time.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=huggingface_token,
    torch_dtype=dtype,
    device_map=device,
)

# Module-level pipeline; a pipeline object has no .to(device), so placement
# is requested through device_map instead.
text_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=dtype,
    device_map=device,
)
@spaces.GPU(duration=120)
def generate_text(messages):
    """Generate a reply for a chat conversation and return its text.

    Args:
        messages: The conversation handed to the text-generation pipeline
            (presumably a list of ``{"role": ..., "content": ...}`` dicts;
            confirm against the caller).

    Returns:
        The ``content`` of the last message whose role is ``"assistant"``
        in the pipeline output, ``"No content found."`` if that message has
        no ``content`` key, ``"No assistant response found."`` if no
        assistant message exists, or ``"Unexpected output format."`` when
        the generated output is not a list.
    """
    # A pipeline is assembled on every call around the module-level model
    # and tokenizer; a pipeline object has no .to(device), so placement is
    # requested through device_map.
    chat_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=dtype,
        device_map=device,
    )
    outputs = chat_pipeline(
        messages, max_new_tokens=256, do_sample=True, temperature=0.7
    )
    conversation = outputs[0]["generated_text"]

    # Guard clause: anything other than a list of messages is unusable here.
    if not isinstance(conversation, list):
        return "Unexpected output format."

    # Walk backwards so the most recent assistant turn wins.
    for turn in reversed(conversation):
        if turn.get("role") == "assistant":
            return turn.get("content", "No content found.")
    return "No assistant response found."
def call_generate_text(message, history):
    """Chat callback: answer ``message`` in the context of ``history``.

    Args:
        message: Text of the newest user turn.
        history: Earlier turns of the conversation; it is concatenated with
            a list of ``{"role", "content"}`` dicts, so it is expected to be
            a list in that same shape (the interface is created with
            ``type="messages"``).

    Returns:
        The reply text produced by ``generate_text``, or an empty string if
        generation raised a ``RuntimeError``.
    """
    # Debug trace of the incoming request.
    print(message)
    print(history)

    # Build a new list so the caller's history object is never mutated.
    messages = history + [{"role": "user", "content": message}]
    try:
        # Send the full conversation *including* the new user turn.
        # Passing `history` alone would drop the message being answered.
        return generate_text(messages)
    except RuntimeError as e:
        # Best-effort: report the failure and return an empty reply rather
        # than propagating the error to the UI.
        print(f"An unexpected error occurred: {e}")
        return ""
# Chat UI wired to the callback; type="messages" selects the role/content
# dict format for the conversation history.
demo = gr.ChatInterface(
    fn=call_generate_text,
    type="messages",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=True)
|