# Gradio Space: chat demo for google/gemma-2-9b-it (Hugging Face ZeroGPU).
import spaces
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr
# --- Configuration ---------------------------------------------------------
# Access token for the Hugging Face Hub; gated models (gemma-2) require one.
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
if not huggingface_token:
    # Warn instead of failing hard so the Space can still start; the gated
    # model download below will fail later if the token is actually required.
    print("no HUGGINGFACE_TOKEN if you need set secret ")
    #raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")

model_id = "google/gemma-2-9b-it"
device = "auto"  # torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.bfloat16

# Tokenizer is loaded once at import time; the model itself is loaded lazily
# inside the @spaces.GPU-decorated function (ZeroGPU pattern).
tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
print(model_id, device, dtype)

# Running chat history shared across calls (list of chat-format message dicts).
histories = []
#model = None
@spaces.GPU(duration=120)
def generate_text(messages):
    """Generate a chat completion and return the assistant's reply text.

    messages: chat-format list of {"role": ..., "content": ...} dicts.
    Returns the most recent assistant turn's content, or a diagnostic
    string when the pipeline output is not in the expected shape.
    """
    # Load the model inside the GPU-decorated call (ZeroGPU pattern).
    llm = AutoModelForCausalLM.from_pretrained(
        model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
    )
    generator = pipeline(
        "text-generation",
        model=llm,
        tokenizer=tokenizer,
        torch_dtype=dtype,
        device_map=device,  # pipeline objects have no .to(device); use device_map
    )
    outputs = generator(messages, max_new_tokens=256, do_sample=True, temperature=0.7)
    generated_output = outputs[0]["generated_text"]

    if not isinstance(generated_output, list):
        return "Unexpected output format."
    # Walk backwards so we pick up the newest assistant turn first.
    for entry in reversed(generated_output):
        if entry.get("role") == "assistant":
            return entry.get("content", "No content found.")
    return "No assistant response found."
def call_generate_text(prompt, system_message="You are a helpful assistant."):
    """Chat entry point used by the Gradio UI.

    Combines the running chat history with the new user prompt, asks the
    model for a reply, and records both turns in the module-level history.
    Returns the reply text, or "" for empty input or a runtime failure.
    The system_message parameter is unused: gemma-2's chat template
    rejects a system role (see the commented line below).
    """
    global histories

    # Ignore empty submissions outright.
    if prompt == "":
        print("empty prompt return")
        return ""

    conversation = [
        #{"role": "system", "content": system_message},
    ]
    conversation.extend(histories)
    user_message = {"role": "user", "content": prompt}
    conversation.append(user_message)

    try:
        text = generate_text(conversation)
    except RuntimeError as e:
        print(f"An unexpected error occurred: {e}")
        return ""

    # Persist both turns only after a successful generation.
    histories += [user_message, {"role": "assistant", "content": text}]
    return text
# --- Gradio UI -------------------------------------------------------------
iface = gr.Interface(
    fn=call_generate_text,
    inputs=[
        gr.Textbox(lines=3, label="Input Prompt"),
        # System-message box disabled: gemma-2's chat template raises
        # "TemplateError: System role not supported" (see description).
        #gr.Textbox(lines=2, label="System Message", value="あなたは親切なアシスタントで常に日本語で返答します。"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title=f"{model_id}",
    description=f"{model_id} jinja2.exceptions.TemplateError: System role not supported",
)
print("Initialized")

if __name__ == "__main__":
    print("Main")
    iface.launch()