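"""Gradio demo: chat with google/gemma-2-9b-it on a Hugging Face ZeroGPU Space.

The model is loaded inside the @spaces.GPU-decorated function so that it is only
instantiated while a GPU is attached. Gemma 2's chat template does not accept a
"system" role, so no system message is sent.
"""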
import spaces
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr


huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
if not huggingface_token:
    print("HUGGINGFACE_TOKEN is not set; add it as a secret if the model requires authentication")
    # raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")

model_id = "google/gemma-2-9b-it"

device = "auto"  # device_map setting; alternatively torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.bfloat16

tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)

print(model_id, device, dtype)
histories = []  # running chat history, shared across calls


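# ZeroGPU pattern: everything that needs CUDA runs inside this decorated
# function; the Space attaches a GPU for at most `duration` seconds per call.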
@spaces.GPU(duration=120)
def generate_text(messages):
    model = AutoModelForCausalLM.from_pretrained(
        model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
    )

    # pipeline objects do not support .to(device); placement is handled by device_map.
    text_generator = pipeline(
        "text-generation", model=model, tokenizer=tokenizer, torch_dtype=dtype, device_map=device
    )
    result = text_generator(messages, max_new_tokens=256, do_sample=True, temperature=0.7)

    generated_output = result[0]["generated_text"]
    if isinstance(generated_output, list):
        # Chat-style output: return the content of the last assistant turn.
        for message in reversed(generated_output):
            if message.get("role") == "assistant":
                return message.get("content", "No content found.")
        return "No assistant response found."
    else:
        return "Unexpected output format."

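# CPU-side wrapper: maintains the running chat history and delegates
# generation to the GPU function above.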
def call_generate_text(prompt, system_message="You are a helpful assistant."):
    if prompt == "":
        print("empty prompt, skipping")
        return ""
    
    global histories
        
    messages = [
        # Gemma 2's template rejects the system role, so system_message stays unused:
        #{"role": "system", "content": system_message},
    ]
    
    messages += histories

    user_message = {"role": "user", "content": prompt}
    messages.append(user_message)
    
    try:
        text = generate_text(messages)
        histories += [user_message, {"role": "assistant", "content": text}]
        return text
    except RuntimeError as e:
        print(f"An unexpected error occurred: {e}")
    return ""

iface = gr.Interface(
    fn=call_generate_text,
    inputs=[
        gr.Textbox(lines=3, label="Input Prompt"),
        #gr.Textbox(lines=2, label="System Message", value="You are a helpful assistant who always replies in Japanese."),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title=f"{model_id}",
    description=f"{model_id} (no system message is sent: the chat template raises jinja2.exceptions.TemplateError: System role not supported)",
)
print("Initialized")


if __name__ == "__main__":
    print("Main")
    iface.launch()
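
# A minimal sketch of calling the running app from another process with
# gradio_client (the Space name "user/space" is a placeholder):
#
#   from gradio_client import Client
#   client = Client("user/space")
#   print(client.predict("Hello!", api_name="/predict"))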