Akjava committed on
Commit 1dd8d6e · 1 Parent(s): c20ba17
Files changed (1)
  1. app.py +53 -20
app.py CHANGED
@@ -1,57 +1,90 @@
+
+
+import spaces
 import os
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import gradio as gr
-import spaces
+

 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 if not huggingface_token:
     pass
-    raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
+    print("HUGGINGFACE_TOKEN is not set; add it as a Space secret if needed")
+    #raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")

 model_id = "google/gemma-2-9b-it"
-#model_id = "microsoft/Phi-3-mini-128k-instruct"
-# device_map expects the string "auto", not "cuda"
-device = "auto"  # torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+device = "auto"  # torch.device("cuda" if torch.cuda.is_available() else "cpu")
 dtype = torch.bfloat16

 tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)

-#print(model_id, device, dtype)
-@spaces.GPU
-def generate_text(prompt, system_message="You are a helpful assistant."):
+print(model_id, device, dtype)
+histories = []
+#model = None
+
+
+@spaces.GPU(duration=120)
+def generate_text(messages):
     model = AutoModelForCausalLM.from_pretrained(
-        model_id, torch_dtype=dtype, device_map=device, token=huggingface_token
+        model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
     )
-    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=dtype, device_map=device)
-
-    messages = [
-        {"role": "system", "content": system_message},
-        {"role": "user", "content": prompt},
-    ]

+    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=dtype, device_map=device)  # pipeline has no .to(device); device_map handles placement
     result = text_generator(messages, max_new_tokens=256, do_sample=True, temperature=0.7)

     generated_output = result[0]["generated_text"]
     if isinstance(generated_output, list):
         for message in reversed(generated_output):
             if message.get("role") == "assistant":
-                return message.get("content", "No content found.")
+                content = message.get("content", "No content found.")
+                return content
+
         return "No assistant response found."
     else:
         return "Unexpected output format."

+def call_generate_text(prompt, system_message="You are a helpful assistant."):
+    if prompt == "":
+        print("empty prompt; returning")
+        return ""
+
+    global histories
+
+    messages = [
+        {"role": "system", "content": system_message},
+    ]
+
+    messages += histories
+
+    user_message = {"role": "user", "content": prompt}
+
+    messages += [user_message]
+
+    try:
+        text = generate_text(messages)
+        histories += [user_message, {"role": "assistant", "content": text}]
+
+        return text
+    except RuntimeError as e:
+        print(f"An unexpected error occurred: {e}")
+
+        return ""

 iface = gr.Interface(
-    fn=generate_text,
+    fn=call_generate_text,
     inputs=[
         gr.Textbox(lines=3, label="Input Prompt"),
-        gr.Textbox(lines=2, label="System Message", value="You are a helpful assistant."),
+        gr.Textbox(lines=2, label="System Message", value="You are a helpful assistant and always reply in Japanese."),
     ],
     outputs=gr.Textbox(label="Generated Text"),
-    title="google/gemma-2-9b-it Text Generation",
-    description="Enter a prompt and optional system message to generate text using the google/gemma-2-9b-it model.",
+    title=f"{model_id}",
+    description=f"{model_id}",
 )
+print("Initialized")
+

 if __name__ == "__main__":
+    print("Main")
     iface.launch()
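
For context on the main change: on a ZeroGPU Space, a GPU is attached only while a function decorated with `@spaces.GPU` is running, which is why this commit builds the model inside `generate_text` and sets `duration=120` (a 120-second cap on the allocation). A minimal standalone sketch of the same pattern; the model id and generation settings mirror the commit, while the function name and the env-var token handling are illustrative assumptions:

```python
import os
import torch
import spaces  # ZeroGPU helper available on Hugging Face Spaces
from transformers import pipeline

MODEL_ID = "google/gemma-2-9b-it"  # same model as in the commit

@spaces.GPU(duration=120)  # GPU is held for at most ~120s per call
def generate(messages):
    # Construct the pipeline inside the decorated function so weights are
    # placed while the GPU is attached; device_map="auto" handles placement
    # (pipelines expose no .to(device), as the commit's comment notes).
    generator = pipeline(
        "text-generation",
        model=MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        token=os.getenv("HUGGINGFACE_TOKEN"),  # illustrative token handling
    )
    result = generator(messages, max_new_tokens=256, do_sample=True, temperature=0.7)
    return result[0]["generated_text"]
```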
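
The loop over `reversed(generated_output)` relies on the chat-style return format of the text-generation pipeline: when the input is a list of role/content dicts, `generated_text` is the whole conversation with the new assistant turn appended. Roughly (contents abbreviated):

```python
result = generator(messages, max_new_tokens=256)
# result[0]["generated_text"] is the full conversation, e.g.:
# [
#     {"role": "system",    "content": "You are a helpful assistant..."},
#     {"role": "user",      "content": "..."},
#     {"role": "assistant", "content": "...the newly generated reply..."},
# ]
reply = result[0]["generated_text"][-1]["content"]  # last turn is the reply
```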
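
One caveat worth noting: the module-level `histories` list is shared by every visitor to the Space, so concurrent users would see each other's turns mixed into the context. A hedged sketch of the usual Gradio alternative, `gr.State`, which keeps history per session; `generate` refers to the sketch above, and the wiring here is an assumption, not part of the commit:

```python
import gradio as gr

def call_generate_text(prompt, system_message, history):
    # history arrives per-session via gr.State instead of a shared global
    if prompt == "":
        return "", history
    messages = [{"role": "system", "content": system_message}] + history
    messages.append({"role": "user", "content": prompt})
    conversation = generate(messages)   # list of role/content dicts
    reply = conversation[-1]["content"]
    history = history + [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": reply},
    ]
    return reply, history

iface = gr.Interface(
    fn=call_generate_text,
    inputs=[
        gr.Textbox(lines=3, label="Input Prompt"),
        gr.Textbox(lines=2, label="System Message", value="You are a helpful assistant."),
        gr.State([]),  # per-session conversation history
    ],
    outputs=[gr.Textbox(label="Generated Text"), gr.State()],
)
```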