Akjava committed on
Commit
dc37782
·
1 Parent(s): 940d9a9
Files changed (1)
app.py +22 -75
app.py CHANGED
@@ -6,104 +6,51 @@ import spaces
 
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 if not huggingface_token:
-    pass
-    print("no HUGGINGFACE_TOKEN if you need set secret ")
-    #raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
+    raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
 
+model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 model_id = "microsoft/Phi-3-mini-128k-instruct"
-
-device = "auto" # torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# device_map style value auto not cuda
+device = "auto" #torch.device("cuda" if torch.cuda.is_available() else "cpu")
 dtype = torch.bfloat16
 
-tokenizer = AutoTokenizer.from_pretrained(model_id)#, token=huggingface_token)
-
-
-import time
-time.sleep(10)
-
+tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
 
 print(model_id,device,dtype)
-histories = []
-contents = []
-
-def call_generate_text(prompt, system_message="You are a helpful assistant."):
-
-    print(histories)
-    print(contents)
-
-    if prompt =="":
-        print("empty prompt return")
-        return ""
-    global initialized
-    if not initialized:
-        initialized = True
-        #return
-    try:
-        text = generate_text(prompt,system_message)
-        contents.append(text)
-        return text
-    except RuntimeError as e:
-        print(f"An unexpected error occurred: {e}")
-
-    return ""
-
-
-
-initialized = False
-
-iface = gr.Interface(
-    fn=call_generate_text,
-    inputs=[
-        gr.Textbox(lines=3, label="Input Prompt"),
-        gr.Textbox(lines=2, label="System Message", value="You are a helpful assistant."),
-    ],
-    outputs=gr.Textbox(label="Generated Text"),
-    title="Phi-3-mini-128k-instruct",
-    description="Phi-3-mini-128k-instruct",
-)
-print("Initialized")
-
-# keeping model seems make crash
-
-@spaces.GPU(duration=100)
+@spaces.GPU
 def generate_text(prompt, system_message="You are a helpful assistant."):
-    #print(prompt,system_message)
-
-    global histories
-
     model = AutoModelForCausalLM.from_pretrained(
-        model_id ,torch_dtype=dtype,device_map=device # token=huggingface_token
+        model_id, torch_dtype=dtype,device_map=device, token=huggingface_token
     )
-    #print(system_message)
-    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer,torch_dtype=dtype,device_map=device) #pipeline has not to(device)
+    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=dtype, device_map=device)
 
     messages = [
         {"role": "system", "content": system_message},
+        {"role": "user", "content": prompt},
     ]
-
-    messages += histories
-
-    user_message = {"role": "user", "content": prompt}
 
-    messages += [user_message]
-
-    #print(messages)
-
     result = text_generator(messages, max_new_tokens=256, do_sample=True, temperature=0.7)
 
     generated_output = result[0]["generated_text"]
     if isinstance(generated_output, list):
         for message in reversed(generated_output):
             if message.get("role") == "assistant":
-                content= message.get("content", "No content found.")
-                histories += [user_message,{"role": "assistant", "content": content}]
-                print(f"history = {len(histories)}")
-                return content
-
+                return message.get("content", "No content found.")
         return "No assistant response found."
     else:
         return "Unexpected output format."
 
+
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(lines=3, label="Input Prompt"),
+        gr.Textbox(lines=2, label="System Message", value="You are a helpful assistant."),
+    ],
+    outputs=gr.Textbox(label="Generated Text"),
+    title="Llama 3.1 8B Instruct Text Generation",
+    description="Enter a prompt and optional system message to generate text using the Llama 3.1 8B Instruct model.",
+)
+
 if __name__ == "__main__":
-    print("Main")
     iface.launch()
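
The refactored generate_text can also be exercised directly, without going through the Gradio interface. The lines below are a minimal sketch, not part of this commit; they assume the same tokenizer, model, token, and ZeroGPU setup as app.py, and the prompt string is only an example.

# Hypothetical smoke test (not part of the commit): call generate_text directly,
# bypassing the Gradio UI. The prompt below is an arbitrary example.
reply = generate_text(
    "Explain in one sentence what device_map='auto' does.",
    system_message="You are a helpful assistant.",
)
print(reply)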