Akjava committed on
Commit
376d532
·
1 Parent(s): 44599cb
Files changed (2)
  1. app.py +109 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,109 @@
+ import os
+ import time
+
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+ import gradio as gr
+ import spaces
+
+ huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
+ if not huggingface_token:
+     print("HUGGINGFACE_TOKEN is not set; add it as a Space secret if the model requires authentication.")
+     #raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
+
+ model_id = "microsoft/Phi-3-mini-128k-instruct"
+
+ device = "auto"  # torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ dtype = torch.bfloat16
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
+
+ # Short startup pause kept from the original commit; its purpose is not documented.
+ time.sleep(10)
+
+ print(model_id, device, dtype)
+ # Conversation state shared across requests.
+ histories = []
+ contents = []
+
+ # One-time flag; the first-call branch below currently does nothing extra.
+ initialized = False
+
+ def call_generate_text(prompt, system_message="You are a helpful assistant."):
+     print(histories)
+     print(contents)
+
+     if prompt == "":
+         print("empty prompt, returning")
+         return ""
+     global initialized
+     if not initialized:
+         initialized = True
+         #return
+     try:
+         text = generate_text(prompt, system_message)
+         contents.append(text)
+         return text
+     except RuntimeError as e:
+         print(f"An unexpected error occurred: {e}")
+
+     return ""
+
+ iface = gr.Interface(
+     fn=call_generate_text,
+     inputs=[
+         gr.Textbox(lines=3, label="Input Prompt"),
+         # Default system message (Japanese): "You are a kind assistant and always reply in Japanese."
+         gr.Textbox(lines=2, label="System Message", value="あγͺたはθ¦ͺεˆ‡γͺγ‚’γ‚·γ‚Ήγ‚Ώγƒ³γƒˆγ§εΈΈγ«ζ—₯本θͺžγ§θΏ”答します。"),
+     ],
+     outputs=gr.Textbox(label="Generated Text"),
+     title="Phi-3-mini-128k-instruct",
+     description="Phi-3-mini-128k-instruct",
+ )
+ print("Initialized")
+
+ # Keeping the model loaded between calls seems to crash the Space, so it is reloaded per request.
+ @spaces.GPU(duration=100)
+ def generate_text(prompt, system_message="You are a helpful assistant."):
+     global histories
+
+     model = AutoModelForCausalLM.from_pretrained(
+         model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
+     )
+     # device_map already places the model, so the pipeline needs no .to(device).
+     text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=dtype, device_map=device)
+
+     messages = [
+         {"role": "system", "content": system_message},
+     ]
+     messages += histories
+
+     user_message = {"role": "user", "content": prompt}
+     messages += [user_message]
+
+     result = text_generator(messages, max_new_tokens=256, do_sample=True, temperature=0.7)
+
+     generated_output = result[0]["generated_text"]
+     if isinstance(generated_output, list):
+         # The pipeline returns the whole conversation; take the newest assistant turn.
+         for message in reversed(generated_output):
+             if message.get("role") == "assistant":
+                 content = message.get("content", "No content found.")
+                 histories += [user_message, {"role": "assistant", "content": content}]
+                 print(f"history = {len(histories)}")
+                 return content
+         return "No assistant response found."
+     else:
+         return "Unexpected output format."
+
+ if __name__ == "__main__":
+     print("Main")
+     iface.launch()
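
Review note: the subtlest part of app.py is that the text-generation pipeline is called with a list of chat messages rather than a plain string; transformers then applies the model's chat template and returns the full conversation with the new assistant turn appended, which is why generate_text scans generated_text in reverse for an assistant role. A minimal standalone sketch of that pattern, assuming a recent transformers release with chat-style pipeline input (the prompt text here is illustrative):

import torch
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model="microsoft/Phi-3-mini-128k-instruct",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
result = generator(messages, max_new_tokens=64, do_sample=True, temperature=0.7)
# For chat input, generated_text is the whole message list; the reply comes last.
print(result[0]["generated_text"][-1]["content"])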
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ numpy
+ torch
+ spaces
+ accelerate
+ bitsandbytes
+ transformers
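
Note: no versions are pinned, so the Space tracks the latest release of each package. accelerate backs device_map="auto"; spaces provides the @spaces.GPU decorator, which on ZeroGPU hardware reserves a GPU for up to the stated duration (100 s here) per call. bitsandbytes appears unused by app.py as committed.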