Spaces: Running on Zero
zRzRzRzRzRzRzR committed on
Commit · a001585
1 Parent(s): 4fa9584

app.py CHANGED
@@ -15,20 +15,23 @@ import time
 MODEL_PATH = "THUDM/GLM-4.1V-9B-Thinking"
 stop_generation = False
 
+processor = None
+model = None
+
+def load_model():
+    """Load the model and processor."""
+    global processor, model
+    processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
+    model = Glm4vForConditionalGeneration.from_pretrained(
+        MODEL_PATH,
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
+        attn_implementation="sdpa",
+    )
 
 class GLM4VModel:
     def __init__(self):
-        self.processor = None
-        self.model = None
-
-    def load(self):
-        self.processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
-        self.model = Glm4vForConditionalGeneration.from_pretrained(
-            MODEL_PATH,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            attn_implementation="sdpa",
-        )
+        pass
 
     def _strip_html(self, t):
         return re.sub(r"<[^>]+>", "", t).strip()
@@ -125,19 +128,19 @@ class GLM4VModel:
 
     @spaces.GPU(duration=240)
     def stream_generate(self, raw_hist, sys_prompt):
-        global stop_generation
+        global stop_generation, processor, model
         stop_generation = False
         msgs = self._build_messages(raw_hist, sys_prompt)
-        inputs = self.processor.apply_chat_template(
+        inputs = processor.apply_chat_template(
             msgs,
             tokenize=True,
             add_generation_prompt=True,
             return_dict=True,
             return_tensors="pt",
             padding=True,
-        ).to(self.model.device)
+        ).to(model.device)
 
-        streamer = TextIteratorStreamer(self.processor.tokenizer, skip_prompt=True, skip_special_tokens=False)
+        streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=False)
         gen_args = dict(
             inputs,
             max_new_tokens=8192,
@@ -149,7 +152,7 @@ class GLM4VModel:
             streamer=streamer,
         )
 
-        generation_thread = threading.Thread(target=self.model.generate, kwargs=gen_args)
+        generation_thread = threading.Thread(target=model.generate, kwargs=gen_args)
         generation_thread.start()
 
         buf = ""
@@ -190,8 +193,9 @@ def create_display_history(raw_hist):
     return display_hist
 
 
+# Load the model and processor
+load_model()
 glm4v = GLM4VModel()
-glm4v.load()
 
 
 def check_files(files):
@@ -310,4 +314,4 @@ with demo:
     clear.click(reset, outputs=[chatbox, raw_history, up, textbox])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
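
The updated stream_generate relies on the threaded streaming pattern from transformers: model.generate runs in a background thread while the caller drains a TextIteratorStreamer, receiving decoded text as it is produced. Below is a minimal self-contained sketch of that pattern, not the Space's own code; the "gpt2" checkpoint and the prompt are placeholders chosen only to keep the example small.

import threading

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Placeholder model/tokenizer; the Space itself uses AutoProcessor + Glm4vForConditionalGeneration.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Hello, my name is", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)

# generate() blocks, so it runs in a worker thread while the main thread consumes the streamer.
gen_args = dict(inputs, max_new_tokens=32, streamer=streamer)
thread = threading.Thread(target=model.generate, kwargs=gen_args)
thread.start()

buf = ""
for piece in streamer:  # yields decoded text chunks as they are generated
    buf += piece
    print(piece, end="", flush=True)
thread.join()

The same shape appears in the diff above: gen_args = dict(inputs, ..., streamer=streamer), a threading.Thread(target=model.generate, kwargs=gen_args), and a loop that accumulates chunks into buf.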