zRzRzRzRzRzRzR committed
Commit a001585 · 1 Parent(s): 4fa9584
Files changed (1):
  1. app.py  +22 -18
app.py CHANGED
@@ -15,20 +15,23 @@ import time
 MODEL_PATH = "THUDM/GLM-4.1V-9B-Thinking"
 stop_generation = False
 
+processor = None
+model = None
+
+def load_model():
+    """Load the model and processor."""
+    global processor, model
+    processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
+    model = Glm4vForConditionalGeneration.from_pretrained(
+        MODEL_PATH,
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
+        attn_implementation="sdpa",
+    )
 
 class GLM4VModel:
     def __init__(self):
-        self.processor = None
-        self.model = None
-
-    def load(self):
-        self.processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
-        self.model = Glm4vForConditionalGeneration.from_pretrained(
-            MODEL_PATH,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            attn_implementation="sdpa",
-        )
+        pass
 
     def _strip_html(self, t):
         return re.sub(r"<[^>]+>", "", t).strip()
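Note: as rewritten, load_model() reloads the weights if it is ever called a second time. A minimal idempotent variant, shown only as a sketch (the early-return guard is an assumption, not part of this commit):

import torch
from transformers import AutoProcessor, Glm4vForConditionalGeneration

MODEL_PATH = "THUDM/GLM-4.1V-9B-Thinking"
processor = None
model = None

def load_model():
    """Load the model and processor once; later calls are no-ops."""
    global processor, model
    if model is not None:
        return  # already loaded; skip the expensive re-initialization (assumed guard)
    processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
    model = Glm4vForConditionalGeneration.from_pretrained(
        MODEL_PATH,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        attn_implementation="sdpa",
    )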
@@ -125,19 +128,19 @@ class GLM4VModel:
 
     @spaces.GPU(duration=240)
     def stream_generate(self, raw_hist, sys_prompt):
-        global stop_generation
+        global stop_generation, processor, model
         stop_generation = False
         msgs = self._build_messages(raw_hist, sys_prompt)
-        inputs = self.processor.apply_chat_template(
+        inputs = processor.apply_chat_template(
             msgs,
             tokenize=True,
             add_generation_prompt=True,
             return_dict=True,
             return_tensors="pt",
             padding=True,
-        ).to(self.model.device)
+        ).to(model.device)
 
-        streamer = TextIteratorStreamer(self.processor.tokenizer, skip_prompt=True, skip_special_tokens=False)
+        streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=False)
         gen_args = dict(
             inputs,
             max_new_tokens=8192,
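_build_messages sits outside this hunk, but apply_chat_template with tokenize=True and return_dict=True expects messages in the standard transformers chat format. A hedged sketch of a single-turn multimodal input (the content keys follow the usual transformers conventions and are an assumption about what _build_messages returns):

# Hypothetical single-turn input; the real shape comes from _build_messages.
msgs = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://example.com/cat.png"},  # assumed key
            {"type": "text", "text": "Describe this image."},
        ],
    }
]
inputs = processor.apply_chat_template(
    msgs,
    tokenize=True,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
    padding=True,
).to(model.device)  # BatchEncoding with input_ids, attention_mask, pixel_values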
@@ -149,7 +152,7 @@ class GLM4VModel:
             streamer=streamer,
         )
 
-        generation_thread = threading.Thread(target=self.model.generate, kwargs=gen_args)
+        generation_thread = threading.Thread(target=model.generate, kwargs=gen_args)
         generation_thread.start()
 
         buf = ""
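These two hunks keep the usual transformers streaming idiom: model.generate runs on a worker thread while the caller drains a TextIteratorStreamer. Condensed into a standalone sketch (the loop body is illustrative; the real handler also rebuilds the chat history it yields):

import threading
from transformers import TextIteratorStreamer

def stream_reply(inputs):
    """Yield the growing reply as generate() produces tokens."""
    streamer = TextIteratorStreamer(
        processor.tokenizer, skip_prompt=True, skip_special_tokens=False
    )
    gen_args = dict(inputs, max_new_tokens=8192, streamer=streamer)
    threading.Thread(target=model.generate, kwargs=gen_args).start()
    buf = ""
    for chunk in streamer:   # blocks until the worker emits more text
        if stop_generation:  # stop consuming; the worker finishes on its own
            break
        buf += chunk
        yield buf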
@@ -190,8 +193,9 @@ def create_display_history(raw_hist):
     return display_hist
 
 
+# Load the model and processor
+load_model()
 glm4v = GLM4VModel()
-glm4v.load()
 
 
 def check_files(files):
@@ -310,4 +314,4 @@ with demo:
     clear.click(reset, outputs=[chatbox, raw_history, up, textbox])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
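Nothing in this diff sets stop_generation back to True; presumably a stop button elsewhere in the Gradio UI flips it. A hypothetical handler (name and wiring assumed):

def stop():
    global stop_generation
    stop_generation = True  # stream_generate checks this flag between chunks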
 