yejunliang23 committed on
Commit
c4e7abe
·
unverified ·
1 Parent(s): 0166d6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -24
app.py CHANGED
@@ -36,6 +36,7 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
36
  )
37
  processor = AutoProcessor.from_pretrained(MODEL_DIR)
38
 
 
39
  def chat_qwen_vl(message: str, history: list, temperature: float = 0.1, max_new_tokens: int = 1024):
40
  # —— 原有多模态输入构造 —— #
41
  messages = [
@@ -49,46 +50,30 @@ def chat_qwen_vl(message: str, history: list, temperature: float = 0.1, max_new_
49
  text = processor.apply_chat_template(
50
  messages, tokenize=False, add_generation_prompt=True
51
  )
 
52
  image_inputs, video_inputs = process_vision_info(messages)
53
  inputs = processor(
54
  text=[text],
55
  images=image_inputs,
56
  videos=video_inputs,
57
- padding=True,
58
  return_tensors="pt"
59
  ).to(model.device)
60
 
61
- # —— 流式生成部分 —— #
62
- # 1. 构造 streamer,用 processor.tokenizer(AutoProcessor 内部自带 tokenizer)
63
- streamer = TextIteratorStreamer(
64
- processor.tokenizer,
65
- timeout=100.0,
66
- skip_prompt=True,
67
- skip_special_tokens=True
68
- )
69
-
70
  # 2. 把 streamer 和生成参数一起传给 model.generate
71
  gen_kwargs = dict(
72
  **inputs, # 包含 input_ids, pixel_values, attention_mask 等
73
- streamer=streamer, # 关键:挂载 streamer
74
  top_k=1024,
75
  max_new_tokens=max_new_tokens,
76
  temperature=temperature,
77
  top_p=0.1
78
  )
79
- # 如果需要零温度贪心,则关闭采样
80
- if gen_kwargs["temperature"] == 0:
81
- gen_kwargs["do_sample"] = False
82
-
83
- # 3. 在后台线程中启动生成
84
- Thread(target=model.generate, kwargs=gen_kwargs).start()
85
-
86
- # 4. 在主线程中实时读取并 yield
87
- buffer = []
88
- for chunk in streamer:
89
- buffer.append(chunk)
90
- # 每次拿到新片段就拼接并输出
91
- yield "".join(buffer)
92
 
93
 
94
  # --------- 3D Mesh Coloring Function ---------
 
36
  )
37
  processor = AutoProcessor.from_pretrained(MODEL_DIR)
38
 
39
+
40
  def chat_qwen_vl(message: str, history: list, temperature: float = 0.1, max_new_tokens: int = 1024):
41
  # —— 原有多模态输入构造 —— #
42
  messages = [
 
50
  text = processor.apply_chat_template(
51
  messages, tokenize=False, add_generation_prompt=True
52
  )
53
+ print(text)
54
  image_inputs, video_inputs = process_vision_info(messages)
55
  inputs = processor(
56
  text=[text],
57
  images=image_inputs,
58
  videos=video_inputs,
59
+ padding=False,
60
  return_tensors="pt"
61
  ).to(model.device)
62
 
 
 
 
 
 
 
 
 
 
63
  # 2. 把 streamer 和生成参数一起传给 model.generate
64
  gen_kwargs = dict(
65
  **inputs, # 包含 input_ids, pixel_values, attention_mask 等
 
66
  top_k=1024,
67
  max_new_tokens=max_new_tokens,
68
  temperature=temperature,
69
  top_p=0.1
70
  )
71
+ generated_ids = model.generate(**gen_kwargs)
72
+ generated_ids_trimmed = [
73
+ out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
74
+ output_text = processor.batch_decode(
75
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
76
+ yield output_text
 
 
 
 
 
 
 
77
 
78
 
79
  # --------- 3D Mesh Coloring Function ---------