yejunliang23 committed on
Commit
c4e7abe
·
unverified ·
1 Parent(s): 0166d6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -24
app.py CHANGED
@@ -36,6 +36,7 @@ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
36
  )
37
  processor = AutoProcessor.from_pretrained(MODEL_DIR)
38
 
 
39
  def chat_qwen_vl(message: str, history: list, temperature: float = 0.1, max_new_tokens: int = 1024):
40
  # —— 原有多模态输入构造 —— #
41
  messages = [
@@ -49,46 +50,30 @@ def chat_qwen_vl(message: str, history: list, temperature: float = 0.1, max_new_
49
  text = processor.apply_chat_template(
50
  messages, tokenize=False, add_generation_prompt=True
51
  )
 
52
  image_inputs, video_inputs = process_vision_info(messages)
53
  inputs = processor(
54
  text=[text],
55
  images=image_inputs,
56
  videos=video_inputs,
57
- padding=True,
58
  return_tensors="pt"
59
  ).to(model.device)
60
 
61
- # —— 流式生成部分 —— #
62
- # 1. 构造 streamer,用 processor.tokenizer(AutoProcessor 内部自带 tokenizer)
63
- streamer = TextIteratorStreamer(
64
- processor.tokenizer,
65
- timeout=100.0,
66
- skip_prompt=True,
67
- skip_special_tokens=True
68
- )
69
-
70
  # 2. 把 streamer 和生成参数一起传给 model.generate
71
  gen_kwargs = dict(
72
  **inputs, # 包含 input_ids, pixel_values, attention_mask 等
73
- streamer=streamer, # 关键:挂载 streamer
74
  top_k=1024,
75
  max_new_tokens=max_new_tokens,
76
  temperature=temperature,
77
  top_p=0.1
78
  )
79
- # 如果需要零温度贪心,则关闭采样
80
- if gen_kwargs["temperature"] == 0:
81
- gen_kwargs["do_sample"] = False
82
-
83
- # 3. 在后台线程中启动生成
84
- Thread(target=model.generate, kwargs=gen_kwargs).start()
85
-
86
- # 4. 在主线程中实时读取并 yield
87
- buffer = []
88
- for chunk in streamer:
89
- buffer.append(chunk)
90
- # 每次拿到新片段就拼接并输出
91
- yield "".join(buffer)
92
 
93
 
94
  # --------- 3D Mesh Coloring Function ---------
 
36
  )
37
  processor = AutoProcessor.from_pretrained(MODEL_DIR)
38
 
39
+
40
  def chat_qwen_vl(message: str, history: list, temperature: float = 0.1, max_new_tokens: int = 1024):
41
  # —— 原有多模态输入构造 —— #
42
  messages = [
 
50
  text = processor.apply_chat_template(
51
  messages, tokenize=False, add_generation_prompt=True
52
  )
53
+ print(text)
54
  image_inputs, video_inputs = process_vision_info(messages)
55
  inputs = processor(
56
  text=[text],
57
  images=image_inputs,
58
  videos=video_inputs,
59
+ padding=False,
60
  return_tensors="pt"
61
  ).to(model.device)
62
 
 
 
 
 
 
 
 
 
 
63
  # 2. 把 streamer 和生成参数一起传给 model.generate
64
  gen_kwargs = dict(
65
  **inputs, # 包含 input_ids, pixel_values, attention_mask 等
 
66
  top_k=1024,
67
  max_new_tokens=max_new_tokens,
68
  temperature=temperature,
69
  top_p=0.1
70
  )
71
+ generated_ids = model.generate(**gen_kwargs)
72
+ generated_ids_trimmed = [
73
+ out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
74
+ output_text = processor.batch_decode(
75
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
76
+ yield output_text
 
 
 
 
 
 
 
77
 
78
 
79
  # --------- 3D Mesh Coloring Function ---------