Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,519 Bytes
a8b0636 80b7578 ebd9056 80b7578 eaa703f ebd9056 80b7578 0d09a3a eaa703f e19b349 80b7578 e19b349 80b7578 e19b349 80b7578 583ea10 80b7578 e19b349 80b7578 e19b349 80b7578 e19b349 0d09a3a e19b349 0d09a3a 80b7578 e19b349 80b7578 e19b349 707a904 e19b349 80b7578 707a904 80b7578 ebd9056 e19b349 80b7578 e19b349 80b7578 e19b349 80b7578 ebd9056 e19b349 80b7578 3e7a2b7 e19b349 3e7a2b7 22fc8c6 bbbd1e2 e19b349 80b7578 707a904 0d09a3a e19b349 80b7578 e19b349 0d09a3a 22fc8c6 e19b349 80b7578 e19b349 3e7a2b7 ebd9056 e19b349 0d09a3a 3e7a2b7 0d09a3a 80b7578 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import spaces
import torch
import gradio as gr
from PIL import Image
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from functools import lru_cache
# Checkpoint identifier handed to `from_pretrained` for both the model and
# its processor (see `_load_model`).
MODEL_ID = "remyxai/SpaceThinker-Qwen2.5VL-3B"
@lru_cache(maxsize=1)
def _load_model():
    """Return the ``(model, processor)`` pair for ``MODEL_ID``.

    The result is memoized by ``lru_cache``, so the checkpoint is only
    instantiated on the first call in a given process; every later call
    gets the very same two objects back.

    Returns:
        A 2-tuple ``(model, processor)``: the bfloat16 model moved to the
        ``"cuda"`` device, and the matching processor.
    """
    vlm = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        MODEL_ID, torch_dtype=torch.bfloat16
    )
    # Move the weights to the GPU once, before the pair is cached.
    vlm = vlm.to("cuda")
    preprocessor = AutoProcessor.from_pretrained(MODEL_ID)
    return vlm, preprocessor
@spaces.GPU
def gpu_inference(image_path: str, prompt: str) -> str:
    """Answer *prompt* about the image stored at *image_path*.

    Fetches the cached model/processor, shrinks the image to at most
    512 px wide, builds a system + user conversation, generates up to
    1024 new tokens and decodes only the newly generated part.

    Args:
        image_path: Path of the image file to reason about.
        prompt: The user's question about that image.

    Returns:
        The model's reply with surrounding whitespace stripped. The prompt
        (system message, user turn, role markers) is not included.

    Raises:
        Whatever ``PIL.Image.open`` raises for a missing/unreadable file;
        errors from the model or processor propagate unchanged.
    """
    model, processor = _load_model()

    # Load and preprocess the image. ``convert`` yields an independent
    # in-memory copy, so the file handle can be released immediately.
    max_width = 512
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    if image.width > max_width:
        ratio = image.height / image.width
        # Clamp to 1 px: for extremely wide images ``int(...)`` would be 0
        # and ``resize`` rejects a zero-sized target.
        new_height = max(1, int(max_width * ratio))
        image = image.resize((max_width, new_height), Image.Resampling.LANCZOS)

    # Build the conversation. Two literal fixes versus the earlier text:
    # the thinking-face emoji is spelled as a real code point instead of the
    # characters "U+1F914", and a separator is inserted between "solve it."
    # and "You should", which used to run together.
    system_msg = (
        "You are VL-Thinking \U0001F914, a helpful assistant with excellent reasoning ability.\n"
        "A user asks you a question, and you should try to solve it. "
        "You should first think about the reasoning process in the mind and then provides the user with the answer.\n"
        "The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think> <answer> answer here </answer>."
    )
    conversation = [
        {"role": "system", "content": [{"type": "text", "text": system_msg}]},
        {"role": "user", "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": prompt},
        ]},
    ]

    # Tokenize and generate.
    chat_input = processor.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(
        text=[chat_input], images=[image], return_tensors="pt"
    ).to("cuda")
    output_ids = model.generate(**inputs, max_new_tokens=1024)

    # ``generate`` returns prompt + continuation. Drop the prompt tokens
    # before decoding rather than splitting the decoded text on the word
    # "assistant": that word also occurs inside the system message
    # ("a helpful assistant ..."), so a text split leaks most of the prompt
    # into the reply.
    prompt_length = inputs["input_ids"].shape[1]
    reply_ids = output_ids[:, prompt_length:]
    reply = processor.batch_decode(
        reply_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]
    return reply.strip()
# Message handling
def add_message(history, user_input):
    """Append the submitted files and text to the chat history.

    Each entry of ``user_input["files"]`` becomes its own user message
    whose content is a 1-tuple holding that entry; a non-empty
    ``user_input["text"]`` is appended afterwards as a plain-string user
    message. A ``None`` history is treated as an empty list; an existing
    list is extended in place.

    Returns:
        ``(history, textbox)`` where ``textbox`` is a
        ``gr.MultimodalTextbox`` created with ``value=None``.
    """
    messages = [] if history is None else history

    messages.extend(
        {"role": "user", "content": (file_entry,)}
        for file_entry in user_input.get("files", [])
    )

    typed_text = user_input.get("text", "")
    if typed_text:
        messages.append({"role": "user", "content": typed_text})

    return messages, gr.MultimodalTextbox(value=None)
def inference_interface(history):
    """Run the model on the latest user text + image and append its reply.

    Scans ``history`` from newest to oldest for the most recent user
    message whose content is a string (the question) and the most recent
    user message whose content is a tuple (its first element is used as
    the image path). If the history is empty or either piece is missing,
    the history is returned untouched; otherwise the result of
    ``gpu_inference`` is appended as an assistant message.

    Returns:
        ``(history, textbox)`` where ``textbox`` is a
        ``gr.MultimodalTextbox`` created with ``value=None``.
    """
    def _result():
        # Every exit path hands back the same pair shape.
        return history, gr.MultimodalTextbox(value=None)

    if not history:
        return _result()

    # Most recent textual user message.
    question = None
    for message in reversed(history):
        if message["role"] == "user" and isinstance(message["content"], str):
            question = message["content"]
            break
    if question is None:
        return _result()

    # Most recent user upload (stored as a tuple whose first item is the path).
    picture = None
    for message in reversed(history):
        if message["role"] == "user" and isinstance(message["content"], tuple):
            picture = message["content"][0]
            break
    if picture is None:
        return _result()

    answer = gpu_inference(picture, question)
    history.append({"role": "assistant", "content": answer})
    return _result()
def build_demo():
    """Assemble the Gradio Blocks UI and return it without launching it.

    Layout: a title, a messages-style chatbot, a multimodal textbox that
    accepts image files, and a row with "Send" and clear buttons. Both
    pressing Enter in the textbox and clicking "Send" run ``add_message``
    followed by ``inference_interface``.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# SpaceThinker-Qwen2.5VL-3B")
        chatbot = gr.Chatbot([], type="messages", label="Conversation")
        chat_input = gr.MultimodalTextbox(
            interactive=True,
            file_types=["image"],
            placeholder="Enter text and upload an image.",
            show_label=True,
        )

        def _wire(trigger):
            # Step 1 records the user's message; step 2 (chained with
            # ``then``) produces the assistant reply.
            recorded = trigger(
                add_message, [chatbot, chat_input], [chatbot, chat_input]
            )
            recorded.then(
                inference_interface, [chatbot], [chatbot, chat_input]
            )

        _wire(chat_input.submit)

        with gr.Row():
            send_btn = gr.Button("Send")
            gr.ClearButton([chatbot, chat_input])

        _wire(send_btn.click)

    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, keep it bound to the module-level
    # name ``demo``, and start serving with ``share=True``.
    demo = build_demo()
    demo.launch(share=True)
|