mrdbourke committed on
Commit
d41d663
·
verified ·
1 Parent(s): 8539abf

Add Qwen-2.5-VL-3B

Browse files
Files changed (1) hide show
  1. app.py +21 -3
app.py CHANGED
@@ -20,6 +20,16 @@ from transformers import (
20
  # Local imports
21
  from qwen_vl_utils import process_vision_info
22
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  def array_to_image_path(image_array):
25
  if image_array is None:
@@ -41,13 +51,21 @@ def array_to_image_path(image_array):
41
 
42
  models = {
43
  "Qwen/Qwen2.5-VL-7B-Instruct": Qwen2_5_VLForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct",
 
 
 
 
 
44
  trust_remote_code=True,
45
  torch_dtype="auto",
46
  device_map="auto").eval()
 
47
  }
48
 
49
  processors = {
50
- "Qwen/Qwen2.5-VL-7B-Instruct": AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", trust_remote_code=True)
 
 
51
  }
52
 
53
  DESCRIPTION = "[Qwen2.5-VL Demo](https://huggingface.co/collections/Qwen/qwen25-vl-6795ffac22b334a837c0f9a5)"
@@ -94,7 +112,7 @@ def run_example(image, text_input=None, model_id=None):
94
  padding=True,
95
  return_tensors="pt",
96
  )
97
- inputs = inputs.to("cuda")
98
 
99
  # Inference: Generation of the output
100
  generated_ids = model.generate(**inputs, max_new_tokens=1024)
@@ -127,7 +145,7 @@ with gr.Blocks(css=css) as demo:
127
  model_selector = gr.Dropdown(choices=list(models.keys()),
128
  label="Model",
129
  value="Qwen/Qwen2.5-VL-7B-Instruct")
130
- text_input = gr.Textbox(label="Question")
131
  submit_btn = gr.Button(value="Submit")
132
  with gr.Column():
133
  output_text = gr.Textbox(label="Output Text")
 
20
  # Local imports
21
  from qwen_vl_utils import process_vision_info
22
 
23
+ # Set device agnostic code
24
+ if torch.cuda.is_available():
25
+ device = "cuda"
26
+ elif (torch.backends.mps.is_available()) and (torch.backends.mps.is_built()):
27
+ device = "mps"
28
+ else:
29
+ device = "cpu"
30
+
31
+ print(f"[INFO] Using device: {device}")
32
+
33
 
34
  def array_to_image_path(image_array):
35
  if image_array is None:
 
51
 
52
  models = {
53
  "Qwen/Qwen2.5-VL-7B-Instruct": Qwen2_5_VLForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct",
54
+ trust_remote_code=True,
55
+ torch_dtype="auto",
56
+ device_map="auto").eval(),
57
+
58
+ "Qwen/Qwen2.5-VL-3B-Instruct": Qwen2_5_VLForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct",
59
  trust_remote_code=True,
60
  torch_dtype="auto",
61
  device_map="auto").eval()
62
+
63
  }
64
 
65
  processors = {
66
+ "Qwen/Qwen2.5-VL-7B-Instruct": AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", trust_remote_code=True),
67
+ "Qwen/Qwen2.5-VL-3B-Instruct": AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct", trust_remote_code=True)
68
+
69
  }
70
 
71
  DESCRIPTION = "[Qwen2.5-VL Demo](https://huggingface.co/collections/Qwen/qwen25-vl-6795ffac22b334a837c0f9a5)"
 
112
  padding=True,
113
  return_tensors="pt",
114
  )
115
+ inputs = inputs.to(device)
116
 
117
  # Inference: Generation of the output
118
  generated_ids = model.generate(**inputs, max_new_tokens=1024)
 
145
  model_selector = gr.Dropdown(choices=list(models.keys()),
146
  label="Model",
147
  value="Qwen/Qwen2.5-VL-7B-Instruct")
148
+ text_input = gr.Textbox(label="Text Prompt")
149
  submit_btn = gr.Button(value="Submit")
150
  with gr.Column():
151
  output_text = gr.Textbox(label="Output Text")