Update app.py

app.py CHANGED
@@ -19,7 +19,7 @@ from PIL import Image
 import fitz
 
 from transformers import (
-
+    Qwen2VLForConditionalGeneration,
     AutoModelForVision2Seq,
     AutoModelForImageTextToText,
     AutoModel,
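The new import brings in the concrete Qwen2-VL class that model_g is loaded with in the next hunk. As a quick sanity check (a sketch, not part of app.py; it assumes MODEL_ID_G and SUBFOLDER are defined as in the hunk below), the checkpoint's declared architecture can be inspected before hard-coding the class:

```python
from transformers import AutoConfig

# Assumes MODEL_ID_G and SUBFOLDER exist as defined elsewhere in app.py.
config = AutoConfig.from_pretrained(MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER)
print(config.architectures)  # e.g. ["Qwen2VLForConditionalGeneration"] for a Qwen2-VL checkpoint
```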
@@ -73,14 +73,14 @@ SUBFOLDER = "Recognition"
 processor_g = AutoProcessor.from_pretrained(
     MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER
 )
-model_g =
+model_g = Qwen2VLForConditionalGeneration.from_pretrained(
     MODEL_ID_G, trust_remote_code=True, subfolder=SUBFOLDER, torch_dtype=torch.float16
 ).to(device).eval()
 
 MODEL_ID_I = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
 processor_i = AutoProcessor.from_pretrained(MODEL_ID_I, trust_remote_code=True)
 model_i = AutoModelForImageTextToText.from_pretrained(
-    MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.
+    MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.bfloat16, _attn_implementation="flash_attention_2"
 ).to(device).eval()
 
 
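Two things change in this hunk: model_g now uses the explicit Qwen2VLForConditionalGeneration class in float16, and model_i is loaded in bfloat16 with flash_attention_2. FlashAttention-2 in transformers requires the separate flash-attn package, a supported CUDA GPU, and a half-precision dtype, so a minimal sketch of a defensive variant (an assumption on my part, not what app.py does) would fall back to the default attention implementation when the package is missing:

```python
import torch
from transformers import AutoModelForImageTextToText

MODEL_ID_I = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"

try:
    # flash_attention_2 needs the flash-attn package and a CUDA device.
    model_i = AutoModelForImageTextToText.from_pretrained(
        MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.bfloat16,
        _attn_implementation="flash_attention_2",
    )
except ImportError:
    # Fall back to the default attention implementation (SDPA/eager).
    model_i = AutoModelForImageTextToText.from_pretrained(
        MODEL_ID_I, trust_remote_code=True, torch_dtype=torch.bfloat16,
    )
```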
@@ -186,6 +186,8 @@ def process_document_stream(
     messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": prompt_input}]}]
     prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = processor(text=[prompt_full], images=[image], return_tensors="pt", padding=True, truncation=True, max_length=MAX_INPUT_TOKEN_LENGTH).to(device)
+    # Convert floating point tensors to the model's dtype
+    inputs = {k: v.to(dtype=model.dtype) if isinstance(v, torch.Tensor) and v.dtype.is_floating_point else v for k, v in inputs.items()}
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
 
     generation_kwargs = {
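The two added lines fix a dtype mismatch: processors typically return pixel_values as float32, while the models above are loaded in float16/bfloat16, and feeding float32 features into half-precision weights raises a runtime dtype error. The dict comprehension casts only floating-point tensors to model.dtype, leaving integer tensors such as input_ids and attention_mask untouched. A minimal sketch isolating that logic (cast_floats_to_model_dtype is a hypothetical helper name; it assumes model is a transformers PreTrainedModel, which exposes a dtype attribute):

```python
import torch

def cast_floats_to_model_dtype(inputs: dict, model) -> dict:
    # Hypothetical helper mirroring the comprehension added above: cast only
    # floating-point tensors (e.g. pixel_values) to the model's dtype;
    # integer tensors like input_ids/attention_mask pass through unchanged.
    return {
        k: (v.to(dtype=model.dtype)
            if isinstance(v, torch.Tensor) and v.dtype.is_floating_point
            else v)
        for k, v in inputs.items()
    }
```

Note that the comprehension returns a plain dict rather than the processor's BatchFeature, which is fine as long as it is only unpacked into model.generate(**inputs) afterwards.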