Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -34,7 +34,7 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
34 |
MODEL_ID_M,
|
35 |
trust_remote_code=True,
|
36 |
torch_dtype=torch.float16
|
37 |
-
).to(
|
38 |
|
39 |
# Load DocScope
|
40 |
MODEL_ID_X = "prithivMLmods/docscopeOCR-7B-050425-exp"
|
@@ -43,7 +43,16 @@ model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
43 |
MODEL_ID_X,
|
44 |
trust_remote_code=True,
|
45 |
torch_dtype=torch.float16
|
46 |
-
).to(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
def downsample_video(video_path):
|
49 |
"""
|
@@ -82,6 +91,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
|
|
82 |
elif model_name == "docscopeOCR-7B-050425-exp":
|
83 |
processor = processor_x
|
84 |
model = model_x
|
|
|
|
|
|
|
85 |
else:
|
86 |
yield "Invalid model selected."
|
87 |
return
|
@@ -105,7 +117,7 @@ def generate_image(model_name: str, text: str, image: Image.Image,
|
|
105 |
padding=True,
|
106 |
truncation=False,
|
107 |
max_length=MAX_INPUT_TOKEN_LENGTH
|
108 |
-
).to(
|
109 |
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
|
110 |
generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
|
111 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
@@ -133,6 +145,9 @@ def generate_video(model_name: str, text: str, video_path: str,
|
|
133 |
elif model_name == "docscopeOCR-7B-050425-exp":
|
134 |
processor = processor_x
|
135 |
model = model_x
|
|
|
|
|
|
|
136 |
else:
|
137 |
yield "Invalid model selected."
|
138 |
return
|
@@ -158,7 +173,7 @@ def generate_video(model_name: str, text: str, video_path: str,
|
|
158 |
return_tensors="pt",
|
159 |
truncation=False,
|
160 |
max_length=MAX_INPUT_TOKEN_LENGTH
|
161 |
-
).to(
|
162 |
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
|
163 |
generation_kwargs = {
|
164 |
**inputs,
|
@@ -222,7 +237,6 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
|
|
222 |
examples=video_examples,
|
223 |
inputs=[video_query, video_upload]
|
224 |
)
|
225 |
-
|
226 |
with gr.Accordion("Advanced options", open=False):
|
227 |
max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
|
228 |
temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
|
@@ -232,9 +246,10 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
|
|
232 |
with gr.Column():
|
233 |
output = gr.Textbox(label="Output", interactive=False)
|
234 |
model_choice = gr.Radio(
|
235 |
-
choices=["Cosmos-Reason1-7B", "docscopeOCR-7B-050425-exp"],
|
236 |
label="Select Model",
|
237 |
-
|
|
|
238 |
|
239 |
image_submit.click(
|
240 |
fn=generate_image,
|
|
|
34 |
MODEL_ID_M,
|
35 |
trust_remote_code=True,
|
36 |
torch_dtype=torch.float16
|
37 |
+
).to(device).eval()
|
38 |
|
39 |
# Load DocScope
|
40 |
MODEL_ID_X = "prithivMLmods/docscopeOCR-7B-050425-exp"
|
|
|
43 |
MODEL_ID_X,
|
44 |
trust_remote_code=True,
|
45 |
torch_dtype=torch.float16
|
46 |
+
).to(device).eval()
|
47 |
+
|
48 |
+
# Load Inkscope-Captions (selected in the UI as "Captions-Mini")
|
49 |
+
MODEL_ID_Z = "prithivMLmods/Inkscope-Captions-2B-0526"
|
50 |
+
processor_z = AutoProcessor.from_pretrained(MODEL_ID_Z, trust_remote_code=True)
|
51 |
+
model_z = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
52 |
+
MODEL_ID_Z,
|
53 |
+
trust_remote_code=True,
|
54 |
+
torch_dtype=torch.float16
|
55 |
+
).to(device).eval()
|
56 |
|
57 |
def downsample_video(video_path):
|
58 |
"""
|
|
|
91 |
elif model_name == "docscopeOCR-7B-050425-exp":
|
92 |
processor = processor_x
|
93 |
model = model_x
|
94 |
+
elif model_name == "Captions-Mini":
|
95 |
+
processor = processor_z
|
96 |
+
model = model_z
|
97 |
else:
|
98 |
yield "Invalid model selected."
|
99 |
return
|
|
|
117 |
padding=True,
|
118 |
truncation=False,
|
119 |
max_length=MAX_INPUT_TOKEN_LENGTH
|
120 |
+
).to(device)
|
121 |
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
|
122 |
generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
|
123 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
|
|
145 |
elif model_name == "docscopeOCR-7B-050425-exp":
|
146 |
processor = processor_x
|
147 |
model = model_x
|
148 |
+
elif model_name == "Captions-Mini":
|
149 |
+
processor = processor_z
|
150 |
+
model = model_z
|
151 |
else:
|
152 |
yield "Invalid model selected."
|
153 |
return
|
|
|
173 |
return_tensors="pt",
|
174 |
truncation=False,
|
175 |
max_length=MAX_INPUT_TOKEN_LENGTH
|
176 |
+
).to(device)
|
177 |
streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
|
178 |
generation_kwargs = {
|
179 |
**inputs,
|
|
|
237 |
examples=video_examples,
|
238 |
inputs=[video_query, video_upload]
|
239 |
)
|
|
|
240 |
with gr.Accordion("Advanced options", open=False):
|
241 |
max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS)
|
242 |
temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
|
|
|
246 |
with gr.Column():
|
247 |
output = gr.Textbox(label="Output", interactive=False)
|
248 |
model_choice = gr.Radio(
|
249 |
+
choices=["Cosmos-Reason1-7B", "docscopeOCR-7B-050425-exp", "Captions-Mini"],
|
250 |
label="Select Model",
|
251 |
+
value="Cosmos-Reason1-7B"
|
252 |
+
)
|
253 |
|
254 |
image_submit.click(
|
255 |
fn=generate_image,
|