Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -17,8 +17,8 @@ from transformers import (
|
|
17 |
from qwen_vl_utils import process_vision_info
|
18 |
|
19 |
# Constants for text generation
|
20 |
-
MAX_MAX_NEW_TOKENS =
|
21 |
-
DEFAULT_MAX_NEW_TOKENS =
|
22 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
23 |
|
24 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
@@ -61,7 +61,6 @@ model_s = Glm4vForConditionalGeneration.from_pretrained(
|
|
61 |
torch_dtype=torch.float16
|
62 |
).to(device).eval()
|
63 |
|
64 |
-
|
65 |
def downsample_video(video_path):
|
66 |
"""
|
67 |
Downsample a video to evenly spaced frames, returning each as a PIL image with its timestamp.
|
@@ -219,7 +218,7 @@ video_examples = [
|
|
219 |
["explain the video in detail.", "videos/2.mp4"]
|
220 |
]
|
221 |
|
222 |
-
#
|
223 |
css = """
|
224 |
.submit-btn {
|
225 |
background-color: #2980b9 !important;
|
@@ -233,11 +232,17 @@ css = """
|
|
233 |
border-radius: 10px;
|
234 |
padding: 20px;
|
235 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
"""
|
237 |
|
238 |
# Create the Gradio Interface
|
239 |
with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
|
240 |
-
gr.Markdown("# **[Multimodal
|
241 |
with gr.Row():
|
242 |
with gr.Column():
|
243 |
with gr.Tabs():
|
@@ -276,7 +281,8 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
|
|
276 |
model_choice = gr.Radio(
|
277 |
choices=["Camel-Doc-OCR-062825", "GLM-4.1V-9B-Thinking", "Megalodon-OCR-Sync-0713", "MonkeyOCR-pro-1.2B"],
|
278 |
label="Select Model",
|
279 |
-
value="Camel-Doc-OCR-062825"
|
|
|
280 |
)
|
281 |
|
282 |
gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-OCR-Comparator/discussions)")
|
|
|
17 |
from qwen_vl_utils import process_vision_info
|
18 |
|
19 |
# Constants for text generation
|
20 |
+
MAX_MAX_NEW_TOKENS = 2048
|
21 |
+
DEFAULT_MAX_NEW_TOKENS = 1024
|
22 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
23 |
|
24 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
|
|
61 |
torch_dtype=torch.float16
|
62 |
).to(device).eval()
|
63 |
|
|
|
64 |
def downsample_video(video_path):
|
65 |
"""
|
66 |
Downsample a video to evenly spaced frames, returning each as a PIL image with its timestamp.
|
|
|
218 |
["explain the video in detail.", "videos/2.mp4"]
|
219 |
]
|
220 |
|
221 |
+
# Updated CSS with model choice highlighting
|
222 |
css = """
|
223 |
.submit-btn {
|
224 |
background-color: #2980b9 !important;
|
|
|
232 |
border-radius: 10px;
|
233 |
padding: 20px;
|
234 |
}
|
235 |
+
.model-choice label {
|
236 |
+
color: red;
|
237 |
+
}
|
238 |
+
.model-choice input[value="Camel-Doc-OCR-062825"] + label {
|
239 |
+
color: blue;
|
240 |
+
}
|
241 |
"""
|
242 |
|
243 |
# Create the Gradio Interface
|
244 |
with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
|
245 |
+
gr.Markdown("# **[Multimodal VLMOCR](https://huggingface.co/collections/prithivMLmods/multimodal-implementations-67c9982ea04b39f0608badb0)**")
|
246 |
with gr.Row():
|
247 |
with gr.Column():
|
248 |
with gr.Tabs():
|
|
|
281 |
model_choice = gr.Radio(
|
282 |
choices=["Camel-Doc-OCR-062825", "GLM-4.1V-9B-Thinking", "Megalodon-OCR-Sync-0713", "MonkeyOCR-pro-1.2B"],
|
283 |
label="Select Model",
|
284 |
+
value="Camel-Doc-OCR-062825",
|
285 |
+
elem_classes=["model-choice"]
|
286 |
)
|
287 |
|
288 |
gr.Markdown("**Model Info 💻** | [Report Bug](https://huggingface.co/spaces/prithivMLmods/Multimodal-OCR-Comparator/discussions)")
|