Update app.py
Browse files
app.py
CHANGED
@@ -321,9 +321,11 @@ def layoutjson2md(image: Image.Image, layout_data: List[Dict], text_key: str = '
|
|
321 |
return "\n".join(markdown_lines)
|
322 |
|
323 |
# Initialize model and processor at script level
|
324 |
-
model_id = "rednote-hilab/dots.ocr"
|
|
|
|
|
325 |
model_path = "./models/dots-ocr-local"
|
326 |
-
snapshot_download(
|
327 |
repo_id=model_id,
|
328 |
local_dir=model_path,
|
329 |
local_dir_use_symlinks=False, # Recommended to set to False to avoid symlink issues
|
@@ -401,7 +403,8 @@ def inference(image: Image.Image, prompt: str, max_new_tokens: int = 24000) -> s
|
|
401 |
**inputs,
|
402 |
max_new_tokens=max_new_tokens,
|
403 |
do_sample=False,
|
404 |
-
temperature=0.1
|
|
|
405 |
)
|
406 |
|
407 |
# Decode output
|
@@ -645,7 +648,7 @@ def create_gradio_interface():
|
|
645 |
A state-of-the-art image/pdf-to-markdown vision language model for intelligent document processing
|
646 |
</p>
|
647 |
<div style="display: flex; justify-content: center; gap: 20px; margin: 15px 0;">
|
648 |
-
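<a href="https://huggingface.co/rednote-hilab/dots.ocr" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">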
|
649 |
📚 Hugging Face Model
|
650 |
</a>
|
651 |
<a href="https://github.com/rednote-hilab/dots.ocr/blob/master/assets/blog.md" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
|
|
|
321 |
return "\n".join(markdown_lines)
|
322 |
|
323 |
# Initialize model and processor at script level
|
324 |
+
# model_id = "rednote-hilab/dots.ocr"
|
325 |
+
model_id = "helizac/dots.ocr-4bit"
|
326 |
+
|
327 |
model_path = "./models/dots-ocr-local"
|
328 |
+
model_path = snapshot_download(
|
329 |
repo_id=model_id,
|
330 |
local_dir=model_path,
|
331 |
local_dir_use_symlinks=False, # Recommended to set to False to avoid symlink issues
|
|
|
403 |
**inputs,
|
404 |
max_new_tokens=max_new_tokens,
|
405 |
do_sample=False,
|
406 |
+
# temperature=0.1
|
407 |
+
temperature=0.6, top_p=0.9, repetition_penalty=1.15
|
408 |
)
|
409 |
|
410 |
# Decode output
|
|
|
648 |
A state-of-the-art image/pdf-to-markdown vision language model for intelligent document processing
|
649 |
</p>
|
650 |
<div style="display: flex; justify-content: center; gap: 20px; margin: 15px 0;">
|
651 |
+
<a href="https://huggingface.co/helizac/dots.ocr-4bit" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
|
652 |
📚 Hugging Face Model
|
653 |
</a>
|
654 |
<a href="https://github.com/rednote-hilab/dots.ocr/blob/master/assets/blog.md" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
|