Dots-OCR

Runtime error

Chillarmo committed 6 days ago

Commit

53c9557

verified ·

1 Parent(s): 5d256ae

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -321,9 +321,11 @@ def layoutjson2md(image: Image.Image, layout_data: List[Dict], text_key: str = '
     return "\n".join(markdown_lines)
 # Initialize model and processor at script level
-model_id = "rednote-hilab/dots.ocr"
 model_path = "./models/dots-ocr-local"
-snapshot_download(
     repo_id=model_id,
     local_dir=model_path,
     local_dir_use_symlinks=False, # Recommended to set to False to avoid symlink issues
@@ -401,7 +403,8 @@ def inference(image: Image.Image, prompt: str, max_new_tokens: int = 24000) -> s
                 **inputs,
                 max_new_tokens=max_new_tokens,
                 do_sample=False,
-                temperature=0.1
             )
         # Decode output
@@ -645,7 +648,7 @@ def create_gradio_interface():
                 A state-of-the-art image/pdf-to-markdown vision language model for intelligent document processing
             </p>
             <div style="display: flex; justify-content: center; gap: 20px; margin: 15px 0;">
-                <a href="https://huggingface.co/rednote-hilab/dots.ocr" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
                     📚 Hugging Face Model
                 </a>
                 <a href="https://github.com/rednote-hilab/dots.ocr/blob/master/assets/blog.md" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">

     return "\n".join(markdown_lines)
 # Initialize model and processor at script level
+# model_id = "rednote-hilab/dots.ocr"
+model_id = "helizac/dots.ocr-4bit"
 model_path = "./models/dots-ocr-local"
+model_path = snapshot_download(
     repo_id=model_id,
     local_dir=model_path,
     local_dir_use_symlinks=False, # Recommended to set to False to avoid symlink issues
                 **inputs,
                 max_new_tokens=max_new_tokens,
                 do_sample=False,
+                # temperature=0.1
+                temperature=0.6, top_p=0.9, repetition_penalty=1.15
             )
         # Decode output
                 A state-of-the-art image/pdf-to-markdown vision language model for intelligent document processing
             </p>
             <div style="display: flex; justify-content: center; gap: 20px; margin: 15px 0;">
+                <a href="https://huggingface.co/helizac/dots.ocr-4bit" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
                     📚 Hugging Face Model
                 </a>
                 <a href="https://github.com/rednote-hilab/dots.ocr/blob/master/assets/blog.md" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">