Chillarmo committed on
Commit
53c9557
·
verified ·
1 Parent(s): 5d256ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -321,9 +321,11 @@ def layoutjson2md(image: Image.Image, layout_data: List[Dict], text_key: str = '
321
  return "\n".join(markdown_lines)
322
 
323
  # Initialize model and processor at script level
324
- model_id = "rednote-hilab/dots.ocr"
 
 
325
  model_path = "./models/dots-ocr-local"
326
- snapshot_download(
327
  repo_id=model_id,
328
  local_dir=model_path,
329
  local_dir_use_symlinks=False, # Recommended to set to False to avoid symlink issues
@@ -401,7 +403,8 @@ def inference(image: Image.Image, prompt: str, max_new_tokens: int = 24000) -> s
401
  **inputs,
402
  max_new_tokens=max_new_tokens,
403
  do_sample=False,
404
- temperature=0.1
 
405
  )
406
 
407
  # Decode output
@@ -645,7 +648,7 @@ def create_gradio_interface():
645
  A state-of-the-art image/pdf-to-markdown vision language model for intelligent document processing
646
  </p>
647
  <div style="display: flex; justify-content: center; gap: 20px; margin: 15px 0;">
648
- <a href="https://huggingface.co/rednote-hilab/dots.ocr" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
649
  📚 Hugging Face Model
650
  </a>
651
  <a href="https://github.com/rednote-hilab/dots.ocr/blob/master/assets/blog.md" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
 
321
  return "\n".join(markdown_lines)
322
 
323
  # Initialize model and processor at script level
324
+ # model_id = "rednote-hilab/dots.ocr"
325
+ model_id = "helizac/dots.ocr-4bit"
326
+
327
  model_path = "./models/dots-ocr-local"
328
+ model_path = snapshot_download(
329
  repo_id=model_id,
330
  local_dir=model_path,
331
  local_dir_use_symlinks=False, # Recommended to set to False to avoid symlink issues
 
403
  **inputs,
404
  max_new_tokens=max_new_tokens,
405
  do_sample=False,
406
+ # temperature=0.1
407
+ temperature=0.6, top_p=0.9, repetition_penalty=1.15
408
  )
409
 
410
  # Decode output
 
648
  A state-of-the-art image/pdf-to-markdown vision language model for intelligent document processing
649
  </p>
650
  <div style="display: flex; justify-content: center; gap: 20px; margin: 15px 0;">
651
+ <a href="https://huggingface.co/helizac/dots.ocr-4bit" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
652
  📚 Hugging Face Model
653
  </a>
654
  <a href="https://github.com/rednote-hilab/dots.ocr/blob/master/assets/blog.md" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">