taprosoft committed on
Commit
007293f
·
1 Parent(s): 1ae7633

fix: use ZERO

Browse files
Files changed (5) hide show
  1. Dockerfile +0 -42
  2. README.md +4 -1
  3. app.py +7 -0
  4. backends/smoldocling.py +2 -0
  5. requirements.txt +2 -1
Dockerfile DELETED
@@ -1,42 +0,0 @@
1
- FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
2
-
3
- ARG DEBIAN_FRONTEND=noninteractive
4
-
5
- ENV PYTHONUNBUFFERED=1
6
-
7
- RUN apt-get update && apt-get install --no-install-recommends -y \
8
- build-essential \
9
- python3.10-dev \
10
- python3-pip \
11
- wget \
12
- git \
13
- ffmpeg \
14
- poppler-utils \
15
- libpoppler-dev \
16
- tesseract-ocr \
17
- && apt-get clean && rm -rf /var/lib/apt/lists/*
18
-
19
- WORKDIR /code
20
-
21
- COPY ./requirements.txt /code/requirements.txt
22
-
23
- # Set up a new user named "user" with user ID 1000
24
- RUN useradd -m -u 1000 user
25
- # Switch to the "user" user
26
- USER user
27
- # Set home to the user's home directory
28
- ENV HOME=/home/user \
29
- PATH=/home/user/.local/bin:$PATH \
30
- PYTHONPATH=$HOME/app \
31
- PYTHONUNBUFFERED=1 \
32
- GRADIO_SERVER_NAME=0.0.0.0
33
-
34
- RUN pip3 install --no-cache-dir --upgrade -r /code/requirements.txt
35
-
36
- # Set the working directory to the user's home directory
37
- WORKDIR $HOME/app
38
-
39
- # Copy the current directory contents into the container at $HOME/app setting the owner to the user
40
- COPY --chown=user . $HOME/app
41
-
42
- CMD ["python3", "app.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -3,7 +3,10 @@ title: SmolDoclingPreview
3
  emoji: 🐢
4
  colorFrom: blue
5
  colorTo: green
6
- sdk: docker
 
 
 
7
  pinned: false
8
  header: mini
9
  short_description: Convert PDFs to Markdown with SmolDoclingPreview
 
3
  emoji: 🐢
4
  colorFrom: blue
5
  colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.12.0
8
+ app_file: app.py
9
+ license: apache-2.0
10
  pinned: false
11
  header: mini
12
  short_description: Convert PDFs to Markdown with SmolDoclingPreview
app.py CHANGED
@@ -26,6 +26,13 @@ MAX_SELECTED_METHODS = int(os.getenv("MAX_SELECTED_METHODS", "6"))
26
  MAX_PAGES = int(os.getenv("MAX_PAGES", "2"))
27
 
28
 
 
 
 
 
 
 
 
29
  def convert_document(path, method, start_page=0, enabled=True):
30
  if enabled:
31
  print("Processing file", path, "with method", method)
 
26
  MAX_PAGES = int(os.getenv("MAX_PAGES", "2"))
27
 
28
 
29
+ # Install poppler-utils
30
+ import os
31
+
32
+ os.system("apt-get update")
33
+ os.system("apt-get install --no-install-recommends -y poppler-utils tesseract-ocr")
34
+
35
+
36
  def convert_document(path, method, start_page=0, enabled=True):
37
  if enabled:
38
  print("Processing file", path, "with method", method)
backends/smoldocling.py CHANGED
@@ -3,6 +3,7 @@
3
  # pip install docling_core
4
  # pip install transformers
5
 
 
6
  import torch
7
  from docling_core.types.doc import DoclingDocument
8
  from docling_core.types.doc.document import DocTagsDocument
@@ -32,6 +33,7 @@ messages = [
32
  ]
33
 
34
 
 
35
  def convert_smoldocling(path: str, file_name: str):
36
  doc = PDF(path)
37
  output_md = ""
 
3
  # pip install docling_core
4
  # pip install transformers
5
 
6
+ import spaces
7
  import torch
8
  from docling_core.types.doc import DoclingDocument
9
  from docling_core.types.doc.document import DocTagsDocument
 
33
  ]
34
 
35
 
36
+ @spaces.GPU(duration=120)
37
  def convert_smoldocling(path: str, file_name: str):
38
  doc = PDF(path)
39
  output_md = ""
requirements.txt CHANGED
@@ -2,7 +2,6 @@ gradio-pdf>=0.0.21
2
  PyMuPDF>=1.24.9,<1.24.14
3
  pymupdf4llm
4
  unstructured[pdf]
5
- ultralytics>=8.3.48
6
  openai
7
  img2table
8
  gmft
@@ -10,3 +9,5 @@ transformers<5.0.0,>=4.45.2
10
  pypdf
11
  docling_core
12
  opencv-contrib-python
 
 
 
2
  PyMuPDF>=1.24.9,<1.24.14
3
  pymupdf4llm
4
  unstructured[pdf]
 
5
  openai
6
  img2table
7
  gmft
 
9
  pypdf
10
  docling_core
11
  opencv-contrib-python
12
+ huggingface_hub
13
+ spaces