Kevin Hu
committed on
Commit
·
10534c3
1
Parent(s):
942993f
pypdf2 to pypdf (#1684)
Browse files
### What problem does this PR solve?
Replaces PyPDF2 with pypdf to address the advisory "pypdf and PyPDF2 possible
Infinite Loop when a comment isn't followed by a character" (#59).
### Type of change
- [x] Refactoring
- deepdoc/parser/pdf_parser.py +1 -1
- requirements.txt +1 -0
- requirements_arm.txt +1 -0
- requirements_dev.txt +1 -0
deepdoc/parser/pdf_parser.py
CHANGED
|
@@ -23,7 +23,7 @@ import logging
|
|
| 23 |
from PIL import Image, ImageDraw
|
| 24 |
import numpy as np
|
| 25 |
from timeit import default_timer as timer
|
| 26 |
-
from PyPDF2 import PdfReader as pdf2_read
|
| 27 |
|
| 28 |
from api.utils.file_utils import get_project_base_directory
|
| 29 |
from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
|
|
|
|
| 23 |
from PIL import Image, ImageDraw
|
| 24 |
import numpy as np
|
| 25 |
from timeit import default_timer as timer
|
| 26 |
+
from pypdf import PdfReader as pdf2_read
|
| 27 |
|
| 28 |
from api.utils.file_utils import get_project_base_directory
|
| 29 |
from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
|
requirements.txt
CHANGED
|
@@ -79,3 +79,4 @@ word2number==1.1
|
|
| 79 |
xgboost==2.1.0
|
| 80 |
xpinyin==0.7.6
|
| 81 |
zhipuai==2.0.1
|
|
|
|
|
|
| 79 |
xgboost==2.1.0
|
| 80 |
xpinyin==0.7.6
|
| 81 |
zhipuai==2.0.1
|
| 82 |
+
pypdf==4.3.0
|
requirements_arm.txt
CHANGED
|
@@ -153,3 +153,4 @@ groq==0.9.0
|
|
| 153 |
wikipedia==1.4.0
|
| 154 |
Bio==1.7.1
|
| 155 |
arxiv==2.1.3
|
|
|
|
|
|
| 153 |
wikipedia==1.4.0
|
| 154 |
Bio==1.7.1
|
| 155 |
arxiv==2.1.3
|
| 156 |
+
pypdf==4.3.0
|
requirements_dev.txt
CHANGED
|
@@ -138,3 +138,4 @@ groq==0.9.0
|
|
| 138 |
wikipedia==1.4.0
|
| 139 |
Bio==1.7.1
|
| 140 |
arxiv==2.1.3
|
|
|
|
|
|
| 138 |
wikipedia==1.4.0
|
| 139 |
Bio==1.7.1
|
| 140 |
arxiv==2.1.3
|
| 141 |
+
pypdf==4.3.0
|