Spaces:
Running
Running
File size: 507 Bytes
d32424b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
from setup.easy_imports import PyPDFLoader
class Document_Class:
    """Convenience helpers for reading PDF files through ``PyPDFLoader``."""

    def load_pdf(self, pdf, ocr=False):
        """Load a PDF with ``PyPDFLoader`` and return the result of ``load()``.

        Args:
            pdf: Passed unchanged as the first argument to ``PyPDFLoader``
                (presumably a file path or URL -- confirm against callers).
            ocr: Forwarded to the loader as its ``extract_images`` option.

        Returns:
            Whatever ``PyPDFLoader(...).load()`` returns; ``get_pdf_text``
            iterates it and reads ``page_content`` from each item.
        """
        return PyPDFLoader(pdf, extract_images=ocr).load()

    def load_and_split_pdf(self, pdf, ocr=False):
        """Load a PDF and return the result of ``load_and_split()``.

        Args:
            pdf: Passed unchanged as the first argument to ``PyPDFLoader``.
            ocr: Forwarded to the loader as its ``extract_images`` option.

        Returns:
            Whatever ``PyPDFLoader(...).load_and_split()`` returns.
        """
        return PyPDFLoader(pdf, extract_images=ocr).load_and_split()

    def get_pdf_text(self, pdf, ocr=False) -> str:
        """Return the text of every loaded item concatenated into one string.

        The ``page_content`` values are joined in the order ``load_pdf``
        yields them, with no separator inserted between items.

        Args:
            pdf: Forwarded to ``load_pdf``.
            ocr: Forwarded to ``load_pdf``.

        Returns:
            The concatenated text, or ``""`` when nothing was loaded.
        """
        # A single join avoids rebuilding the string on every iteration,
        # which repeated ``+=`` may do.
        return "".join(page.page_content for page in self.load_pdf(pdf, ocr))
# Module-level Document_Class instance, created once when this module is imported.
document = Document_Class()
|