Spaces:
Running
Running
from setup.easy_imports import PyPDFLoader | |
class Document_Class:
    """Thin helper around ``PyPDFLoader`` for loading PDFs and reading their text."""

    def load_pdf(self, pdf, ocr: bool = False):
        """Return the result of ``PyPDFLoader(pdf, extract_images=ocr).load()``.

        Args:
            pdf: Forwarded unchanged as the loader's first argument
                (presumably a path to a PDF file -- confirm against callers).
            ocr: Forwarded as the loader's ``extract_images`` option.
        """
        return PyPDFLoader(pdf, extract_images=ocr).load()

    def load_and_split_pdf(self, pdf, ocr: bool = False):
        """Return the result of ``PyPDFLoader(pdf, extract_images=ocr).load_and_split()``.

        Args:
            pdf: Forwarded unchanged as the loader's first argument.
            ocr: Forwarded as the loader's ``extract_images`` option.
        """
        return PyPDFLoader(pdf, extract_images=ocr).load_and_split()

    def get_pdf_text(self, pdf, ocr: bool = False) -> str:
        """Concatenate the ``page_content`` of every item returned by ``load_pdf``.

        Args:
            pdf: Passed through to ``load_pdf``.
            ocr: Passed through to ``load_pdf``.

        Returns:
            All ``page_content`` values joined in order with no separator;
            an empty string when ``load_pdf`` yields nothing.
        """
        pages = self.load_pdf(pdf, ocr)
        # A single join avoids repeated string reallocation from += in a loop.
        return "".join(page.page_content for page in pages)
# Module-level Document_Class instance, created when this module is imported.
document = Document_Class()