luanpoppe
feat: adicionando e melhorando utilitários do langchain
d32424b
raw
history blame contribute delete
507 Bytes
from setup.easy_imports import PyPDFLoader
class Document_Class:
    """Thin convenience wrapper around ``PyPDFLoader`` for reading PDFs."""

    def load_pdf(self, pdf, ocr: bool = False):
        """Load a PDF through ``PyPDFLoader`` and return its documents.

        Args:
            pdf: PDF source handed unchanged to ``PyPDFLoader``
                (presumably a file path -- confirm against callers).
            ocr: Forwarded as the loader's ``extract_images`` option.

        Returns:
            Whatever ``PyPDFLoader(...).load()`` returns. ``get_pdf_text``
            treats it as an iterable of objects exposing ``page_content``.
        """
        return PyPDFLoader(pdf, extract_images=ocr).load()

    def load_and_split_pdf(self, pdf, ocr: bool = False):
        """Load a PDF and return the loader's split documents.

        Args:
            pdf: PDF source handed unchanged to ``PyPDFLoader``
                (presumably a file path -- confirm against callers).
            ocr: Forwarded as the loader's ``extract_images`` option.

        Returns:
            Whatever ``PyPDFLoader(...).load_and_split()`` returns; the
            splitting strategy is the loader's default, since no splitter
            is passed here.
        """
        return PyPDFLoader(pdf, extract_images=ocr).load_and_split()

    def get_pdf_text(self, pdf, ocr: bool = False) -> str:
        """Return the text of every loaded document joined into one string.

        Args:
            pdf: PDF source passed on to ``load_pdf``.
            ocr: Passed on to ``load_pdf``.

        Returns:
            The ``page_content`` of each document, concatenated in order
            with no separator. An empty string when nothing was loaded.
        """
        pages = self.load_pdf(pdf, ocr)
        # A single join avoids the repeated reallocation of ``+=`` in a loop.
        return "".join(page.page_content for page in pages)
# Module-level instance of Document_Class, created once at import time;
# presumably meant to be imported and reused by callers -- confirm usage.
document = Document_Class()