vella-backend / _utils /Handle_Files_Class.py
luanpoppe
feat: criando classe de utilitários
d514965
raw
history blame contribute delete
849 Bytes
from dataclasses import dataclass
from typing import List, Tuple
from _utils.langchain_utils.Splitter_class import Splitter
from _utils.models.gerar_documento import DocumentChunk
@dataclass
class HandleFilesClass:
    """Utility helpers for loading and splitting a group of PDF files."""

    async def get_full_text_and_all_PDFs_chunks(
        self,
        listaPDFs: List[str],
        splitterObject: "Splitter",
        should_use_llama_parse: bool,
        isBubble: bool,
    ) -> Tuple[List["DocumentChunk"], List[str]]:
        """Split every PDF and return the combined chunks and pages.

        Each entry of ``listaPDFs`` is handed, one at a time and in order, to
        ``splitterObject.load_and_split_document`` together with the two
        flags, which are forwarded unchanged. The call is awaited and is
        expected to yield a ``(chunks, pages)`` pair.

        Args:
            listaPDFs: PDF locations to process, in the desired output order.
            splitterObject: Object exposing the awaitable
                ``load_and_split_document(pdf_path, should_use_llama_parse,
                isBubble)``.
            should_use_llama_parse: Forwarded to the splitter as-is
                (presumably selects the LlamaParse loader; confirm in
                ``Splitter``).
            isBubble: Forwarded to the splitter as-is (presumably marks
                requests coming from Bubble; confirm in ``Splitter``).

        Returns:
            A tuple ``(all_chunks, all_pages)`` holding the chunks and the
            pages of every PDF, concatenated in input order. Both lists are
            empty when ``listaPDFs`` is empty.
        """
        all_PDFs_chunks: List["DocumentChunk"] = []
        all_pages: List[str] = []

        # Load and process each document sequentially so that the combined
        # output follows the order of ``listaPDFs``.
        for pdf_path in listaPDFs:
            chunks, pages = await splitterObject.load_and_split_document(
                pdf_path, should_use_llama_parse, isBubble
            )
            all_PDFs_chunks.extend(chunks)
            # Accumulate rather than rebind: rebinding would keep only the
            # pages of the last PDF and drop every earlier document's text.
            all_pages.extend(pages)

        return all_PDFs_chunks, all_pages