Spaces:
Running
Running
from dataclasses import dataclass | |
from typing import List, Tuple | |
from _utils.langchain_utils.Splitter_class import Splitter | |
from _utils.models.gerar_documento import DocumentChunk | |
class HandleFilesClass:
    """Helper that loads a list of PDFs through a splitter object."""

    async def get_full_text_and_all_PDFs_chunks(
        self,
        listaPDFs: List[str],
        splitterObject: "Splitter",
        should_use_llama_parse: bool,
        isBubble: bool,
    ) -> Tuple[List["DocumentChunk"], List[str]]:
        """Load and split every PDF, returning the combined chunks and pages.

        Each path in ``listaPDFs`` is awaited, in order, through
        ``splitterObject.load_and_split_document(pdf_path,
        should_use_llama_parse, isBubble)``, which is expected to return a
        ``(chunks, pages)`` pair.

        Args:
            listaPDFs: Paths of the PDFs to process, handled sequentially.
            splitterObject: Object exposing the async
                ``load_and_split_document`` method.
            should_use_llama_parse: Forwarded unchanged to the splitter.
            isBubble: Forwarded unchanged to the splitter.

        Returns:
            A tuple ``(all_chunks, all_pages)`` holding the chunks and the
            pages of every PDF, concatenated in input order. Both lists are
            empty when ``listaPDFs`` is empty.
        """
        all_PDFs_chunks: List["DocumentChunk"] = []
        all_pages: List[str] = []

        for pdf_path in listaPDFs:
            chunks, pdf_pages = await splitterObject.load_and_split_document(
                pdf_path, should_use_llama_parse, isBubble
            )
            # Extend in place instead of rebuilding the list on every pass.
            all_PDFs_chunks.extend(chunks)
            # Accumulate pages too: rebinding the result variable on each
            # iteration would keep only the last PDF's pages.
            all_pages.extend(pdf_pages)

        return all_PDFs_chunks, all_pages