"""Helpers for staging uploaded PDFs on disk and parsing them with LlamaParse."""

import os
import tempfile
from typing import List

from langchain_core.documents import Document as LangchainDocument
from llama_index import Document
from llama_parse import LlamaParse, ResultType

from _utils.langchain_utils.splitter_util import SplitterUtils
from setup.logging import Axiom

# LlamaCloud API keys, tried in this order. An entry is None when its
# environment variable is unset; such entries are skipped by the parser loop.
llama_parser_keys = [
    os.getenv("LLAMA_CLOUD_API_KEY_POPS"),
    os.getenv("LLAMA_CLOUD_API_KEY_PEIXE"),
]


def _remove_files_if_present(paths):
    """Delete every path in *paths*, skipping the ones that no longer exist."""
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Already gone: nothing left to clean up for this entry, and the
            # remaining paths must still be processed.
            continue


def handle_pdf_files_from_serializer(files, axiom_instance: Axiom) -> List[str]:
    """Copy each uploaded file into its own temporary file and return the paths.

    Args:
        files: Iterable of uploaded-file objects exposing ``name``,
            ``seek()`` and ``chunks()``.
        axiom_instance: Logger whose ``send_axiom`` receives the list of
            created paths.

    Returns:
        Paths of the temporary files, in the same order as *files*. The files
        are created with ``delete=False``, so the caller is responsible for
        removing them (``remove_pdf_temp_files`` does that).

    Raises:
        Exception: Whatever reading an upload or writing a temporary file
            raises. Temporary files created so far are deleted before the
            error propagates.
    """
    temp_paths: List[str] = []
    try:
        for file in files:
            # splitext yields "" when the name has no extension, whereas
            # split(".")[-1] would turn the whole file name into the suffix.
            suffix = os.path.splitext(file.name)[1]
            file.seek(0)  # Rewind so the copy starts at the first byte.
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=suffix
            ) as temp_file:
                # Record the path first so a failed write is still cleaned up.
                temp_paths.append(temp_file.name)
                for chunk in file.chunks():
                    temp_file.write(chunk)
    except Exception:
        # delete=False means nothing else will remove these files.
        _remove_files_if_present(temp_paths)
        raise

    axiom_instance.send_axiom(f"listaPDFs: {temp_paths}")
    return temp_paths


def remove_pdf_temp_files(listaPDFs):
    """Delete the temporary PDF files at the given paths.

    Paths that no longer exist are skipped, so one missing file does not
    prevent the remaining ones from being removed.

    Args:
        listaPDFs: Iterable of file-system paths to delete.
    """
    print("\nREMOVENDO ARQUIVOS PDF TEMPORÁRIOS")
    _remove_files_if_present(listaPDFs)


async def return_document_list_with_llama_parser(
    file: str,
) -> List[LangchainDocument]:
    """Parse *file* with LlamaParse and return one LangChain document per page.

    Each configured key in ``llama_parser_keys`` is tried in order. The first
    key that yields a non-empty result containing a ``"pages"`` entry wins.

    Args:
        file: Path of the file handed to ``LlamaParse.aget_json``.

    Returns:
        One ``LangchainDocument`` per parsed page, with the page's ``"md"``
        value as ``page_content`` and its ``"page"`` value stored under the
        ``"page"`` metadata key.

    Raises:
        ValueError: If no key produced a usable result. The last parser error,
            if any, is attached as ``__cause__``.
    """
    last_error = None
    for key in llama_parser_keys:
        if not key:
            continue  # Environment variable not set for this slot.

        parser = LlamaParse(
            api_key=key,
            result_type=ResultType.JSON,  # Options: 'text', 'markdown', 'json', 'structured'
            language="pt",
            verbose=True,
        )
        try:
            parsed_document = await parser.aget_json(file)
        except Exception as exc:
            # Catching Exception (not a bare except) lets task cancellation
            # and KeyboardInterrupt propagate instead of triggering a retry.
            last_error = exc
            print(f"Error with llama parser key ending with {key[-4:]}")
            continue  # Move on to the next key.

        if not parsed_document:
            continue

        pages = parsed_document[0].get("pages")  # type: ignore
        if pages is None:
            # Result without a "pages" entry: treat it like an empty response
            # and give the next key a chance.
            continue

        return [
            LangchainDocument(
                page_content=page.get("md"),  # type: ignore
                metadata={"page": page.get("page")},  # type: ignore
            )
            for page in pages
        ]

    # Reached only when the loop finished without any key returning documents.
    raise ValueError("ALGO DEU ERRADO NO PARSER DO LLAMA PARSE:") from last_error