Spaces:
Running
Running
luanpoppe
committed on
Commit
·
ab34606
1
Parent(s):
a1f037d
feat: adicionando logs pelo axiom e sentry
Browse files
- .env.example +3 -1
- _utils/custom_exception_handler.py +19 -9
- _utils/gerar_documento.py +22 -12
- _utils/handle_files.py +3 -2
- _utils/langchain_utils/Splitter_class.py +27 -11
- _utils/utils.py +24 -0
- gerar_documento/views.py +20 -10
- requirements.txt +0 -0
- setup/logging.py +74 -0
- setup/settings.py +25 -1
- setup/tokens.py +4 -1
.env.example
CHANGED
@@ -9,4 +9,6 @@ BUBBLE_TOKEN=""
|
|
9 |
LLAMA_CLOUD_API_KEY_POPS=""
|
10 |
LLAMA_CLOUD_API_KEY_PEIXE=""
|
11 |
DEEPSEEKK_API_KEY=""
|
12 |
-
GOOGLE_API_KEY_PEIXE=""
|
|
|
|
|
|
9 |
LLAMA_CLOUD_API_KEY_POPS=""
|
10 |
LLAMA_CLOUD_API_KEY_PEIXE=""
|
11 |
DEEPSEEKK_API_KEY=""
|
12 |
+
GOOGLE_API_KEY_PEIXE=""
|
13 |
+
SENTRY_DSN=""
|
14 |
+
AMBIENTE="testes"
|
_utils/custom_exception_handler.py
CHANGED
@@ -7,17 +7,22 @@ from rest_framework.views import exception_handler
|
|
7 |
import logging
|
8 |
from _utils.bubble_integrations.enviar_resposta_final import enviar_resposta_final
|
9 |
from gerar_documento.serializer import GerarDocumentoSerializerData
|
|
|
10 |
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
13 |
|
14 |
def custom_exception_handler(exc, context):
|
15 |
-
print("---------------- CHEGOU NA FUNÇÃO PERSONALIZADA DE ERRO ----------------")
|
16 |
if context:
|
17 |
serializer: Dict = context["view"].serializer
|
|
|
18 |
else:
|
19 |
serializer = {}
|
20 |
-
|
|
|
|
|
|
|
|
|
21 |
resposta_bubble = enviar_resposta_final(
|
22 |
serializer.get("doc_id", ""),
|
23 |
serializer.get("form_response_id", ""),
|
@@ -25,22 +30,25 @@ def custom_exception_handler(exc, context):
|
|
25 |
serializer.get("texto_completo", ""),
|
26 |
True,
|
27 |
)
|
28 |
-
|
29 |
-
|
|
|
|
|
30 |
# Call REST framework's default exception handler first
|
31 |
response = exception_handler(exc, context)
|
32 |
|
33 |
if response and str(response.status_code)[0] != "2":
|
|
|
34 |
logger.error(f"Validation error: {response.data}")
|
35 |
|
36 |
return response
|
37 |
|
38 |
|
39 |
def custom_exception_handler_wihout_api_handler(
|
40 |
-
error, serializer: Union[GerarDocumentoSerializerData, Any]
|
41 |
):
|
42 |
bahia_tz = pytz.timezone("America/Bahia")
|
43 |
-
|
44 |
resposta_bubble = enviar_resposta_final(
|
45 |
serializer.doc_id,
|
46 |
serializer.form_response_id,
|
@@ -48,6 +56,8 @@ def custom_exception_handler_wihout_api_handler(
|
|
48 |
f"------------ ERRO NO BACKEND ÀS {datetime.now(bahia_tz).strftime("%d/%m/%Y - %H:%M:%S")} ------------:\nMensagem de erro: {error} ", # serializer.get("texto_completo", ""),
|
49 |
True,
|
50 |
)
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
7 |
import logging
|
8 |
from _utils.bubble_integrations.enviar_resposta_final import enviar_resposta_final
|
9 |
from gerar_documento.serializer import GerarDocumentoSerializerData
|
10 |
+
from setup.logging import Axiom
|
11 |
|
12 |
logger = logging.getLogger(__name__)
|
13 |
|
14 |
|
15 |
def custom_exception_handler(exc, context):
|
|
|
16 |
if context:
|
17 |
serializer: Dict = context["view"].serializer
|
18 |
+
axiom_instance: Axiom = context["view"].axiom_instance
|
19 |
else:
|
20 |
serializer = {}
|
21 |
+
axiom_instance: Axiom = Axiom()
|
22 |
+
axiom_instance.send_axiom_error(
|
23 |
+
"---------------- CHEGOU NA FUNÇÃO PERSONALIZADA DE ERRO ----------------"
|
24 |
+
)
|
25 |
+
axiom_instance.send_axiom_error("INICIANDO RESPOSTA DE ERRO PARA O BUBBLE")
|
26 |
resposta_bubble = enviar_resposta_final(
|
27 |
serializer.get("doc_id", ""),
|
28 |
serializer.get("form_response_id", ""),
|
|
|
30 |
serializer.get("texto_completo", ""),
|
31 |
True,
|
32 |
)
|
33 |
+
axiom_instance.send_axiom_error(
|
34 |
+
f"resposta_bubble.status_code: {resposta_bubble.status_code}"
|
35 |
+
)
|
36 |
+
axiom_instance.send_axiom_error(f"resposta_bubble.text: {resposta_bubble.text}")
|
37 |
# Call REST framework's default exception handler first
|
38 |
response = exception_handler(exc, context)
|
39 |
|
40 |
if response and str(response.status_code)[0] != "2":
|
41 |
+
axiom_instance.send_axiom_error(f"Validation error: {response.data}")
|
42 |
logger.error(f"Validation error: {response.data}")
|
43 |
|
44 |
return response
|
45 |
|
46 |
|
47 |
def custom_exception_handler_wihout_api_handler(
|
48 |
+
error, serializer: Union[GerarDocumentoSerializerData, Any], axiom_instace: Axiom
|
49 |
):
|
50 |
bahia_tz = pytz.timezone("America/Bahia")
|
51 |
+
axiom_instace.send_axiom_error("INICIANDO RESPOSTA DE ERRO PARA O BUBBLE")
|
52 |
resposta_bubble = enviar_resposta_final(
|
53 |
serializer.doc_id,
|
54 |
serializer.form_response_id,
|
|
|
56 |
f"------------ ERRO NO BACKEND ÀS {datetime.now(bahia_tz).strftime("%d/%m/%Y - %H:%M:%S")} ------------:\nMensagem de erro: {error} ", # serializer.get("texto_completo", ""),
|
57 |
True,
|
58 |
)
|
59 |
+
axiom_instace.send_axiom_error(
|
60 |
+
f"resposta_bubble.status_code: {resposta_bubble.status_code}"
|
61 |
+
)
|
62 |
+
axiom_instace.send_axiom_error(f"resposta_bubble.text: {resposta_bubble.text}")
|
63 |
+
axiom_instace.send_axiom_error(f"------------ MOTIVO DO ERRO -----------: {error}")
|
_utils/gerar_documento.py
CHANGED
@@ -31,6 +31,7 @@ from gerar_documento.serializer import (
|
|
31 |
GerarDocumentoComPDFProprioSerializerData,
|
32 |
GerarDocumentoSerializerData,
|
33 |
)
|
|
|
34 |
|
35 |
|
36 |
os.environ["LANGCHAIN_TRACING_V2"] = "true"
|
@@ -44,6 +45,7 @@ async def gerar_documento(
|
|
44 |
GerarDocumentoSerializerData, GerarDocumentoComPDFProprioSerializerData, Any
|
45 |
],
|
46 |
listaPDFs,
|
|
|
47 |
isBubble=False,
|
48 |
):
|
49 |
"""Parâmetro "contexto" só deve ser passado quando quiser utilizar o teste com ragas, e assim, não quiser passar PDFs"""
|
@@ -68,12 +70,17 @@ async def gerar_documento(
|
|
68 |
response_auxiliar_summary = (
|
69 |
await get_response_from_auxiliar_contextual_prompt(full_text_as_array)
|
70 |
)
|
|
|
|
|
|
|
71 |
|
72 |
-
|
73 |
contextualized_chunks = await contextual_retriever.contextualize_all_chunks(
|
74 |
all_PDFs_chunks, response_auxiliar_summary
|
75 |
)
|
76 |
-
|
|
|
|
|
77 |
chunks_processados = contextualized_chunks
|
78 |
else:
|
79 |
chunks_processados = all_PDFs_chunks
|
@@ -83,14 +90,15 @@ async def gerar_documento(
|
|
83 |
cast(str, response_auxiliar_summary)
|
84 |
)
|
85 |
|
86 |
-
|
|
|
|
|
87 |
query_gerado_dinamicamente_para_o_vector_store = await llm.google_gemini(
|
88 |
"gemini-2.5-pro-exp-03-25"
|
89 |
).ainvoke([HumanMessage(content=prompt_para_gerar_query_dinamico)])
|
90 |
|
91 |
-
|
92 |
-
"
|
93 |
-
query_gerado_dinamicamente_para_o_vector_store.content,
|
94 |
)
|
95 |
|
96 |
# Create enhanced vector store and BM25 index
|
@@ -101,7 +109,7 @@ async def gerar_documento(
|
|
101 |
)
|
102 |
|
103 |
llm_ultimas_requests = serializer.llm_ultimas_requests
|
104 |
-
|
105 |
structured_summaries = await summarizer.gerar_documento_final(
|
106 |
vector_store,
|
107 |
bm25,
|
@@ -111,7 +119,7 @@ async def gerar_documento(
|
|
111 |
str, query_gerado_dinamicamente_para_o_vector_store.content
|
112 |
), # prompt_auxiliar_SEM_CONTEXT,
|
113 |
)
|
114 |
-
|
115 |
|
116 |
if not isinstance(structured_summaries, list):
|
117 |
from rest_framework.response import Response
|
@@ -126,7 +134,9 @@ async def gerar_documento(
|
|
126 |
x["source"]["context"] = x["source"]["context"][0:200]
|
127 |
|
128 |
texto_completo_como_html = convert_markdown_to_HTML(texto_completo)
|
129 |
-
|
|
|
|
|
130 |
|
131 |
if is_contextualized_chunk:
|
132 |
prompt_titulo_do_documento = response_auxiliar_summary
|
@@ -137,7 +147,7 @@ async def gerar_documento(
|
|
137 |
)
|
138 |
|
139 |
if isBubble:
|
140 |
-
|
141 |
enviar_resposta_final(
|
142 |
serializer.doc_id, # type: ignore
|
143 |
serializer.form_response_id, # type: ignore
|
@@ -146,7 +156,7 @@ async def gerar_documento(
|
|
146 |
False,
|
147 |
cast(str, titulo_do_documento),
|
148 |
)
|
149 |
-
|
150 |
|
151 |
return {
|
152 |
"texto_completo": texto_completo_como_html,
|
@@ -155,5 +165,5 @@ async def gerar_documento(
|
|
155 |
"parametros-utilizados": gerar_resposta_compilada(serializer),
|
156 |
}
|
157 |
except Exception as e:
|
158 |
-
custom_exception_handler_wihout_api_handler(e, serializer)
|
159 |
raise
|
|
|
31 |
GerarDocumentoComPDFProprioSerializerData,
|
32 |
GerarDocumentoSerializerData,
|
33 |
)
|
34 |
+
from setup.logging import Axiom
|
35 |
|
36 |
|
37 |
os.environ["LANGCHAIN_TRACING_V2"] = "true"
|
|
|
45 |
GerarDocumentoSerializerData, GerarDocumentoComPDFProprioSerializerData, Any
|
46 |
],
|
47 |
listaPDFs,
|
48 |
+
axiom_instance: Axiom,
|
49 |
isBubble=False,
|
50 |
):
|
51 |
"""Parâmetro "contexto" só deve ser passado quando quiser utilizar o teste com ragas, e assim, não quiser passar PDFs"""
|
|
|
70 |
response_auxiliar_summary = (
|
71 |
await get_response_from_auxiliar_contextual_prompt(full_text_as_array)
|
72 |
)
|
73 |
+
axiom_instance.send_axiom(
|
74 |
+
f"RESUMO INICIAL DO PROCESSO: {response_auxiliar_summary}"
|
75 |
+
)
|
76 |
|
77 |
+
axiom_instance.send_axiom("COMEÇANDO A FAZER AS REQUISIÇÕES DO CONTEXTUAL")
|
78 |
contextualized_chunks = await contextual_retriever.contextualize_all_chunks(
|
79 |
all_PDFs_chunks, response_auxiliar_summary
|
80 |
)
|
81 |
+
axiom_instance.send_axiom(
|
82 |
+
"TERMINOU DE FAZER TODAS AS REQUISIÇÕES DO CONTEXTUAL"
|
83 |
+
)
|
84 |
chunks_processados = contextualized_chunks
|
85 |
else:
|
86 |
chunks_processados = all_PDFs_chunks
|
|
|
90 |
cast(str, response_auxiliar_summary)
|
91 |
)
|
92 |
|
93 |
+
axiom_instance.send_axiom(
|
94 |
+
"COMEÇANDO REQUISIÇÃO PARA GERAR O QUERY DINAMICAMENTE DO VECTOR STORE"
|
95 |
+
)
|
96 |
query_gerado_dinamicamente_para_o_vector_store = await llm.google_gemini(
|
97 |
"gemini-2.5-pro-exp-03-25"
|
98 |
).ainvoke([HumanMessage(content=prompt_para_gerar_query_dinamico)])
|
99 |
|
100 |
+
axiom_instance.send_axiom(
|
101 |
+
f"query_gerado_dinamicamente_para_o_vector_store: {query_gerado_dinamicamente_para_o_vector_store.content}",
|
|
|
102 |
)
|
103 |
|
104 |
# Create enhanced vector store and BM25 index
|
|
|
109 |
)
|
110 |
|
111 |
llm_ultimas_requests = serializer.llm_ultimas_requests
|
112 |
+
axiom_instance.send_axiom("COMEÇANDO A FAZER ÚLTIMA REQUISIÇÃO")
|
113 |
structured_summaries = await summarizer.gerar_documento_final(
|
114 |
vector_store,
|
115 |
bm25,
|
|
|
119 |
str, query_gerado_dinamicamente_para_o_vector_store.content
|
120 |
), # prompt_auxiliar_SEM_CONTEXT,
|
121 |
)
|
122 |
+
axiom_instance.send_axiom("TERMINOU DE FAZER A ÚLTIMA REQUISIÇÃO")
|
123 |
|
124 |
if not isinstance(structured_summaries, list):
|
125 |
from rest_framework.response import Response
|
|
|
134 |
x["source"]["context"] = x["source"]["context"][0:200]
|
135 |
|
136 |
texto_completo_como_html = convert_markdown_to_HTML(texto_completo)
|
137 |
+
axiom_instance.send_axiom(
|
138 |
+
f"texto_completo_como_html: {texto_completo_como_html}"
|
139 |
+
)
|
140 |
|
141 |
if is_contextualized_chunk:
|
142 |
prompt_titulo_do_documento = response_auxiliar_summary
|
|
|
147 |
)
|
148 |
|
149 |
if isBubble:
|
150 |
+
axiom_instance.send_axiom("COMEÇANDO A REQUISIÇÃO FINAL PARA O BUBBLE")
|
151 |
enviar_resposta_final(
|
152 |
serializer.doc_id, # type: ignore
|
153 |
serializer.form_response_id, # type: ignore
|
|
|
156 |
False,
|
157 |
cast(str, titulo_do_documento),
|
158 |
)
|
159 |
+
axiom_instance.send_axiom("TERMINOU A REQUISIÇÃO FINAL PARA O BUBBLE")
|
160 |
|
161 |
return {
|
162 |
"texto_completo": texto_completo_como_html,
|
|
|
165 |
"parametros-utilizados": gerar_resposta_compilada(serializer),
|
166 |
}
|
167 |
except Exception as e:
|
168 |
+
custom_exception_handler_wihout_api_handler(e, serializer, axiom_instance)
|
169 |
raise
|
_utils/handle_files.py
CHANGED
@@ -5,6 +5,7 @@ from llama_index import Document
|
|
5 |
from llama_parse import LlamaParse, ResultType
|
6 |
|
7 |
from _utils.langchain_utils.splitter_util import SplitterUtils
|
|
|
8 |
|
9 |
llama_parser_keys = [
|
10 |
os.getenv("LLAMA_CLOUD_API_KEY_POPS"),
|
@@ -12,7 +13,7 @@ llama_parser_keys = [
|
|
12 |
]
|
13 |
|
14 |
|
15 |
-
def handle_pdf_files_from_serializer(files):
|
16 |
listaPDFs = []
|
17 |
for file in files:
|
18 |
file_extension = file.name.split(".")[-1]
|
@@ -26,7 +27,7 @@ def handle_pdf_files_from_serializer(files):
|
|
26 |
temp_file.write(chunk)
|
27 |
temp_file_path = temp_file.name # Get the path of the temporary file
|
28 |
listaPDFs.append(temp_file_path)
|
29 |
-
|
30 |
return listaPDFs
|
31 |
|
32 |
|
|
|
5 |
from llama_parse import LlamaParse, ResultType
|
6 |
|
7 |
from _utils.langchain_utils.splitter_util import SplitterUtils
|
8 |
+
from setup.logging import Axiom
|
9 |
|
10 |
llama_parser_keys = [
|
11 |
os.getenv("LLAMA_CLOUD_API_KEY_POPS"),
|
|
|
13 |
]
|
14 |
|
15 |
|
16 |
+
def handle_pdf_files_from_serializer(files, axiom_instance: Axiom):
|
17 |
listaPDFs = []
|
18 |
for file in files:
|
19 |
file_extension = file.name.split(".")[-1]
|
|
|
27 |
temp_file.write(chunk)
|
28 |
temp_file_path = temp_file.name # Get the path of the temporary file
|
29 |
listaPDFs.append(temp_file_path)
|
30 |
+
axiom_instance.send_axiom(f"listaPDFs: {listaPDFs}")
|
31 |
return listaPDFs
|
32 |
|
33 |
|
_utils/langchain_utils/Splitter_class.py
CHANGED
@@ -25,6 +25,7 @@ class Splitter:
|
|
25 |
chunk_size,
|
26 |
chunk_overlap,
|
27 |
):
|
|
|
28 |
self.text_splitter = RecursiveCharacterTextSplitter(
|
29 |
chunk_size=chunk_size, chunk_overlap=chunk_overlap
|
30 |
)
|
@@ -41,7 +42,7 @@ class Splitter:
|
|
41 |
# ) # Gera uma lista de objetos Document, sendo cada item da lista referente a UMA PÁGINA inteira do PDF.
|
42 |
full_text_as_string = ""
|
43 |
|
44 |
-
|
45 |
|
46 |
if isBubble:
|
47 |
print("\nPEGANDO PDF DO BUBBLE")
|
@@ -49,11 +50,17 @@ class Splitter:
|
|
49 |
page_boundaries, combined_text = (
|
50 |
combine_documents_without_losing_pagination(pages)
|
51 |
)
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
56 |
)
|
|
|
|
|
|
|
|
|
|
|
57 |
else:
|
58 |
if should_use_llama_parse:
|
59 |
print("\nENVIANDO PDFS PARA LLAMA PARSE")
|
@@ -61,8 +68,8 @@ class Splitter:
|
|
61 |
page_boundaries, combined_text = (
|
62 |
combine_documents_without_losing_pagination(pages)
|
63 |
)
|
64 |
-
|
65 |
-
combined_text
|
66 |
)
|
67 |
else:
|
68 |
print("\nCOMEÇANDO LEITURA DO PDF")
|
@@ -78,8 +85,8 @@ class Splitter:
|
|
78 |
combine_documents_without_losing_pagination(pages)
|
79 |
)
|
80 |
|
81 |
-
|
82 |
-
combined_text
|
83 |
)
|
84 |
|
85 |
chunks: List[DocumentChunk] = []
|
@@ -92,7 +99,7 @@ class Splitter:
|
|
92 |
# ) # Quebra o item que é um Document de UMA PÁGINA inteira em um lista onde cada item é referente a um chunk, que são pedaços menores do que uma página.
|
93 |
text_char = 0
|
94 |
print("\nQUEBRANDO PDF EM CHUNKS ORGANIZADOS")
|
95 |
-
for chunk in
|
96 |
chunk_id = str(uuid.uuid4())
|
97 |
start_char = text_char + 1
|
98 |
end_char = start_char + len(chunk)
|
@@ -129,7 +136,7 @@ class Splitter:
|
|
129 |
# char_count += len(text)
|
130 |
print("TERMINOU DE ORGANIZAR PDFS EM CHUNKS")
|
131 |
|
132 |
-
return chunks,
|
133 |
|
134 |
def load_and_split_text(self, text: str) -> List[DocumentChunk]:
|
135 |
"""Load Text and split into chunks with metadata - Criei essa função apenas para o ragas"""
|
@@ -198,3 +205,12 @@ class Splitter_Simple:
|
|
198 |
documents.append(Document(page_content=chunk))
|
199 |
|
200 |
return documents
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
chunk_size,
|
26 |
chunk_overlap,
|
27 |
):
|
28 |
+
self.splitter_simple = Splitter_Simple(chunk_size, chunk_overlap)
|
29 |
self.text_splitter = RecursiveCharacterTextSplitter(
|
30 |
chunk_size=chunk_size, chunk_overlap=chunk_overlap
|
31 |
)
|
|
|
42 |
# ) # Gera uma lista de objetos Document, sendo cada item da lista referente a UMA PÁGINA inteira do PDF.
|
43 |
full_text_as_string = ""
|
44 |
|
45 |
+
chunks_of_string_only: List[str] = []
|
46 |
|
47 |
if isBubble:
|
48 |
print("\nPEGANDO PDF DO BUBBLE")
|
|
|
50 |
page_boundaries, combined_text = (
|
51 |
combine_documents_without_losing_pagination(pages)
|
52 |
)
|
53 |
+
chunks_of_string_only = (
|
54 |
+
chunks_of_string_only
|
55 |
+
+ self.splitter_simple.get_chunks_of_string_only_from_list_of_documents(
|
56 |
+
pages
|
57 |
+
)
|
58 |
)
|
59 |
+
# for page in pages:
|
60 |
+
# full_text_as_string = full_text_as_string + page.page_content
|
61 |
+
# chunks_of_string_only = chunks_of_string_only + self.text_splitter.split_text(
|
62 |
+
# combined_text
|
63 |
+
# )
|
64 |
else:
|
65 |
if should_use_llama_parse:
|
66 |
print("\nENVIANDO PDFS PARA LLAMA PARSE")
|
|
|
68 |
page_boundaries, combined_text = (
|
69 |
combine_documents_without_losing_pagination(pages)
|
70 |
)
|
71 |
+
chunks_of_string_only = (
|
72 |
+
chunks_of_string_only + self.text_splitter.split_text(combined_text)
|
73 |
)
|
74 |
else:
|
75 |
print("\nCOMEÇANDO LEITURA DO PDF")
|
|
|
85 |
combine_documents_without_losing_pagination(pages)
|
86 |
)
|
87 |
|
88 |
+
chunks_of_string_only = (
|
89 |
+
chunks_of_string_only + self.text_splitter.split_text(combined_text)
|
90 |
)
|
91 |
|
92 |
chunks: List[DocumentChunk] = []
|
|
|
99 |
# ) # Quebra o item que é um Document de UMA PÁGINA inteira em um lista onde cada item é referente a um chunk, que são pedaços menores do que uma página.
|
100 |
text_char = 0
|
101 |
print("\nQUEBRANDO PDF EM CHUNKS ORGANIZADOS")
|
102 |
+
for chunk in chunks_of_string_only:
|
103 |
chunk_id = str(uuid.uuid4())
|
104 |
start_char = text_char + 1
|
105 |
end_char = start_char + len(chunk)
|
|
|
136 |
# char_count += len(text)
|
137 |
print("TERMINOU DE ORGANIZAR PDFS EM CHUNKS")
|
138 |
|
139 |
+
return chunks, chunks_of_string_only, full_text_as_string
|
140 |
|
141 |
def load_and_split_text(self, text: str) -> List[DocumentChunk]:
|
142 |
"""Load Text and split into chunks with metadata - Criei essa função apenas para o ragas"""
|
|
|
205 |
documents.append(Document(page_content=chunk))
|
206 |
|
207 |
return documents
|
208 |
+
|
209 |
+
def get_chunks_of_string_only_from_list_of_documents(
|
210 |
+
self, lista_de_documentos: List[Document]
|
211 |
+
):
|
212 |
+
full_text_as_string = ""
|
213 |
+
for page in lista_de_documentos:
|
214 |
+
full_text_as_string = full_text_as_string + page.page_content
|
215 |
+
full_text_as_array = self.text_splitter.split_text(full_text_as_string)
|
216 |
+
return full_text_as_array
|
_utils/utils.py
CHANGED
@@ -15,6 +15,7 @@ import numpy as np
|
|
15 |
import openai
|
16 |
import pandas as pd
|
17 |
import markdown
|
|
|
18 |
|
19 |
os.environ["LANGCHAIN_TRACING_V2"] = "true"
|
20 |
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
|
@@ -223,3 +224,26 @@ def convert_markdown_to_HTML(text: str):
|
|
223 |
.replace("\n", "\n\n")
|
224 |
)
|
225 |
return html
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
import openai
|
16 |
import pandas as pd
|
17 |
import markdown
|
18 |
+
import sentry_sdk
|
19 |
|
20 |
os.environ["LANGCHAIN_TRACING_V2"] = "true"
|
21 |
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
|
|
|
224 |
.replace("\n", "\n\n")
|
225 |
)
|
226 |
return html
|
227 |
+
|
228 |
+
|
229 |
+
def print_sentry(mensagem: str):
|
230 |
+
sentry_sdk.capture_message(mensagem, level="info")
|
231 |
+
|
232 |
+
|
233 |
+
def sentry_add_breadcrumb(mensagem: str):
|
234 |
+
sentry_sdk.add_breadcrumb(
|
235 |
+
category="infos_gerais",
|
236 |
+
message=mensagem,
|
237 |
+
level="info",
|
238 |
+
)
|
239 |
+
|
240 |
+
|
241 |
+
def sentry_add_trace(mensagem: str):
|
242 |
+
current_span = sentry_sdk.get_current_span()
|
243 |
+
|
244 |
+
if current_span is not None:
|
245 |
+
# Set custom tags
|
246 |
+
current_span.set_tag("NOME DA TAG", "VALOR DA TAG")
|
247 |
+
|
248 |
+
# Add custom data
|
249 |
+
current_span.set_data("NOME DA KEY", mensagem)
|
gerar_documento/views.py
CHANGED
@@ -5,6 +5,7 @@ from _utils.gerar_relatorio_modelo_usuario.utils import (
|
|
5 |
get_full_text_and_all_PDFs_chunks,
|
6 |
)
|
7 |
from _utils.langchain_utils.Prompt_class import Prompt
|
|
|
8 |
from setup.easy_imports import (
|
9 |
Response,
|
10 |
AsyncAPIView,
|
@@ -18,6 +19,7 @@ from _utils.gerar_documento import (
|
|
18 |
gerar_documento,
|
19 |
)
|
20 |
from _utils.gerar_relatorio_modelo_usuario.prompts import prompt_auxiliar_inicio
|
|
|
21 |
from .serializer import (
|
22 |
GerarDocumentoComPDFProprioSerializer,
|
23 |
GerarDocumentoSerializer,
|
@@ -35,8 +37,10 @@ class GerarDocumentoView(AsyncAPIView):
|
|
35 |
request=GerarDocumentoSerializer,
|
36 |
)
|
37 |
async def post(self, request):
|
|
|
38 |
print(f"\n\nDATA E HORA DA REQUISIÇÃO: {datetime.now()}")
|
39 |
-
|
|
|
40 |
serializer = GerarDocumentoSerializer(data=request.data)
|
41 |
if serializer.is_valid(raise_exception=True):
|
42 |
obj = serializer.get_obj() # type: ignore
|
@@ -46,15 +50,16 @@ class GerarDocumentoView(AsyncAPIView):
|
|
46 |
async def proccess_data_after_response():
|
47 |
# await asyncio.sleep(0)
|
48 |
data = cast(Dict[str, Any], serializer.validated_data)
|
49 |
-
print("\ndata: ", data)
|
50 |
self.serializer = data
|
51 |
|
52 |
listaPDFs = [l["link_arquivo"] for l in data["files"]]
|
53 |
|
54 |
-
|
55 |
|
56 |
-
resposta_llm = await gerar_documento(
|
57 |
-
|
|
|
|
|
58 |
|
59 |
# remove_pdf_temp_files(listaPDFs)
|
60 |
|
@@ -77,21 +82,26 @@ class GerarDocumentoComPDFProprioView(AsyncAPIView):
|
|
77 |
request=GerarDocumentoComPDFProprioSerializer,
|
78 |
)
|
79 |
async def post(self, request):
|
|
|
80 |
print(f"\n\nDATA E HORA DA REQUISIÇÃO: {datetime.now()}")
|
|
|
|
|
81 |
serializer = GerarDocumentoComPDFProprioSerializer(data=request.data)
|
|
|
82 |
if serializer.is_valid(raise_exception=True):
|
83 |
data = cast(Dict[str, Any], serializer.validated_data)
|
84 |
obj = serializer.get_obj() # type: ignore
|
85 |
-
print("\n\ndata: ", data)
|
86 |
self.serializer = data
|
87 |
|
88 |
-
listaPDFs = handle_pdf_files_from_serializer(data["files"])
|
89 |
|
90 |
-
resposta_llm = await gerar_documento(obj, listaPDFs)
|
91 |
-
|
92 |
|
93 |
remove_pdf_temp_files(listaPDFs)
|
94 |
-
|
|
|
|
|
95 |
return Response({"resposta": resposta_llm})
|
96 |
|
97 |
|
|
|
5 |
get_full_text_and_all_PDFs_chunks,
|
6 |
)
|
7 |
from _utils.langchain_utils.Prompt_class import Prompt
|
8 |
+
from _utils.utils import print_sentry, sentry_add_breadcrumb
|
9 |
from setup.easy_imports import (
|
10 |
Response,
|
11 |
AsyncAPIView,
|
|
|
19 |
gerar_documento,
|
20 |
)
|
21 |
from _utils.gerar_relatorio_modelo_usuario.prompts import prompt_auxiliar_inicio
|
22 |
+
from setup.logging import Axiom, send_axiom
|
23 |
from .serializer import (
|
24 |
GerarDocumentoComPDFProprioSerializer,
|
25 |
GerarDocumentoSerializer,
|
|
|
37 |
request=GerarDocumentoSerializer,
|
38 |
)
|
39 |
async def post(self, request):
|
40 |
+
axiom_instance = Axiom()
|
41 |
print(f"\n\nDATA E HORA DA REQUISIÇÃO: {datetime.now()}")
|
42 |
+
axiom_instance.send_axiom("COMEÇOU NOVA REQUISIÇÃO")
|
43 |
+
axiom_instance.send_axiom(request.data)
|
44 |
serializer = GerarDocumentoSerializer(data=request.data)
|
45 |
if serializer.is_valid(raise_exception=True):
|
46 |
obj = serializer.get_obj() # type: ignore
|
|
|
50 |
async def proccess_data_after_response():
|
51 |
# await asyncio.sleep(0)
|
52 |
data = cast(Dict[str, Any], serializer.validated_data)
|
|
|
53 |
self.serializer = data
|
54 |
|
55 |
listaPDFs = [l["link_arquivo"] for l in data["files"]]
|
56 |
|
57 |
+
axiom_instance.send_axiom(f"listaPDFs: {listaPDFs}")
|
58 |
|
59 |
+
resposta_llm = await gerar_documento(
|
60 |
+
obj, listaPDFs, axiom_instance, isBubble=True
|
61 |
+
)
|
62 |
+
axiom_instance.send_axiom(f"resposta_llm: {resposta_llm}")
|
63 |
|
64 |
# remove_pdf_temp_files(listaPDFs)
|
65 |
|
|
|
82 |
request=GerarDocumentoComPDFProprioSerializer,
|
83 |
)
|
84 |
async def post(self, request):
|
85 |
+
axiom_instance = Axiom()
|
86 |
print(f"\n\nDATA E HORA DA REQUISIÇÃO: {datetime.now()}")
|
87 |
+
axiom_instance.send_axiom("COMEÇOU NOVA REQUISIÇÃO")
|
88 |
+
axiom_instance.send_axiom(request.data)
|
89 |
serializer = GerarDocumentoComPDFProprioSerializer(data=request.data)
|
90 |
+
|
91 |
if serializer.is_valid(raise_exception=True):
|
92 |
data = cast(Dict[str, Any], serializer.validated_data)
|
93 |
obj = serializer.get_obj() # type: ignore
|
|
|
94 |
self.serializer = data
|
95 |
|
96 |
+
listaPDFs = handle_pdf_files_from_serializer(data["files"], axiom_instance)
|
97 |
|
98 |
+
resposta_llm = await gerar_documento(obj, listaPDFs, axiom_instance)
|
99 |
+
axiom_instance.send_axiom(f"resposta_llm: {resposta_llm}")
|
100 |
|
101 |
remove_pdf_temp_files(listaPDFs)
|
102 |
+
axiom_instance.send_axiom(
|
103 |
+
"PRÓXIMA LINHA ENVIA A RESPOSTA A QUEM FEZ A REQUISIÇÃO"
|
104 |
+
)
|
105 |
return Response({"resposta": resposta_llm})
|
106 |
|
107 |
|
requirements.txt
CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
|
|
setup/logging.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
from attr import dataclass
|
3 |
+
from axiom_py import Client
|
4 |
+
from axiom_py.logging import AxiomHandler
|
5 |
+
import uuid
|
6 |
+
from setup.tokens import ambiente
|
7 |
+
|
8 |
+
|
9 |
+
def configure_logging():
|
10 |
+
# Initialize the Axiom client
|
11 |
+
axiom_client = Client("xaat-a5f9395e-9884-4b57-a537-a32e810cdb0a")
|
12 |
+
# client.ingest_events(
|
13 |
+
# dataset="vella",
|
14 |
+
# events=[
|
15 |
+
# {"foo": "bar"},
|
16 |
+
# {"bar": "baz"},
|
17 |
+
# ],
|
18 |
+
# )
|
19 |
+
|
20 |
+
# Create an Axiom logging handler
|
21 |
+
axiom_handler = AxiomHandler(client=axiom_client, dataset="vella")
|
22 |
+
|
23 |
+
# Set the logging level for the handler
|
24 |
+
axiom_handler.setLevel(logging.INFO)
|
25 |
+
|
26 |
+
# Optional: Define a formatter for the logs
|
27 |
+
formatter = logging.Formatter(
|
28 |
+
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
29 |
+
)
|
30 |
+
axiom_handler.setFormatter(formatter)
|
31 |
+
|
32 |
+
# Get the root logger and add the Axiom handler
|
33 |
+
root_logger = logging.getLogger()
|
34 |
+
root_logger.setLevel(logging.INFO)
|
35 |
+
root_logger.addHandler(axiom_handler)
|
36 |
+
|
37 |
+
|
38 |
+
def send_axiom(mensagem):
|
39 |
+
logger = logging.getLogger()
|
40 |
+
logger.info(
|
41 |
+
mensagem,
|
42 |
+
extra={
|
43 |
+
"tags": ["ambiente_testes"],
|
44 |
+
},
|
45 |
+
)
|
46 |
+
|
47 |
+
|
48 |
+
@dataclass
|
49 |
+
class Axiom:
|
50 |
+
uuid = uuid.uuid4()
|
51 |
+
|
52 |
+
array_of_logs = []
|
53 |
+
|
54 |
+
def add_axiom(self, mensagem):
|
55 |
+
self.array_of_logs.append(mensagem)
|
56 |
+
|
57 |
+
def send_final_axiom(self):
|
58 |
+
send_axiom(self.array_of_logs)
|
59 |
+
|
60 |
+
def send_axiom(self, mensagem):
|
61 |
+
print("\n", mensagem)
|
62 |
+
logger = logging.getLogger()
|
63 |
+
logger.info(
|
64 |
+
mensagem,
|
65 |
+
extra={"ambiente": [ambiente], "uuid": self.uuid},
|
66 |
+
)
|
67 |
+
|
68 |
+
def send_axiom_error(self, mensagem):
|
69 |
+
print("\n", mensagem)
|
70 |
+
logger = logging.getLogger()
|
71 |
+
logger.info(
|
72 |
+
mensagem,
|
73 |
+
extra={"ambiente": [ambiente], "uuid": self.uuid, "error": "true"},
|
74 |
+
)
|
setup/settings.py
CHANGED
@@ -1,6 +1,10 @@
|
|
1 |
import os
|
2 |
from dotenv import load_dotenv
|
3 |
from setup.installed_apps import INSTALLED_APPS
|
|
|
|
|
|
|
|
|
4 |
|
5 |
load_dotenv()
|
6 |
|
@@ -142,7 +146,7 @@ CORS_ORIGIN_WHITELIST = [
|
|
142 |
]
|
143 |
|
144 |
REST_FRAMEWORK = {
|
145 |
-
|
146 |
"DEFAULT_PAGINATION_CLASS": "rest_framework.pagination.PageNumberPagination",
|
147 |
"PAGE_SIZE": 10,
|
148 |
"DEFAULT_SCHEMA_CLASS": "drf_spectacular.openapi.AutoSchema",
|
@@ -161,3 +165,23 @@ SPECTACULAR_SETTINGS = {
|
|
161 |
"COMPONENT_SPLIT_REQUEST": True,
|
162 |
# OTHER SETTINGS
|
163 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
from dotenv import load_dotenv
|
3 |
from setup.installed_apps import INSTALLED_APPS
|
4 |
+
import sentry_sdk
|
5 |
+
from sentry_sdk.integrations.django import DjangoIntegration
|
6 |
+
|
7 |
+
from setup.logging import configure_logging
|
8 |
|
9 |
load_dotenv()
|
10 |
|
|
|
146 |
]
|
147 |
|
148 |
REST_FRAMEWORK = {
|
149 |
+
"EXCEPTION_HANDLER": "_utils.custom_exception_handler.custom_exception_handler",
|
150 |
"DEFAULT_PAGINATION_CLASS": "rest_framework.pagination.PageNumberPagination",
|
151 |
"PAGE_SIZE": 10,
|
152 |
"DEFAULT_SCHEMA_CLASS": "drf_spectacular.openapi.AutoSchema",
|
|
|
165 |
"COMPONENT_SPLIT_REQUEST": True,
|
166 |
# OTHER SETTINGS
|
167 |
}
|
168 |
+
|
169 |
+
sentry_sdk.init(
|
170 |
+
dsn=os.environ.get("SENTRY_DSN"),
|
171 |
+
integrations=[DjangoIntegration()],
|
172 |
+
# Add data like request headers and IP for users,
|
173 |
+
# see https://docs.sentry.io/platforms/python/data-management/data-collected/ for more info
|
174 |
+
send_default_pii=True,
|
175 |
+
# Set traces_sample_rate to 1.0 to capture 100%
|
176 |
+
# of transactions for tracing.
|
177 |
+
traces_sample_rate=1.0,
|
178 |
+
# Set profile_session_sample_rate to 1.0 to profile 100%
|
179 |
+
# of profile sessions.
|
180 |
+
profile_session_sample_rate=1.0,
|
181 |
+
# Set profile_lifecycle to "trace" to automatically
|
182 |
+
# run the profiler on when there is an active transaction
|
183 |
+
profile_lifecycle="trace",
|
184 |
+
max_request_body_size="always",
|
185 |
+
)
|
186 |
+
|
187 |
+
configure_logging()
|
setup/tokens.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import os
|
2 |
-
from typing import cast
|
3 |
|
4 |
openai_api_key = cast(str, os.environ.get("OPENAI_API_KEY", ""))
|
5 |
claude_api_key = cast(str, os.environ.get("CLAUDE_API_KEY"))
|
@@ -9,3 +9,6 @@ bubble_token = cast(str, os.environ.get("BUBBLE_TOKEN"))
|
|
9 |
cohere_api_key = cast(str, os.environ.get("COHERE_API_KEY", ""))
|
10 |
deepseek_api_key = cast(str, os.environ.get("DEEPSEEKK_API_KEY"))
|
11 |
google_api_key = cast(str, os.environ.get("GOOGLE_API_KEY_PEIXE"))
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
from typing import Literal, cast
|
3 |
|
4 |
openai_api_key = cast(str, os.environ.get("OPENAI_API_KEY", ""))
|
5 |
claude_api_key = cast(str, os.environ.get("CLAUDE_API_KEY"))
|
|
|
9 |
cohere_api_key = cast(str, os.environ.get("COHERE_API_KEY", ""))
|
10 |
deepseek_api_key = cast(str, os.environ.get("DEEPSEEKK_API_KEY"))
|
11 |
google_api_key = cast(str, os.environ.get("GOOGLE_API_KEY_PEIXE"))
|
12 |
+
ambiente: Literal["prd", "testes"] = cast(
|
13 |
+
Literal["prd", "testes"], os.environ.get("AMBIENTE", "prd")
|
14 |
+
)
|