Merge branch 'tests' of https://github.com/luanpoppe/vella-backend
- .env.example +3 -1
- .gitignore +0 -1
- Dockerfile +8 -0
- _utils/Handle_Files_Class.py +28 -0
- _utils/Utils_Class.py +10 -0
- _utils/axiom_logs.py +27 -0
- _utils/custom_exception_handler.py +1 -1
- _utils/files/ragas_testset.csv +0 -18
- _utils/gerar_documento.py +24 -135
- _utils/gerar_documento_utils/GerarDocumento.py +331 -85
- _utils/gerar_documento_utils/llm_calls.py +2 -1
- _utils/gerar_documento_utils/prompts.py +16 -5
- _utils/gerar_documento_utils/utils.py +2 -103
- _utils/google_integration/google_cloud.py +28 -0
- _utils/langchain_utils/LLM_class.py +48 -8
- _utils/langchain_utils/Splitter_class.py +157 -2
- _utils/langchain_utils/Vector_store_class.py +7 -1
- _utils/ragas.py +1 -1
- entrypoint.sh +26 -0
- gerar_documento/serializer.py +5 -16
- gerar_documento/views.py +19 -11
- requirements.txt +0 -0
- setup/installed_apps.py +1 -0
- setup/urls.py +1 -0
- simple_llm/__init__.py +0 -0
- simple_llm/admin.py +3 -0
- simple_llm/apps.py +6 -0
- simple_llm/migrations/__init__.py +0 -0
- simple_llm/models.py +3 -0
- simple_llm/serializer.py +57 -0
- simple_llm/tests.py +3 -0
- simple_llm/urls.py +11 -0
- simple_llm/views.py +73 -0
.env.example
CHANGED
@@ -11,4 +11,6 @@ LLAMA_CLOUD_API_KEY_PEIXE=""
 DEEPSEEKK_API_KEY=""
 GOOGLE_API_KEY_PEIXE=""
 SENTRY_DSN=""
-AMBIENTE="testes"
+AMBIENTE="testes"
+GOOGLE_APPLICATION_CREDENTIALS="" # Só é necessário em ambiente de desenvolvimento que não esteja usando docker
+GCP_CREDENTIALS_JSON_CONTENT="Conteúdo inteiro do arquivo vella_gcp_luan_credentials.json" # Em produção, tem que conter todo o conteúdo do arquivo de credentials. Localmente, não precisa existir
.gitignore
CHANGED
@@ -173,5 +173,4 @@ cython_debug/
 #.idea/
 
 # End of https://www.toptal.com/developers/gitignore/api/django
-
 vella_gcp_luan_credentials.json
Dockerfile
CHANGED
@@ -3,6 +3,10 @@ FROM python:3.12
 # Instalação necessária para converter arquivos .doc
 RUN apt-get update && apt-get install -y antiword
 
+# Copy the entrypoint script and make it executable
+COPY entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
 RUN useradd -m -u 1000 user
 USER user
 ENV PATH="/home/user/.local/bin:$PATH"
@@ -23,6 +27,10 @@ RUN pip install --no-cache-dir -r requirements.txt
 RUN python manage.py collectstatic --noinput
 
 RUN pip install uvicorn
+
+# Set the entrypoint to our script
+ENTRYPOINT ["/entrypoint.sh"]
+
 CMD ["uvicorn", "setup.asgi:application", "--host", "0.0.0.0", "--port", "7860"]
 
 # ENTRYPOINT ["python", "manage.py", "runserver"]
_utils/Handle_Files_Class.py
ADDED
@@ -0,0 +1,28 @@
+from dataclasses import dataclass
+from typing import List, Tuple
+
+from _utils.langchain_utils.Splitter_class import Splitter
+from _utils.models.gerar_documento import DocumentChunk
+
+
+@dataclass
+class HandleFilesClass:
+    async def get_full_text_and_all_PDFs_chunks(
+        self,
+        listaPDFs: List[str],
+        splitterObject: Splitter,
+        should_use_llama_parse: bool,
+        isBubble: bool,
+    ) -> Tuple[List[DocumentChunk], List[str]]:
+        all_PDFs_chunks: List[DocumentChunk] = []
+
+        pages: List[str] = []
+
+        # Load and process document
+        for pdf_path in listaPDFs:
+            chunks, pages = await splitterObject.load_and_split_document(
+                pdf_path, should_use_llama_parse, isBubble
+            )
+            all_PDFs_chunks = all_PDFs_chunks + chunks
+
+        return all_PDFs_chunks, pages
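For reference, a hypothetical usage sketch of the new wrapper class (the PDF path and the default Splitter construction are assumptions, not taken from this commit):

# Hypothetical usage sketch of HandleFilesClass; path and Splitter setup are assumptions.
import asyncio

from _utils.Handle_Files_Class import HandleFilesClass
from _utils.langchain_utils.Splitter_class import Splitter


async def exemplo() -> None:
    handler = HandleFilesClass()
    splitter = Splitter()  # assumes Splitter can be built with defaults
    chunks, pages = await handler.get_full_text_and_all_PDFs_chunks(
        listaPDFs=["/tmp/exemplo.pdf"],  # placeholder path
        splitterObject=splitter,
        should_use_llama_parse=False,
        isBubble=False,
    )
    print(len(chunks), len(pages))


# asyncio.run(exemplo())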
_utils/Utils_Class.py
ADDED
@@ -0,0 +1,10 @@
+from dataclasses import dataclass
+from typing import List
+
+from _utils.Handle_Files_Class import HandleFilesClass
+
+
+@dataclass
+class UtilsClass:
+    lista_pdfs: List[str] | None = None
+    handle_files = HandleFilesClass()
_utils/axiom_logs.py
ADDED
@@ -0,0 +1,27 @@
+from dataclasses import dataclass
+
+from setup.logging import Axiom
+
+
+@dataclass
+class AxiomLogs:
+    axiom: Axiom
+
+    def texto_completo_pdf(self, full_text_as_array):
+        self.axiom.send_axiom(
+            f"INÍCIO DO TEXTO COMPLETO DOS PDFS: {full_text_as_array[0:5]}"
+        )
+
+    def resumo_inicial_processo(self, response_auxiliar_summary):
+        self.axiom.send_axiom(
+            f"RESUMO INICIAL DO PROCESSO: {response_auxiliar_summary}"
+        )
+
+    def inicio_requisicao_contextual(self):
+        self.axiom.send_axiom("COMEÇANDO A FAZER AS REQUISIÇÕES DO CONTEXTUAL")
+
+    def fim_requisicao_contextual(self):
+        self.axiom.send_axiom("TERMINOU DE FAZER TODAS AS REQUISIÇÕES DO CONTEXTUAL")
+
+    def chunks_inicialmente(self, chunks_processados):
+        self.axiom.send_axiom(f"CHUNKS PROCESSADOS INICIALMENTE: {chunks_processados}")
_utils/custom_exception_handler.py
CHANGED
@@ -44,7 +44,7 @@ def custom_exception_handler(exc, context):
     return response
 
 
-def custom_exception_handler_wihout_api_handler(
+def custom_exception_handler_without_api_handler(
     error, serializer: Union[GerarDocumentoSerializerData, Any], axiom_instace: Axiom
 ):
     bahia_tz = pytz.timezone("America/Bahia")
_utils/files/ragas_testset.csv
DELETED
@@ -1,18 +0,0 @@
-position,user_input,reference,response,retrieved_contexts
-0,What are the global implications of the USA Supreme Court ruling on abortion?,"The global implications of the USA Supreme Court ruling on abortion are significant. The ruling has led to limited or no access to abortion for one in three women and girls of reproductive age in states where abortion access is restricted. These states also have weaker maternal health support, higher maternal death rates, and higher child poverty rates. Additionally, the ruling has had an impact beyond national borders due to the USA's geopolitical and cultural influence globally. Organizations and activists worldwide are concerned that the ruling may inspire anti-abortion legislative and policy attacks in other countries. The ruling has also hindered progressive law reform and the implementation of abortion guidelines in certain African countries. Furthermore, the ruling has created a chilling effect in international policy spaces, empowering anti-abortion actors to undermine human rights protections.","The global implications of the USA Supreme Court ruling on abortion can be significant, as it sets a precedent for other countries and influences the global discourse on reproductive rights. Here are some potential implications:
-
-1. Influence on other countries: The Supreme Court's ruling can serve as a reference point for other countries grappling with their own abortion laws. It can provide legal arguments and reasoning that advocates for reproductive rights can use to challenge restrictive abortion laws in their respective jurisdictions.
-
-2. Strengthening of global reproductive rights movements: A favorable ruling by the Supreme Court can energize and empower reproductive rights movements worldwide. It can serve as a rallying point for activists and organizations advocating for women's rights, leading to increased mobilization and advocacy efforts globally.
-
-3. Counteracting anti-abortion movements: Conversely, a ruling that restricts abortion rights can embolden anti-abortion movements globally. It can provide legitimacy to their arguments and encourage similar restrictive measures in other countries, potentially leading to a rollback of existing reproductive rights.
-
-4. Impact on international aid and policies: The Supreme Court's ruling can influence international aid and policies related to reproductive health. It can shape the priorities and funding decisions of donor countries and organizations, potentially leading to increased support for reproductive rights initiatives or conversely, restrictions on funding for abortion-related services.
-
-5. Shaping international human rights standards: The ruling can contribute to the development of international human rights standards regarding reproductive rights. It can influence the interpretation and application of existing human rights treaties and conventions, potentially strengthening the recognition of reproductive rights as fundamental human rights globally.
-
-6. Global health implications: The Supreme Court's ruling can have implications for global health outcomes, particularly in countries with restrictive abortion laws. It can impact the availability and accessibility of safe and legal abortion services, potentially leading to an increase in unsafe abortions and related health complications.
-
-It is important to note that the specific implications will depend on the nature of the Supreme Court ruling and the subsequent actions taken by governments, activists, and organizations both within and outside the United States.","[""- In 2022, the USA Supreme Court handed down a decision ruling that overturned 50 years of jurisprudence recognizing a constitutional right to abortion.\n- This decision has had a massive impact: one in three women and girls of reproductive age now live in states where abortion access is either totally or near-totally inaccessible.\n- The states with the most restrictive abortion laws have the weakest maternal health support, higher maternal death rates, and higher child poverty rates.\n- The USA Supreme Court ruling has also had impacts beyond national borders due to the geopolitical and cultural influence wielded by the USA globally and the aid it funds.\n- SRR organizations and activists across the world have expressed fear about the ruling laying the groundwork for anti-abortion legislative and policy attacks in other countries.\n- Advocates have also observed the ruling's impact on progressive law reform and the stalling of the adoption and enforcement of abortion guidelines in certain African countries.\n- The ruling has created a chilling effect in international policy spaces, emboldening anti-abortion state and non-state actors to undermine human rights protections.""
-'The USA Supreme Court ruling on abortion has sparked intense debates and discussions not only within the country but also around the world. Many countries look to the United States as a leader in legal and social issues, so the decision could potentially influence the policies and attitudes towards abortion in other nations.'
-""The ruling may also impact international organizations and non-governmental groups that work on reproductive rights and women's health issues. Depending on the outcome, there could be shifts in funding, advocacy efforts, and collaborations with American counterparts, leading to ripple effects in the global fight for reproductive justice.""]"
_utils/gerar_documento.py
CHANGED
@@ -1,31 +1,11 @@
 import os
-from …
-from …
-from …
-from _utils.bubble_integrations.enviar_resposta_final import enviar_resposta_final
-from _utils.custom_exception_handler import custom_exception_handler_wihout_api_handler
-from _utils.gerar_documento_utils.prompts import (
-    prompt_gerar_query_dinamicamente,
-)
+from typing import Any, Union
+from _utils.custom_exception_handler import custom_exception_handler_without_api_handler
+from rest_framework.response import Response
 from _utils.gerar_documento_utils.GerarDocumento import (
     GerarDocumento,
 )
-from _utils.gerar_documento_utils.contextual_retriever import (
-    ContextualRetriever,
-)
-from _utils.gerar_documento_utils.utils import (
-    generate_document_title,
-    gerar_resposta_compilada,
-    get_full_text_and_all_PDFs_chunks,
-    get_response_from_auxiliar_contextual_prompt,
-)
-from _utils.models.gerar_documento import (
-    RetrievalConfig,
-)
-import markdown
 
-from _utils.langchain_utils.Prompt_class import Prompt
-from _utils.utils import convert_markdown_to_HTML
 from gerar_documento.serializer import (
     GerarDocumentoComPDFProprioSerializer,
     GerarDocumentoComPDFProprioSerializerData,
@@ -47,130 +27,39 @@ async def gerar_documento(
     listaPDFs,
     axiom_instance: Axiom,
     isBubble=False,
-):
+) -> Response | dict[str, Any]:
     try:
-
-
-        # Initialize enhanced summarizer
-        summarizer = GerarDocumento(serializer, axiom_instance)
-
-        all_PDFs_chunks, full_text_as_array = await get_full_text_and_all_PDFs_chunks(
-            listaPDFs,
-            summarizer.splitter,
-            serializer.should_use_llama_parse,
-            isBubble,
-        )
-        axiom_instance.send_axiom(
-            f"INÍCIO DO TEXTO COMPLETO DOS PDFS: {full_text_as_array[0:5]}"
-        )
-
-        is_contextualized_chunk = serializer.should_have_contextual_chunks
-
-        if is_contextualized_chunk:
-            response_auxiliar_summary = (
-                await get_response_from_auxiliar_contextual_prompt(full_text_as_array)
-            )
-            axiom_instance.send_axiom(
-                f"RESUMO INICIAL DO PROCESSO: {response_auxiliar_summary}"
-            )
-
-            axiom_instance.send_axiom("COMEÇANDO A FAZER AS REQUISIÇÕES DO CONTEXTUAL")
-            contextualized_chunks = await contextual_retriever.contextualize_all_chunks(
-                all_PDFs_chunks, response_auxiliar_summary, axiom_instance
-            )
-            axiom_instance.send_axiom(
-                "TERMINOU DE FAZER TODAS AS REQUISIÇÕES DO CONTEXTUAL"
-            )
-            chunks_processados = contextualized_chunks
-            axiom_instance.send_axiom(
-                f"CHUNKS PROCESSADOS INICIALMENTE: {chunks_processados}"
-            )
-        else:
-            chunks_processados = all_PDFs_chunks
-
-        llm = LLM()
-        prompt_para_gerar_query_dinamico = prompt_gerar_query_dinamicamente(
-            cast(str, response_auxiliar_summary)
-        )
-
-        axiom_instance.send_axiom(
-            "COMEÇANDO REQUISIÇÃO PARA GERAR O QUERY DINAMICAMENTE DO VECTOR STORE"
-        )
-        query_gerado_dinamicamente_para_o_vector_store = (
-            await llm.google_gemini_ainvoke(
-                prompt_para_gerar_query_dinamico, "gemini-2.0-flash"
-            )
-        )
-
-        axiom_instance.send_axiom(
-            f"query_gerado_dinamicamente_para_o_vector_store: {query_gerado_dinamicamente_para_o_vector_store.content}",
-        )
-
-        # Create enhanced vector store and BM25 index
-        vector_store, bm25, chunk_ids = (
-            summarizer.vector_store.create_enhanced_vector_store(
-                chunks_processados, is_contextualized_chunk, axiom_instance
-            )
-        )
-
-        llm_ultimas_requests = serializer.llm_ultimas_requests
-        axiom_instance.send_axiom("COMEÇANDO A FAZER ÚLTIMA REQUISIÇÃO")
-        structured_summaries = await summarizer.gerar_documento_final(
-            vector_store,
-            bm25,
-            chunk_ids,
-            llm_ultimas_requests,
-            cast(
-                str, query_gerado_dinamicamente_para_o_vector_store.content
-            ),  # prompt_auxiliar_SEM_CONTEXT,
-        )
-        axiom_instance.send_axiom("TERMINOU DE FAZER A ÚLTIMA REQUISIÇÃO")
-
-        from rest_framework.response import Response
-
-…
-            "resposta_segunda_etapa:", "<br><br>"
-        )
-        axiom_instance.send_axiom(
-            f"texto_completo_como_html: {texto_completo_como_html}"
-        )
-
-…
-            prompt_titulo_do_documento = response_auxiliar_summary
-        else:
-            prompt_titulo_do_documento = texto_completo_como_html
-        titulo_do_documento = await generate_document_title(
-            cast(str, prompt_titulo_do_documento)
-        )
+        summarizer = GerarDocumento(serializer, isBubble, axiom_instance)
+        summarizer.lista_pdfs = listaPDFs
+
+        await summarizer.get_text_and_pdf_chunks()
+
+        await summarizer.get_response_from_auxiliar_contextual_prompt()
+
+        await summarizer.generate_chunks_processados()
+
+        await summarizer.generate_query_for_vector_store()
+
+        await summarizer.create_enhanced_vector_store()
+
+        structured_summaries = await summarizer.do_last_requests()
+
+        if not isinstance(structured_summaries, list):
+            return Response({"erro": structured_summaries})
+
+        await summarizer.generate_complete_text()
+
+        await summarizer.get_document_title()
 
         if isBubble:
-            enviar_resposta_final(
-                serializer.doc_id,  # type: ignore
-                serializer.form_response_id,  # type: ignore
-                serializer.version,  # type: ignore
-                texto_completo_como_html,
-                False,
-                cast(str, titulo_do_documento),
-            )
-            axiom_instance.send_axiom("TERMINOU A REQUISIÇÃO FINAL PARA O BUBBLE")
+            await summarizer.send_to_bubble()
 
         return {
-            "texto_completo": texto_completo_como_html,
-            "titulo_do_documento": titulo_do_documento,
+            "texto_completo": summarizer.texto_completo_como_html,
+            "titulo_do_documento": summarizer.titulo_do_documento,
             "resultado": structured_summaries,
-            "parametros-utilizados": gerar_resposta_compilada(
+            "parametros-utilizados": summarizer.gerar_resposta_compilada(),
         }
     except Exception as e:
-
+        custom_exception_handler_without_api_handler(e, serializer, axiom_instance)
        raise
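After this change, gerar_documento is a thin orchestrator over GerarDocumento methods and returns the compiled payload. A hypothetical call-site sketch (how the Axiom logger and the serializer data object are constructed is an assumption, not shown in this commit):

# Hypothetical call-site sketch; Axiom() construction and serializer_data are assumptions.
from _utils.gerar_documento import gerar_documento
from setup.logging import Axiom


async def exemplo_chamada(serializer_data, lista_pdfs: list[str]):
    axiom = Axiom()  # assumes Axiom can be instantiated without arguments
    resultado = await gerar_documento(serializer_data, lista_pdfs, axiom, isBubble=False)
    # On success the function returns a dict with these keys (per the diff above).
    return resultado["texto_completo"], resultado["titulo_do_documento"]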
_utils/gerar_documento_utils/GerarDocumento.py
CHANGED
@@ -1,12 +1,23 @@
 from dataclasses import dataclass
-import …
+from langchain_core.messages import HumanMessage
 from typing import Any, List, Dict, Literal, Tuple, Optional, Union, cast
 
 from pydantic import SecretStr
+from _utils.Utils_Class import UtilsClass
+from _utils.axiom_logs import AxiomLogs
+from _utils.bubble_integrations.enviar_resposta_final import enviar_resposta_final
+from _utils.gerar_documento_utils.contextual_retriever import ContextualRetriever
+from _utils.gerar_documento_utils.llm_calls import agemini_answer
+from _utils.gerar_documento_utils.prompts import (
+    create_prompt_auxiliar_do_contextual_prompt,
+    prompt_gerar_query_dinamicamente,
+    prompt_para_gerar_titulo,
+)
 from _utils.langchain_utils.Chain_class import Chain
-from _utils.langchain_utils.LLM_class import LLM
+from _utils.langchain_utils.LLM_class import LLM, Google_llms
 from _utils.langchain_utils.Prompt_class import Prompt
 from _utils.langchain_utils.Vector_store_class import VectorStore
+from _utils.utils import convert_markdown_to_HTML
 from gerar_documento.serializer import (
     GerarDocumentoComPDFProprioSerializerData,
     GerarDocumentoSerializerData,
@@ -21,13 +32,17 @@ from setup.easy_imports import (
 )
 import logging
 from _utils.models.gerar_documento import (
+    ContextualizedChunk,
+    DocumentChunk,
     RetrievalConfig,
 )
 from cohere import Client
 from _utils.langchain_utils.Splitter_class import Splitter
 import time
-
+from setup.tokens import openai_api_key, cohere_api_key
 from setup.logging import Axiom
+import tiktoken
+from setup.environment import default_model
 
 
 def reciprocal_rank_fusion(result_lists, weights=None):
@@ -51,6 +66,10 @@
 
 @dataclass
 class GerarDocumentoUtils:
+    axiom_instance: Axiom
+    temperature = 0.0
+    model = default_model
+
     def criar_output_estruturado(self, summaries: List[str | Any], sources: Any):
         structured_output = []
         for idx, summary in enumerate(summaries):
@@ -81,35 +100,131 @@ class GerarDocumentoUtils:
         else:
             return documento_gerado
 
-
-class GerarDocumento:
-    openai_api_key = os.environ.get("OPENAI_API_KEY", "")
-    cohere_api_key = os.environ.get("COHERE_API_KEY", "")
-    resumo_gerado = ""
-    gerar_documento_utils = GerarDocumentoUtils()
-
-    def __init__(
+    def create_retrieval_config(
         self,
         serializer: Union[
             GerarDocumentoSerializerData, GerarDocumentoComPDFProprioSerializerData, Any
         ],
-        axiom_instance: Axiom,
     ):
-…
+        return RetrievalConfig(
            num_chunks=serializer.num_chunks_retrieval,
            embedding_weight=serializer.embedding_weight,
            bm25_weight=serializer.bm25_weight,
            context_window=serializer.context_window,
            chunk_overlap=serializer.chunk_overlap,
        )
+
+    async def checar_se_resposta_vazia_do_documento_final(
+        self, llm_ultimas_requests: str, prompt: str
+    ):
+        llm = self.select_model_for_last_requests(llm_ultimas_requests)  # type: ignore
+        documento_gerado = ""
+        tentativas = 0
+
+        while tentativas < 5 and not documento_gerado:
+            tentativas += 1
+            try:
+                resposta = llm.invoke(prompt)
+                if hasattr(resposta, "content") and resposta.content.strip():  # type: ignore
+                    if isinstance(resposta.content, list):
+                        resposta.content = "\n".join(resposta.content)  # type: ignore
+
+                    documento_gerado = resposta.content.strip()  # type: ignore
+                else:
+                    print(f"Tentativa {tentativas}: resposta vazia ou inexistente.")
+            except Exception as e:
+                llm = self.select_model_for_last_requests("gemini-2.0-flash")
+                print(f"Tentativa {tentativas}: erro ao invocar o modelo: {e}")
+                time.sleep(5)
+
+        if not documento_gerado:
+            try:
+                self.axiom_instance.send_axiom(
+                    "TENTANDO GERAR DOCUMENTO FINAL COM GPT 4o-mini COMO ÚLTIMA TENTATIVA"
+                )
+                documento_gerado = self.ultima_tentativa_requisicao(prompt)
+            except Exception as e:
+                raise Exception(
+                    "Falha ao gerar o documento final na última tentativa."
+                ) from e
+
+        return documento_gerado
+
+    def select_model_for_last_requests(
+        self,
+        llm_ultimas_requests: Literal[
+            "gpt-4o-mini", "deepseek-chat", "gemini-2.0-flash", "gemini-2.5-pro"
+        ],
+    ):
+        llm_instance = LLM()
+        if llm_ultimas_requests == "gpt-4o-mini":
+            llm = ChatOpenAI(
+                temperature=self.temperature,
+                model=self.model,
+                api_key=SecretStr(openai_api_key),
+            )
+        elif llm_ultimas_requests == "deepseek-chat":
+            llm = llm_instance.deepseek()
+        elif llm_ultimas_requests == "gemini-2.0-flash":
+            llm = llm_instance.google_gemini(
+                "gemini-2.0-flash", temperature=self.temperature
+            )
+        elif llm_ultimas_requests == "gemini-2.5-pro":
+            llm = llm_instance.google_gemini(
+                "gemini-2.5-pro-preview-05-06", temperature=self.temperature
+            )
+        elif llm_ultimas_requests == "gemini-2.5-flash":
+            llm = llm_instance.google_gemini(
+                "gemini-2.5-flash-preview-04-17", temperature=self.temperature
+            )
+        return llm
+
+
+class GerarDocumento:
+    lista_pdfs: List[str]
+    should_use_llama_parse: bool
+    all_PDFs_chunks: List[DocumentChunk]
+    full_text_as_array: List[str]
+    isBubble: bool
+    chunks_processados: List[ContextualizedChunk] | List[DocumentChunk]
+    resumo_auxiliar: str
+    gerar_documento_utils: GerarDocumentoUtils
+    utils = UtilsClass()
+    llm = LLM()
+    enhanced_vector_store: tuple[Chroma, BM25Okapi, List[str]]
+    query_gerado_dinamicamente_para_o_vector_store: str
+    structured_output: List[Any]
+    texto_completo_como_html: str
+    titulo_do_documento: str
+    encoding_tiktoken = tiktoken.get_encoding("cl100k_base")
+    serializer: Union[
+        GerarDocumentoSerializerData, GerarDocumentoComPDFProprioSerializerData, Any
+    ]
+
+    def __init__(
+        self,
+        serializer: Union[
+            GerarDocumentoSerializerData, GerarDocumentoComPDFProprioSerializerData, Any
+        ],
+        isBubble: bool,
+        axiom_instance: Axiom,
+    ):
+        self.gerar_documento_utils = GerarDocumentoUtils(axiom_instance)
+        self.gerar_documento_utils.temperature = serializer.gpt_temperature
+        self.config = self.gerar_documento_utils.create_retrieval_config(serializer)
+        self.serializer = serializer
         self.logger = logging.getLogger(__name__)
         # self.prompt_auxiliar = prompt_auxiliar
         self.gpt_model = serializer.model
-        self.…
+        self.llm_temperature = serializer.gpt_temperature
         self.prompt_gerar_documento = serializer.prompt_gerar_documento
+        self.should_use_llama_parse = serializer.should_use_llama_parse
+        self.isBubble = isBubble
+        self.is_contextualized_chunk = serializer.should_have_contextual_chunks
+        self.contextual_retriever = ContextualRetriever(serializer)
+        self.llm_ultimas_requests = serializer.llm_ultimas_requests
 
-        self.…
-        self.cohere_client = Client(self.cohere_api_key)
+        self.cohere_client = Client(cohere_api_key)
         self.embeddings = HuggingFaceEmbeddings(model_name=serializer.hf_embedding)
         self.num_k_rerank = serializer.num_k_rerank
         self.model_cohere_rerank = serializer.model_cohere_rerank
@@ -119,6 +234,75 @@ class GerarDocumento:
 
         self.vector_store = VectorStore(serializer.hf_embedding)
         self.axiom_instance: Axiom = axiom_instance
+        self.ax = AxiomLogs(axiom_instance)
+
+    async def get_text_and_pdf_chunks(self):
+        all_PDFs_chunks, full_text_as_array = (
+            await self.utils.handle_files.get_full_text_and_all_PDFs_chunks(
+                self.lista_pdfs,
+                self.splitter,
+                self.should_use_llama_parse,
+                self.isBubble,
+            )
+        )
+        self.ax.texto_completo_pdf(full_text_as_array)
+
+        self.all_PDFs_chunks = all_PDFs_chunks
+        self.full_text_as_array = full_text_as_array
+        return all_PDFs_chunks, full_text_as_array
+
+    async def generate_chunks_processados(self):
+        if self.is_contextualized_chunk:
+            self.ax.inicio_requisicao_contextual()
+            contextualized_chunks = (
+                await self.contextual_retriever.contextualize_all_chunks(
+                    self.all_PDFs_chunks, self.resumo_auxiliar, self.axiom_instance
+                )
+            )
+            self.ax.fim_requisicao_contextual()
+
+        chunks_processados = (
+            contextualized_chunks
+            if self.is_contextualized_chunk
+            else self.all_PDFs_chunks
+        )
+        self.chunks_processados = chunks_processados
+        if len(self.chunks_processados) == 0:
+            self.chunks_processados = self.all_PDFs_chunks
+        self.ax.chunks_inicialmente(self.chunks_processados)
+        return self.chunks_processados
+
+    async def generate_query_for_vector_store(self):
+        prompt_para_gerar_query_dinamico = prompt_gerar_query_dinamicamente(
+            cast(str, self.resumo_auxiliar)
+        )
+
+        self.axiom_instance.send_axiom(
+            "COMEÇANDO REQUISIÇÃO PARA GERAR O QUERY DINAMICAMENTE DO VECTOR STORE"
+        )
+        response = await self.llm.google_gemini_ainvoke(
+            prompt_para_gerar_query_dinamico,
+            "gemini-2.0-flash",
+            temperature=self.llm_temperature,
+        )
+
+        self.query_gerado_dinamicamente_para_o_vector_store = cast(
+            str, response.content
+        )
+
+        self.axiom_instance.send_axiom(
+            f"query_gerado_dinamicamente_para_o_vector_store: {self.query_gerado_dinamicamente_para_o_vector_store}",
+        )
+
+        return self.query_gerado_dinamicamente_para_o_vector_store
+
+    async def create_enhanced_vector_store(self):
+        vector_store, bm25, chunk_ids = self.vector_store.create_enhanced_vector_store(
+            self.chunks_processados, self.is_contextualized_chunk, self.axiom_instance  # type: ignore
+        )
+
+        self.enhanced_vector_store = vector_store, bm25, chunk_ids
+        return vector_store, bm25, chunk_ids
 
     def retrieve_with_rank_fusion(
         self, vector_store: Chroma, bm25: BM25Okapi, chunk_ids: List[str], query: str
@@ -208,40 +392,18 @@
 
         return sources, contexts
 
-    def …
-        self,
-        llm_ultimas_requests: Literal[
-            "gpt-4o-mini", "deepseek-chat", "gemini-2.0-flash", "gemini-2.5-pro"
-        ],
-    ):
-        llm_instance = LLM()
-        if llm_ultimas_requests == "gpt-4o-mini":
-            llm = ChatOpenAI(
-                temperature=self.gpt_temperature,
-                model=self.gpt_model,
-                api_key=SecretStr(self.openai_api_key),
-            )
-        elif llm_ultimas_requests == "deepseek-chat":
-            llm = llm_instance.deepseek()
-        elif llm_ultimas_requests == "gemini-2.0-flash":
-            llm = llm_instance.google_gemini("gemini-2.0-flash")
-        elif llm_ultimas_requests == "gemini-2.5-pro":
-            llm = llm_instance.google_gemini("gemini-2.5-pro-preview-05-06")
-        elif llm_ultimas_requests == "gemini-2.5-flash":
-            llm = llm_instance.google_gemini("gemini-2.5-flash-preview-04-17")
-        return llm
-
-    async def gerar_documento_final(
+    async def do_last_requests(
         self,
-        vector_store: Chroma,
-        bm25: BM25Okapi,
-        chunk_ids: List[str],
-        llm_ultimas_requests: str,
-        query: str = "Summarize the main points of this document",
     ) -> List[Dict]:
         try:
+            self.axiom_instance.send_axiom("COMEÇANDO A FAZER ÚLTIMA REQUISIÇÃO")
+            vector_store, bm25, chunk_ids = self.enhanced_vector_store
+
             sources, contexts = self.rank_fusion_get_top_results(
-                vector_store,
+                vector_store,
+                bm25,
+                chunk_ids,
+                self.query_gerado_dinamicamente_para_o_vector_store,
             )
 
             prompt_gerar_documento = PromptTemplate(
@@ -249,14 +411,16 @@
                 input_variables=["context"],
             )
 
-…
+            llm_ultimas_requests = self.llm_ultimas_requests
             prompt_instance = Prompt()
             context_do_prompt_primeira_etapa = "\n\n".join(contexts)
             prompt_primeira_etapa = prompt_gerar_documento.format(
                 context=context_do_prompt_primeira_etapa,
             )
 
-…
+            self.gerar_documento_utils.model = self.gpt_model
+            self.gerar_documento_utils.temperature = self.llm_temperature
+            documento_gerado = await self.gerar_documento_utils.checar_se_resposta_vazia_do_documento_final(
                 llm_ultimas_requests, prompt_primeira_etapa
             )
 
@@ -274,7 +438,7 @@
                 dynamic_dict={"context": context_do_prompt_primeira_etapa},
             )
             # documento_gerado = llm.invoke(prompt_etapa_2).content
-            documento_gerado = self.checar_se_resposta_vazia_do_documento_final(
+            documento_gerado = self.gerar_documento_utils.checar_se_resposta_vazia_do_documento_final(
                 llm_ultimas_requests, prompt_etapa_2.to_string()
             )
             resposta_segunda_etapa = documento_gerado
@@ -292,7 +456,7 @@
                 },
             )
             # documento_gerado = llm.invoke(prompt_etapa_3).content
-            documento_gerado = self.checar_se_resposta_vazia_do_documento_final(
+            documento_gerado = self.gerar_documento_utils.checar_se_resposta_vazia_do_documento_final(
                 llm_ultimas_requests, prompt_etapa_3.to_string()
             )
             texto_final_juntando_as_etapas += f"\n\n{documento_gerado}"
@@ -306,49 +470,57 @@
             structured_output = self.gerar_documento_utils.criar_output_estruturado(
                 summaries, sources
             )
+
+            self.axiom_instance.send_axiom("TERMINOU DE FAZER A ÚLTIMA REQUISIÇÃO")
+            self.structured_output = structured_output
             return structured_output
 
         except Exception as e:
             self.logger.error(f"Error generating enhanced summary: {str(e)}")
             raise
 
-    async def …
-…
-    ):
-        llm = self.select_model_for_last_requests(llm_ultimas_requests)  # type: ignore
-        documento_gerado = ""
-        tentativas = 0
-…
-            if hasattr(resposta, "content") and resposta.content.strip():  # type: ignore
-                if isinstance(resposta.content, list):
-                    resposta.content = "\n".join(resposta.content)  # type: ignore
-…
-        except Exception as e:
-            llm = self.select_model_for_last_requests("gemini-2.0-flash")
-            print(f"Tentativa {tentativas}: erro ao invocar o modelo: {e}")
-            time.sleep(5)
-…
-                "TENTANDO GERAR DOCUMENTO FINAL COM GPT 4o-mini COMO ÚLTIMA TENTATIVA"
-            )
-            documento_gerado = (
-                self.gerar_documento_utils.ultima_tentativa_requisicao(prompt)
-            )
-        except Exception as e:
-            raise Exception(
-                "Falha ao gerar o documento final na última tentativa."
-            ) from e
-…
+    async def generate_complete_text(self):
+        texto_completo = "\n\n"
+
+        for x in self.structured_output:
+            texto_completo = texto_completo + x["content"] + "\n"
+            x["source"]["text"] = x["source"]["text"][0:200]
+            x["source"]["context"] = x["source"]["context"][0:200]
+
+        self.texto_completo_como_html = convert_markdown_to_HTML(
+            texto_completo
+        ).replace("resposta_segunda_etapa:", "<br><br>")
+
+        self.axiom_instance.send_axiom(
+            f"texto_completo_como_html: {self.texto_completo_como_html}"
+        )
+
+    async def get_document_title(self):
+        if self.is_contextualized_chunk:
+            resumo_para_gerar_titulo = self.resumo_auxiliar
+        else:
+            resumo_para_gerar_titulo = self.texto_completo_como_html
+
+        prompt = prompt_para_gerar_titulo(resumo_para_gerar_titulo)
+        response = await agemini_answer(
+            prompt, "gemini-2.0-flash-lite", temperature=self.llm_temperature
+        )
+        self.titulo_do_documento = response
+        return self.titulo_do_documento
+
+    async def send_to_bubble(self):
+        self.axiom_instance.send_axiom("COMEÇANDO A REQUISIÇÃO FINAL PARA O BUBBLE")
+
+        enviar_resposta_final(
+            self.serializer.doc_id,  # type: ignore
+            self.serializer.form_response_id,  # type: ignore
+            self.serializer.version,  # type: ignore
+            self.texto_completo_como_html,
+            False,
+            cast(str, self.titulo_do_documento),
+        )
+
+        self.axiom_instance.send_axiom("TERMINOU A REQUISIÇÃO FINAL PARA O BUBBLE")
 
     async def gerar_ementa_final(
         self,
@@ -357,10 +529,10 @@
         context_primeiro_prompt: str,
     ):
 
-        llm = self.select_model_for_last_requests(llm_ultimas_requests)  # type: ignore
+        llm = self.gerar_documento_utils.select_model_for_last_requests(llm_ultimas_requests)  # type: ignore
         prompt_instance = Prompt()
 
-        documento_gerado = await self.checar_se_resposta_vazia_do_documento_final(
+        documento_gerado = await self.gerar_documento_utils.checar_se_resposta_vazia_do_documento_final(
            llm_ultimas_requests, prompt_primeira_etapa
        )
 
@@ -395,3 +567,77 @@
         self.axiom_instance.send_axiom(f"RESULTADO ETAPA 3: {documento_gerado}")
 
         return texto_final_juntando_as_etapas
+
+    # Esta função gera a resposta que será usada em cada um das requisições de cada chunk
+    async def get_response_from_auxiliar_contextual_prompt(self):
+        llms = LLM()
+        responses = []
+
+        current_chunk = []
+        current_token_count = 0
+        chunk_counter = 1
+
+        for part in self.full_text_as_array:
+            part_tokens = len(self.encoding_tiktoken.encode(part))
+
+            # Check if adding this part would EXCEED the limit
+            if current_token_count + part_tokens > 600000:
+                # Process the accumulated chunk before it exceeds the limit
+                chunk_text = "".join(current_chunk)
+                print(
+                    f"\nProcessing chunk {chunk_counter} with {current_token_count} tokens"
+                )
+
+                prompt = create_prompt_auxiliar_do_contextual_prompt(chunk_text)
+                response = await llms.google_gemini(
+                    temperature=self.llm_temperature
+                ).ainvoke([HumanMessage(content=prompt)])
+                responses.append(response.content)
+
+                # Start new chunk with current part
+                current_chunk = [part]
+                current_token_count = part_tokens
+                chunk_counter += 1
+            else:
+                # Safe to add to current chunk
+                current_chunk.append(part)
+                current_token_count += part_tokens
+
+        # Process the final remaining chunk
+        if current_chunk:
+            chunk_text = "".join(current_chunk)
+            print(
+                f"\nProcessing final chunk {chunk_counter} with {current_token_count} tokens"
+            )
+            prompt = create_prompt_auxiliar_do_contextual_prompt(chunk_text)
+            response = await llms.google_gemini(
+                temperature=self.llm_temperature
+            ).ainvoke([HumanMessage(content=prompt)])
+            responses.append(response.content)
+
+        self.resumo_auxiliar = "".join(responses)
+        self.ax.resumo_inicial_processo(self.resumo_auxiliar)
+
+        return self.resumo_auxiliar
+
+    def gerar_resposta_compilada(self):
+        serializer = self.serializer
+        return {
+            "num_chunks_retrieval": serializer.num_chunks_retrieval,
+            "embedding_weight": serializer.embedding_weight,
+            "bm25_weight": serializer.bm25_weight,
+            "context_window": serializer.context_window,
+            "chunk_overlap": serializer.chunk_overlap,
+            "num_k_rerank": serializer.num_k_rerank,
+            "model_cohere_rerank": serializer.model_cohere_rerank,
+            "more_initial_chunks_for_reranking": serializer.more_initial_chunks_for_reranking,
+            "claude_context_model": serializer.claude_context_model,
+            "gpt_temperature": serializer.gpt_temperature,
+            "user_message": serializer.user_message,
+            "model": serializer.model,
+            "hf_embedding": serializer.hf_embedding,
+            "chunk_size": serializer.chunk_size,
+            "chunk_overlap": serializer.chunk_overlap,
+            # "prompt_auxiliar": serializer.prompt_auxiliar,
+            "prompt_gerar_documento": serializer.prompt_gerar_documento[0:200],
+        }
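The refactor keeps the existing reciprocal_rank_fusion helper (its body lies outside the hunks shown above). For reference, an illustrative sketch of how reciprocal rank fusion typically combines the embedding and BM25 rankings — a generic example, not the repository's exact implementation:

# Illustrative sketch of standard reciprocal rank fusion (not the repo's exact code).
def reciprocal_rank_fusion_sketch(result_lists, weights=None, k: int = 60):
    """Combine several ranked lists of chunk ids into one fused ranking.

    Each list contributes weight / (k + rank) to every id it contains, so ids
    near the top of several lists float to the top of the fused ranking.
    """
    if weights is None:
        weights = [1.0] * len(result_lists)

    scores: dict[str, float] = {}
    for ranking, weight in zip(result_lists, weights):
        for rank, chunk_id in enumerate(ranking, start=1):
            scores[chunk_id] = scores.get(chunk_id, 0.0) + weight / (k + rank)

    return sorted(scores.items(), key=lambda item: item[1], reverse=True)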
_utils/gerar_documento_utils/llm_calls.py
CHANGED
@@ -62,8 +62,9 @@ async def agemini_answer(
     model: Literal[
         "gemini-2.5-pro-preview-05-06", "gemini-2.0-flash", "gemini-2.0-flash-lite"
     ] = "gemini-2.0-flash",
+    temperature=0.4,
 ) -> str:
-    gemini = llm.google_gemini(model)
+    gemini = llm.google_gemini(model, temperature)
     resposta = await gemini.ainvoke([HumanMessage(content=prompt)])
 
     if isinstance(resposta.content, list):
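The new temperature parameter is passed straight through to LLM.google_gemini. A minimal usage sketch (the prompt text is a placeholder):

# Minimal usage sketch of the updated helper; the prompt string is a placeholder.
import asyncio

from _utils.gerar_documento_utils.llm_calls import agemini_answer


async def exemplo_resposta() -> str:
    return await agemini_answer(
        "Resuma este trecho em uma frase.",  # placeholder prompt
        "gemini-2.0-flash-lite",
        temperature=0.0,  # lower temperature for more deterministic output
    )

# asyncio.run(exemplo_resposta())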
_utils/gerar_documento_utils/prompts.py
CHANGED
@@ -1,4 +1,14 @@
-def create_prompt_auxiliar_do_contextual_prompt(PROCESSO_JURIDICO: str):
+def create_prompt_auxiliar_do_contextual_prompt(PROCESSO_JURIDICO: str | None = None):
+    if PROCESSO_JURIDICO:
+        adicionar_ao_prompt = f"""
+1. **Análise Completa:** Leia e analise todo o conteúdo do processo fornecido.
+<processo_juridico>
+{PROCESSO_JURIDICO}
+</processo_juridico>"""
+    else:
+        adicionar_ao_prompt = """
+1. **Análise Completa:** Leia e analise todo o conteúdo do processo fornecido como PDF."""
+
     return f"""
 <prompt>
 <persona>
@@ -46,10 +56,7 @@ Seu objetivo é analisar o processo jurídico fornecido e gerar um relatório co
 <instrucoes>
 Siga estritamente os passos abaixo:
 
-…
-<processo_juridico>
-{PROCESSO_JURIDICO}
-</processo_juridico>
+{adicionar_ao_prompt}
 
 2. **Identificação e Listagem de Peças:** Identifique quais das peças listadas na `<tarefa>` estão presentes no texto. Liste **apenas** as encontradas na tag `<pecas_identificadas>`.
 
@@ -239,3 +246,7 @@ def prompt_gerar_query_dinamicamente(resumo_do_processo: str):
 
 - *Importante:** Sua resposta final deve ser *somente* a string da query.
 """
+
+
+def prompt_para_gerar_titulo(resumo_para_gerar_titulo: str):
+    return f"Você é um assistente jurídico e irá receber abaixo o resumo de um documento jurídico. Quero que você gere um título para este documento. Mande como resposta apenas o título gerado, nada mais. Aqui está um título de exemplo pra você se basear ao criar um novo: <titulo_de_exemplo>Ação Penal por Furto Qualificado nº 0002269-86.2009.805.0032<titulo_de_exemplo>\n\nSegue abaixo o resumo do documento jurídico:\n{resumo_para_gerar_titulo}"
_utils/gerar_documento_utils/utils.py
CHANGED
@@ -1,91 +1,10 @@
|
|
1 |
-
from typing import Any, List, Tuple, Union
|
2 |
-
from langchain_core.documents import Document
|
3 |
-
from langchain_core.messages import HumanMessage
|
4 |
-
|
5 |
from _utils.gerar_documento_utils.llm_calls import agemini_answer
|
6 |
-
from _utils.
|
7 |
-
from _utils.langchain_utils.LLM_class import LLM
|
8 |
-
from _utils.gerar_documento_utils.prompts import (
|
9 |
-
create_prompt_auxiliar_do_contextual_prompt,
|
10 |
-
)
|
11 |
-
|
12 |
-
from _utils.models.gerar_documento import DocumentChunk
|
13 |
-
from gerar_documento.serializer import GerarDocumentoSerializerData
|
14 |
import tiktoken
|
15 |
|
16 |
encoding = tiktoken.get_encoding("cl100k_base")
|
17 |
|
18 |
|
19 |
-
def gerar_resposta_compilada(serializer: Union[GerarDocumentoSerializerData, Any]):
|
20 |
-
return {
|
21 |
-
"num_chunks_retrieval": serializer.num_chunks_retrieval,
|
22 |
-
"embedding_weight": serializer.embedding_weight,
|
23 |
-
"bm25_weight": serializer.bm25_weight,
|
24 |
-
"context_window": serializer.context_window,
|
25 |
-
"chunk_overlap": serializer.chunk_overlap,
|
26 |
-
"num_k_rerank": serializer.num_k_rerank,
|
27 |
-
"model_cohere_rerank": serializer.model_cohere_rerank,
|
28 |
-
"more_initial_chunks_for_reranking": serializer.more_initial_chunks_for_reranking,
|
29 |
-
"claude_context_model": serializer.claude_context_model,
|
30 |
-
"gpt_temperature": serializer.gpt_temperature,
|
31 |
-
"user_message": serializer.user_message,
|
32 |
-
"model": serializer.model,
|
33 |
-
"hf_embedding": serializer.hf_embedding,
|
34 |
-
"chunk_size": serializer.chunk_size,
|
35 |
-
"chunk_overlap": serializer.chunk_overlap,
|
36 |
-
# "prompt_auxiliar": serializer.prompt_auxiliar,
|
37 |
-
"prompt_gerar_documento": serializer.prompt_gerar_documento[0:200],
|
38 |
-
}
|
39 |
-
|
40 |
-
|
41 |
-
# Esta função gera a resposta que será usada em cada um das requisições de cada chunk
|
42 |
-
async def get_response_from_auxiliar_contextual_prompt(full_text_as_array: List[str]):
|
43 |
-
llms = LLM()
|
44 |
-
responses = []
|
45 |
-
|
46 |
-
current_chunk = []
|
47 |
-
current_token_count = 0
|
48 |
-
-    chunk_counter = 1
-
-    for part in full_text_as_array:
-        part_tokens = len(encoding.encode(part))
-
-        # Check if adding this part would EXCEED the limit
-        if current_token_count + part_tokens > 600000:
-            # Process the accumulated chunk before it exceeds the limit
-            chunk_text = "".join(current_chunk)
-            print(
-                f"\nProcessing chunk {chunk_counter} with {current_token_count} tokens"
-            )
-
-            prompt = create_prompt_auxiliar_do_contextual_prompt(chunk_text)
-            response = await llms.google_gemini().ainvoke(
-                [HumanMessage(content=prompt)]
-            )
-            responses.append(response.content)
-
-            # Start new chunk with current part
-            current_chunk = [part]
-            current_token_count = part_tokens
-            chunk_counter += 1
-        else:
-            # Safe to add to current chunk
-            current_chunk.append(part)
-            current_token_count += part_tokens
-
-    # Process the final remaining chunk
-    if current_chunk:
-        chunk_text = "".join(current_chunk)
-        print(
-            f"\nProcessing final chunk {chunk_counter} with {current_token_count} tokens"
-        )
-        prompt = create_prompt_auxiliar_do_contextual_prompt(chunk_text)
-        response = await llms.google_gemini().ainvoke([HumanMessage(content=prompt)])
-        responses.append(response.content)
-
-    return "".join(responses)
-
-
 def split_text_by_tokens(full_text: str):
     tokens = encoding.encode(full_text)
     max_tokens = 600000

@@ -101,27 +20,7 @@ def split_text_by_tokens(full_text: str):
     return text_chunks


-async def get_full_text_and_all_PDFs_chunks(
-    listaPDFs: List[str],
-    splitterObject: Splitter,
-    should_use_llama_parse: bool,
-    isBubble: bool,
-) -> Tuple[List[DocumentChunk], List[str]]:
-    all_PDFs_chunks: List[DocumentChunk] = []
-
-    pages: List[str] = []
-
-    # Load and process document
-    for pdf_path in listaPDFs:
-        chunks, pages = await splitterObject.load_and_split_document(
-            pdf_path, should_use_llama_parse, isBubble
-        )
-        all_PDFs_chunks = all_PDFs_chunks + chunks
-
-    return all_PDFs_chunks, pages
-
-
 async def generate_document_title(resumo_para_gerar_titulo: str):
-    prompt =
+    prompt = prompt_para_gerar_titulo(resumo_para_gerar_titulo)
     response = await agemini_answer(prompt, "gemini-2.0-flash-lite")
     return response

 from _utils.gerar_documento_utils.llm_calls import agemini_answer
+from _utils.gerar_documento_utils.prompts import prompt_para_gerar_titulo
 import tiktoken

 encoding = tiktoken.get_encoding("cl100k_base")
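The title helper is now a thin wrapper around the new prompt function. A minimal usage sketch (the summary text is hypothetical, and the call assumes a synchronous entry point that can drive the coroutine):

    import asyncio
    from _utils.gerar_documento_utils.utils import generate_document_title

    # Generates a short document title from an already-produced summary.
    titulo = asyncio.run(generate_document_title("Resumo do processo..."))
    print(titulo)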
_utils/google_integration/google_cloud.py
ADDED
@@ -0,0 +1,28 @@
+import os
+from google.cloud import storage
+
+GCP_PROJECT = "gen-lang-client-0350149082"
+GCP_REGION = "us-central1"
+DOCUMENT_API_ID = "b34a20d22dee16bb"
+GCS_BUCKET_NAME = "vella-pdfs"
+
+
+def upload_to_gcs(LOCAL_PDF_PATH: str) -> str:
+
+    # Path in GCS
+    GCS_DESTINATION_BLOB_NAME = "gemini_uploads/" + os.path.basename(LOCAL_PDF_PATH)
+
+    """Uploads a file to a GCS bucket and returns its URI."""
+    storage_client = storage.Client(
+        project=GCP_PROJECT,
+    )
+    bucket = storage_client.bucket(GCS_BUCKET_NAME)
+    blob = bucket.blob(GCS_DESTINATION_BLOB_NAME)
+
+    print(
+        f"Uploading {LOCAL_PDF_PATH} to gs://{GCS_BUCKET_NAME}/{GCS_DESTINATION_BLOB_NAME}..."
+    )
+    blob.upload_from_filename(LOCAL_PDF_PATH)
+    gcs_uri = f"gs://{GCS_BUCKET_NAME}/{GCS_DESTINATION_BLOB_NAME}"
+    print(f"File uploaded to {gcs_uri}")
+    return gcs_uri
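A minimal usage sketch of the new upload helper (assumes GOOGLE_APPLICATION_CREDENTIALS, or the credentials file written by entrypoint.sh, grants write access to the bucket; the local path is hypothetical):

    from _utils.google_integration.google_cloud import upload_to_gcs

    # Uploads the local PDF under gemini_uploads/ and returns its gs:// URI.
    gcs_uri = upload_to_gcs("/tmp/processo.pdf")
    print(gcs_uri)  # e.g. gs://vella-pdfs/gemini_uploads/processo.pdf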
_utils/langchain_utils/LLM_class.py
CHANGED
@@ -1,9 +1,10 @@
-from typing import Literal, cast
+from typing import List, Literal, cast
 from pydantic import SecretStr
-from
+from _utils.google_integration.google_cloud import GCP_PROJECT, upload_to_gcs
 from setup.easy_imports import ChatOpenAI, ChatGoogleGenerativeAI
 import os
 from langchain_core.messages import HumanMessage
+from langchain_google_vertexai import ChatVertexAI

 deepseek_api_key = cast(str, os.environ.get("DEEPSEEKK_API_KEY"))
 google_api_key = cast(str, os.environ.get("GOOGLE_API_KEY_PEIXE"))

@@ -31,14 +32,11 @@ class LLM:
             model=model,
         )

-    def google_gemini(
-        self,
-        model: Google_llms = "gemini-2.0-flash",
-    ):
+    def google_gemini(self, model: Google_llms = "gemini-2.0-flash", temperature=0.4):
        return ChatGoogleGenerativeAI(
             api_key=SecretStr(google_api_key),
             model=model,
-            temperature=
+            temperature=temperature,
             max_tokens=None,
             timeout=None,
             max_retries=2,

@@ -49,10 +47,11 @@ class LLM:
         prompt: str,
         model: Google_llms = "gemini-2.0-flash",
         max_retries: int = 3,
+        temperature=0.4,
     ):
         for attempt in range(max_retries):
             try:
-                response = await self.google_gemini(model).ainvoke(
+                response = await self.google_gemini(model, temperature).ainvoke(
                     [HumanMessage(content=prompt)]
                 )

@@ -75,3 +74,44 @@ class LLM:
                 raise Exception(
                     "Failed to generate the final document after 5 retries and the fallback attempt with chat-gpt-4o-mini."
                 ) from e
+
+    async def google_gemini_vertex_ainvoke(
+        self,
+        prompt: str,
+        list_of_pdfs: List[str],
+        model: Google_llms = "gemini-2.5-flash-preview-04-17",
+        max_retries: int = 3,
+    ) -> str | None:
+        message_parts = [
+            {"type": "text", "text": prompt},
+        ]
+        for pdf in list_of_pdfs:
+            pdf_gcs_uri = upload_to_gcs(pdf)
+            message_parts.append(
+                {
+                    # This structure is used for file references via URI
+                    "type": "media",
+                    "mime_type": "application/pdf",  # <-- mime_type moved up
+                    "file_uri": pdf_gcs_uri,  # <-- file_uri moved up
+                }
+            )
+
+        for attempt in range(max_retries):
+            try:
+                llm = ChatVertexAI(
+                    model_name=model,
+                    project=GCP_PROJECT,
+                    location="us-central1",
+                    temperature=0,
+                )
+                response = await llm.ainvoke(
+                    [HumanMessage(content=message_parts)]  # type: ignore
+                )
+
+                if isinstance(response.content, list):
+                    response.content = "\n".join(response.content)  # type: ignore
+
+                return response.content  # type: ignore
+            except Exception as e:
+                model = "gemini-2.0-flash"
+                print(f"Attempt {attempt + 1} failed with error: {e}")
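A hedged sketch of the new Vertex AI helper in isolation (it assumes LLM() can be constructed without arguments and that the service account has Vertex AI and GCS permissions; the PDF path is hypothetical):

    import asyncio
    from _utils.langchain_utils.LLM_class import LLM

    async def resumir(pdfs: list[str]) -> str | None:
        llm = LLM()
        # Each PDF is first uploaded to GCS, then referenced by URI in the message.
        return await llm.google_gemini_vertex_ainvoke(
            "Resuma os documentos anexados.", pdfs
        )

    resposta = asyncio.run(resumir(["/tmp/processo.pdf"]))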
_utils/langchain_utils/Splitter_class.py
CHANGED
@@ -1,4 +1,9 @@
-
+import os
+import time
+from _utils.bubble_integrations.obter_arquivo import (
+    download_file_from_bubble,
+    get_pdf_from_bubble,
+)
 from _utils.handle_files import return_document_list_with_llama_parser
 from _utils.langchain_utils.splitter_util import (
     Splitter_Simple,

@@ -18,6 +23,17 @@ from _utils.models.gerar_documento import (
     DocumentChunk,
 )
 import uuid
+import json
+from _utils.google_integration.google_cloud import (
+    DOCUMENT_API_ID,
+    GCP_PROJECT,
+    GCP_REGION,
+    GCS_BUCKET_NAME,
+    upload_to_gcs,
+)
+from google.cloud import documentai
+from google.cloud import storage
+from _utils.bubble_integrations.obter_arquivo import headers


 class Splitter:

@@ -34,7 +50,10 @@ class Splitter:
         self.chunk_metadata = {}  # Store chunk metadata for tracing

     async def load_and_split_document(
-        self,
+        self,
+        pdf_path: str,
+        should_use_llama_parse: bool,
+        isBubble: bool,
     ):
         """Load PDF and split into chunks with metadata"""
         # loader = PyPDFLoader(pdf_path)

@@ -144,6 +163,13 @@
             # char_count += len(text)
         print("TERMINOU DE ORGANIZAR PDFS EM CHUNKS")

+        if len(pages) == 0 or len(chunks) == 0:
+            if isBubble:
+                pdf_path = download_file_from_bubble(pdf_path, headers, "pdf")
+            text = await self.getOCRFromGoogleDocumentAPI(pdf_path)
+            chunks = self.load_and_split_text(text)  # type: ignore
+            chunks_of_string_only = [chunk.content for chunk in chunks]
+
         return chunks, chunks_of_string_only

     def load_and_split_text(self, text: str) -> List[DocumentChunk]:

@@ -185,3 +211,132 @@
             char_count += len(text)

         return chunks
+
+    async def getOCRFromGoogleDocumentAPI(self, pdf_path: str):
+
+        pdf_gcs_uri = upload_to_gcs(pdf_path)
+
+        GCS_OUTPUT_PREFIX = "documentai_output/"
+        # GCS_INPUT_URI = f"gs://{GCS_BUCKET_NAME}/{f"gemini_uploads/{pdf_gcs_uri}"}"
+        GCS_INPUT_URI = pdf_gcs_uri
+        GCS_OUTPUT_URI = f"gs://{GCS_BUCKET_NAME}/{GCS_OUTPUT_PREFIX}"
+
+        docai_client = documentai.DocumentProcessorServiceClient()
+
+        processor_name = docai_client.processor_path(
+            project=GCP_PROJECT, location="us", processor=DOCUMENT_API_ID
+        )
+
+        gcs_document = documentai.GcsDocument(
+            gcs_uri=GCS_INPUT_URI,
+            mime_type="application/pdf",  # Mime type is specified here for GcsDocument
+        )
+
+        gcs_documents = documentai.GcsDocuments(documents=[gcs_document])
+
+        # 3. Create the BatchDocumentsInputConfig
+        input_config = documentai.BatchDocumentsInputConfig(gcs_documents=gcs_documents)
+        # Note: If GCS_INPUT_URI was a prefix for multiple files, you'd use GcsPrefix:
+        # gcs_prefix = documentai.GcsPrefix(gcs_uri_prefix=GCS_INPUT_URI_PREFIX)
+        # input_config = documentai.BatchDocumentsInputConfig(gcs_prefix=gcs_prefix, mime_type="application/pdf")
+
+        # 4. Create the DocumentOutputConfig
+        # GCS_OUTPUT_URI should be a gs:// URI prefix where the output JSONs will be stored
+        output_config = documentai.DocumentOutputConfig(
+            gcs_output_config=documentai.DocumentOutputConfig.GcsOutputConfig(
+                gcs_uri=GCS_OUTPUT_URI
+            )
+        )
+
+        # 5. Construct the BatchProcessRequest
+        request = documentai.BatchProcessRequest(
+            name=processor_name,
+            input_documents=input_config,  # Use 'input_documents'
+            document_output_config=output_config,  # Use 'document_output_config'
+        )
+
+        # Submit the batch process request (this is a long-running operation)
+        operation = docai_client.batch_process_documents(request)
+
+        print("Batch processing operation started. Waiting for completion...")
+        while not operation.done():
+            time.sleep(15)  # Wait for 30 seconds before checking again
+            print("Waiting...")
+
+        print("Batch processing operation finished.")
+
+        # --- Download the results from GCS ---
+        storage_client = storage.Client(
+            project=GCP_PROJECT
+        )  # Uses GOOGLE_APPLICATION_CREDENTIALS/ADC
+        bucket = storage_client.bucket(GCS_BUCKET_NAME)
+
+        output_blobs = storage_client.list_blobs(
+            GCS_BUCKET_NAME, prefix=GCS_OUTPUT_PREFIX
+        )
+
+        downloaded_files_texts = []
+        try:
+            for blob in output_blobs:
+                # Document AI adds suffixes and subdirectories. Look for the actual JSON output files.
+                # The exact naming depends on the processor and options. Common pattern is ending with .json
+                if blob.name.endswith(".json"):
+                    local_download_path = os.path.basename(
+                        blob.name
+                    )  # Download to current directory with blob name
+                    print(f"Downloading {blob.name} to {local_download_path}...")
+                    blob.download_to_filename(local_download_path)
+
+                    with open(local_download_path, "r", encoding="utf-8") as f:
+                        document_data = json.load(f)
+
+                    # The top-level 'text' field contains the concatenated plain text.
+                    if "text" in document_data and document_data["text"] is not None:
+                        raw_text = document_data["text"]
+                        print(f"\n--- Raw Text Extracted from {blob.name} ---")
+                        # Print only a snippet or process as needed
+                        print(
+                            raw_text[:1000] + "..."
+                            if len(raw_text) > 1000
+                            else raw_text
+                        )
+                        print("--------------------------------------------")
+
+                        return raw_text
+
+                        # Optional: Store the text. If you processed a batch of files,
+                        # you might want to associate the text with the original file name.
+                        # Document AI metadata might link output JSONs back to input files.
+                        # For simplicity here, let's just show the extraction.
+                        # If you know it was a single input PDF, this is all the text.
+                        # If it was multiple, you'd need a mapping or process each JSON.
+
+                    else:
+                        print(
+                            f"Warning: 'text' field not found in {blob.name} or is empty."
+                        )
+
+                    # Optional: Read and print a snippet of the JSON content
+                    # with open(local_download_path, 'r', encoding='utf-8') as f:
+                    #     data = json.load(f)
+                    #     # Print some extracted text, for example (structure varies by processor)
+                    #     if 'text' in data:
+                    #         print(f"Extracted text snippet: {data['text'][:500]}...")  # Print first 500 chars
+                    #     elif 'entities' in data:
+                    #         print(f"Number of entities found: {len(data['entities'])}")
+                    #     else:
+                    #         print("Output JSON structure not immediately recognizable.")
+                    # break  # Uncomment if you only expect/need to process the first output file
+
+            if len(downloaded_files_texts) == 0 or not downloaded_files_texts:
+                print("No JSON output files found in the specified output location.")
+
+        except Exception as e:
+            print(f"Error listing or downloading output files: {e}")
+
+        print("\nProcess complete.")
+        if downloaded_files_texts:
+            print(f"Downloaded output file(s): {', '.join(downloaded_files_texts)}")
+            print("These files contain the OCR results in JSON format.")
+        else:
+            print("No output files were successfully downloaded.")
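The practical effect of the new branch in load_and_split_document is an OCR fallback: when the regular splitter produces no pages or chunks, the PDF is pushed through Document AI and re-split from the raw text. A sketch of exercising that path (chunk size, overlap and the path are hypothetical; isBubble=False keeps the file local):

    import asyncio
    from _utils.langchain_utils.Splitter_class import Splitter

    splitter = Splitter(3500, 800)
    chunks, chunks_of_string_only = asyncio.run(
        splitter.load_and_split_document("/tmp/processo_escaneado.pdf", False, False)
    )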
_utils/langchain_utils/Vector_store_class.py
CHANGED
@@ -22,6 +22,8 @@ class VectorStore:
         axiom_instance: Axiom,
     ) -> Tuple[Chroma, BM25Okapi, List[str]]:
         """Create vector store and BM25 index with contextualized chunks"""
+        contador_erro = 0
+
         try:
             # Prepare texts with context
             if is_contextualized_chunk:

@@ -69,5 +71,9 @@
             return vector_store, bm25, chunk_ids

         except Exception as e:
+            contador_erro += 1
+            if contador_erro >= 2:
+                raise Exception(f"Error creating enhanced vector store: {str(e)}")
+
             self.logger.error(f"Error creating enhanced vector store: {str(e)}")
-
+            return self.create_enhanced_vector_store(chunks, False, axiom_instance)
_utils/ragas.py
CHANGED
@@ -76,7 +76,7 @@ def test_ragas(serializer, listaPDFs):

 def generate_summary(vector_store, bm25, chunk_ids, query, summarizer):
     """Generates an enhanced summary using the vector store and BM25 index."""
-    structured_summaries = summarizer.
+    structured_summaries = summarizer.do_last_requests(
         vector_store, bm25, chunk_ids, query
     )
     return {"structured_summaries": structured_summaries}
entrypoint.sh
ADDED
@@ -0,0 +1,26 @@
+#!/bin/sh
+set -e # Exit immediately if a command exits with a non-zero status.
+
+CREDENTIALS_FILE_PATH="/app/vella_gcp_luan_credentials.json"
+
+# Check if the GCP_CREDENTIALS_JSON_CONTENT secret is provided
+if [ -n "$GCP_CREDENTIALS_JSON_CONTENT" ]; then
+  echo "GCP_CREDENTIALS_JSON_CONTENT secret found. Writing to $CREDENTIALS_FILE_PATH"
+  # Use printf to preserve newlines and special characters correctly
+  printf "%s" "$GCP_CREDENTIALS_JSON_CONTENT" > "$CREDENTIALS_FILE_PATH"
+  export GOOGLE_APPLICATION_CREDENTIALS="$CREDENTIALS_FILE_PATH"
+  echo "GOOGLE_APPLICATION_CREDENTIALS set to $CREDENTIALS_FILE_PATH"
+
+  # Optional: Add a check to see if the file looks like JSON (basic check)
+  if command -v jq >/dev/null && jq -e . "$CREDENTIALS_FILE_PATH" >/dev/null 2>&1; then
+    echo "Credentials file appears to be valid JSON."
+  else
+    echo "Warning: Credentials file may not be valid JSON. Content:"
+    # cat "$CREDENTIALS_FILE_PATH" # Print the content for debugging
+  fi
+
+else
+  echo "Warning: GCP_CREDENTIALS_JSON_CONTENT secret not found. GCP services might not authenticate."
+fi
+
+exec "$@"
gerar_documento/serializer.py
CHANGED
@@ -73,7 +73,7 @@ class FileInfoSerializerData:
 class GerarDocumentoSerializer(
     GerarDocumentoInitialSerializer, GerarDocumentoParametros
 ):
-    files = serializers.
+    files = serializers.CharField(required=True)
     bubble_editor_version = serializers.CharField(
         required=False, default="version-test"
     )  # Será o valor utilizado dentro da URL da requisição pro Bubble

@@ -89,7 +89,7 @@
 class GerarDocumentoSerializerData(
     GerarDocumentoParametrosData, GerarDocumentoInitialSerializerData
 ):
-    files:
+    files: str
     bubble_editor_version: str = "version-test"

     doc_id: str = ""

@@ -100,6 +100,8 @@
 class GerarDocumentoComPDFProprioSerializer(
     GerarDocumentoInitialSerializer, GerarDocumentoParametros
 ):
+    files = serializers.ListField(required=True)
+
     def get_obj(self):
         return GerarDocumentoSerializerData(**self.validated_data)  # type: ignore

@@ -108,17 +110,4 @@
 class GerarDocumentoComPDFProprioSerializerData(
     GerarDocumentoParametrosData, GerarDocumentoInitialSerializerData
 ):
-
-
-
-class GerarEmentaSerializer(serializers.Serializer):
-    files = serializers.ListField(child=FileInfoSerializer(), required=True)
-    user_message = serializers.CharField(required=False, default="")
-    chunk_size = serializers.IntegerField(required=False, default=3500)
-    chunk_overlap = serializers.IntegerField(required=False, default=800)
-    bubble_editor_version = serializers.CharField(
-        required=False, default="version-test"
-    )  # Será o valor utilizado dentro da URL da requisição pro Bubble
-    doc_id = serializers.CharField(required=True)
-    form_response_id = serializers.CharField(required=True)
-    version = serializers.CharField(required=True)
+    files: List[FileInfoSerializerData]
gerar_documento/views.py
CHANGED
@@ -1,13 +1,12 @@
 from typing import Any, Dict, cast
 from langchain.prompts import PromptTemplate
+from _utils.Utils_Class import UtilsClass
 from _utils.bubble_integrations.enviar_resposta_final import enviar_resposta_final
-from _utils.custom_exception_handler import
+from _utils.custom_exception_handler import custom_exception_handler_without_api_handler
 from _utils.gerar_documento_utils.GerarDocumento import GerarDocumento
 from _utils.langchain_utils.LLM_class import LLM
 from _utils.gerar_documento_utils.utils import (
     generate_document_title,
-    gerar_resposta_compilada,
-    get_full_text_and_all_PDFs_chunks,
     split_text_by_tokens,
 )
 from _utils.langchain_utils.Prompt_class import Prompt

@@ -29,10 +28,10 @@ from setup.logging import Axiom, send_axiom
 from .serializer import (
     GerarDocumentoComPDFProprioSerializer,
     GerarDocumentoSerializer,
-    GerarEmentaSerializer,
 )
 import asyncio
 from _utils.langchain_utils.Splitter_class import Splitter
+import json


 class GerarDocumentoView(AsyncAPIView):

@@ -60,7 +59,8 @@ class GerarDocumentoView(AsyncAPIView):
             data = cast(Dict[str, Any], serializer.validated_data)
             self.serializer = data

-            listaPDFs =
+            listaPDFs = json.loads(obj.files)
+            listaPDFs = [l["link_arquivo"] for l in listaPDFs]

             self.axiom_instance.send_axiom(f"listaPDFs: {listaPDFs}")

@@ -138,17 +138,23 @@ class GerarEmentaView(AsyncAPIView):

         async def proccess_data_after_response():
             try:
+                util = UtilsClass()
+                handle_files = util.handle_files
                 data = cast(Dict[str, Any], serializer.validated_data)
                 self.serializer = data

-                gerar_documento_instance = GerarDocumento(
+                gerar_documento_instance = GerarDocumento(
+                    obj, True, self.axiom_instance
+                )

-                listaPDFs = [l["link_arquivo"] for l in data["files"]]
+                # listaPDFs = [l["link_arquivo"] for l in data["files"]]
+                listaPDFs = json.loads(obj.files)
+                listaPDFs = [l["link_arquivo"] for l in listaPDFs]

                 self.axiom_instance.send_axiom(f"listaPDFs: {listaPDFs}")

                 all_PDFs_chunks, full_text_as_array = (
-                    await get_full_text_and_all_PDFs_chunks(
+                    await handle_files.get_full_text_and_all_PDFs_chunks(
                         listaPDFs,
                         Splitter(obj.chunk_size, obj.chunk_overlap),
                         False,

@@ -208,7 +214,7 @@
                 )
             except Exception as e:
                 print(f"ERRO GERAR EMENTA: {e}")
-
+                custom_exception_handler_without_api_handler(
                     e, serializer, self.axiom_instance
                 )
                 raise

@@ -239,6 +245,8 @@ class GerarEmentaComPDFProprioView(AsyncAPIView):
             f"COMEÇOU NOVA REQUISIÇÃO - request.data: {request.data}"
         )
         serializer = GerarDocumentoComPDFProprioSerializer(data=request.data)
+        util = UtilsClass()
+        handle_files = util.handle_files
         if serializer.is_valid(raise_exception=True):
             data = cast(Dict[str, Any], serializer.validated_data)
             self.axiom_instance.send_axiom(f"data: {data}")

@@ -246,7 +254,7 @@
             serializer_obj = serializer.get_obj()

             gerar_documento_instance = GerarDocumento(
-                serializer_obj, self.axiom_instance
+                serializer_obj, False, self.axiom_instance
             )

             listaPDFs = handle_pdf_files_from_serializer(

@@ -255,7 +263,7 @@
             self.axiom_instance.send_axiom(f"listaPDFs: {listaPDFs}")

             all_PDFs_chunks, full_text_as_array = (
-                await get_full_text_and_all_PDFs_chunks(
+                await handle_files.get_full_text_and_all_PDFs_chunks(
                     listaPDFs,
                     Splitter(serializer_obj.chunk_size, serializer_obj.chunk_overlap),
                     False,
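With the serializer change above, files reaches GerarDocumentoView as a JSON-encoded string rather than a nested list, which is why the view now calls json.loads(obj.files). A hypothetical request body fragment (the remaining required fields come from the parent serializers and are omitted here):

    payload = {
        "files": '[{"link_arquivo": "https://example.com/processo.pdf"}]',
        # ... other GerarDocumentoSerializer fields ...
    }
    # Inside the view: json.loads(payload["files"]) -> [{"link_arquivo": "..."}]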
requirements.txt
CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
setup/installed_apps.py
CHANGED
@@ -16,4 +16,5 @@ INSTALLED_APPS = config_apps + [
     "modelos_usuarios",
     "ragas_api",
     "gerar_documento",
+    "simple_llm",
 ]
setup/urls.py
CHANGED
@@ -19,4 +19,5 @@ urlpatterns = config_urls + [
     path("", include("gerar_documento.urls")),
     path("", include("ragas_api.urls")),
     path("", include("modelos_usuarios.urls")),
+    path("", include("simple_llm.urls")),
 ]
simple_llm/__init__.py
ADDED
File without changes
simple_llm/admin.py
ADDED
@@ -0,0 +1,3 @@
+from django.contrib import admin
+
+# Register your models here.
simple_llm/apps.py
ADDED
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class SimpleLlmConfig(AppConfig):
+    default_auto_field = 'django.db.models.BigAutoField'
+    name = 'simple_llm'
simple_llm/migrations/__init__.py
ADDED
File without changes
simple_llm/models.py
ADDED
@@ -0,0 +1,3 @@
+from django.db import models
+
+# Create your models here.
simple_llm/serializer.py
ADDED
@@ -0,0 +1,57 @@
+from dataclasses import dataclass, field
+from typing import List, Optional, Union
+from rest_framework import serializers
+from _utils.gerar_documento_utils.prompts import (
+    prompt_gerar_documento,
+)
+from gerar_documento.serializer import FileInfoSerializer, FileInfoSerializerData
+from gerar_documento.serializer_base import (
+    GerarDocumentoParametros,
+    GerarDocumentoParametrosData,
+)
+from setup.environment import default_model
+from django.core.files.uploadedfile import UploadedFile
+
+user_message = "What are the main points of this document?"
+
+
+class SimpleLLMInitialSerializer(serializers.Serializer):
+    files = serializers.ListField(child=serializers.FileField(), required=False)
+    user_text = serializers.CharField(required=False, default=user_message)
+    model = serializers.CharField(required=False, default=default_model)
+    prompt = serializers.CharField(required=False, default=prompt_gerar_documento)
+    llm_ultimas_requests = serializers.CharField(
+        required=False, default="gemini-2.0-flash"
+    )
+
+
+@dataclass
+class SimpleLLMInitialSerializerData:
+    files: List[dict] = field(default_factory=list)
+    user_text: str = ""
+    model: str = default_model
+    prompt: str = ""
+    llm_ultimas_requests: str = "gemini-2.0-flash"
+
+
+class SimpleLLMSerializer(SimpleLLMInitialSerializer):
+    files = serializers.ListField(child=FileInfoSerializer(), required=False)
+    bubble_editor_version = serializers.CharField(
+        required=False, default="version-test"
+    )  # Será o valor utilizado dentro da URL da requisição pro Bubble
+    doc_id = serializers.CharField(required=True)
+    form_response_id = serializers.CharField(required=True)
+    version = serializers.CharField(required=True)
+
+    def get_obj(self):
+        return SimpleSerializerData(**self.validated_data)  # type: ignore
+
+
+@dataclass
+class SimpleSerializerData(SimpleLLMInitialSerializerData):
+    files: List[FileInfoSerializerData] = field(default_factory=list)
+    bubble_editor_version: str = "version-test"
+
+    doc_id: str = ""
+    form_response_id: str = ""
+    version: str = ""
simple_llm/tests.py
ADDED
@@ -0,0 +1,3 @@
+from django.test import TestCase
+
+# Create your tests here.
simple_llm/urls.py
ADDED
@@ -0,0 +1,11 @@
+from django.urls import path
+
+from simple_llm.views import SimpleLLMView
+
+urlpatterns = [
+    path(
+        "llm",
+        SimpleLLMView.as_view(),
+        name="simple-llm",
+    ),
+]
simple_llm/views.py
ADDED
@@ -0,0 +1,73 @@
+from datetime import datetime
+from _utils.custom_exception_handler import custom_exception_handler_without_api_handler
+from _utils.gerar_documento import gerar_documento
+from _utils.gerar_documento_utils.GerarDocumento import (
+    GerarDocumento,
+    GerarDocumentoUtils,
+)
+from _utils.langchain_utils.Prompt_class import Prompt
+from _utils.utils import convert_markdown_to_HTML
+from setup.logging import Axiom
+from setup.easy_imports import (
+    Response,
+    AsyncAPIView,
+    extend_schema,
+)
+from simple_llm.serializer import SimpleLLMSerializer
+
+
+class SimpleLLMView(AsyncAPIView):
+    # parser_classes = [MultiPartParser]
+    serializer = {}
+    axiom_instance = Axiom()
+
+    @extend_schema(
+        request=SimpleLLMSerializer,
+    )
+    async def post(self, request):
+        try:
+            self.axiom_instance.generate_new_uuid()
+            print(f"\n\nDATA E HORA DA REQUISIÇÃO: {datetime.now()}")
+            self.axiom_instance.send_axiom(
+                f"COMEÇOU NOVA REQUISIÇÃO - request.data: {request.data}"
+            )
+            serializer = SimpleLLMSerializer(data=request.data)
+            if serializer.is_valid(raise_exception=True):
+                obj = serializer.get_obj()  # type: ignore
+                if not serializer.validated_data:
+                    raise ValueError("Erro no validated_data")
+
+                self.serializer = obj
+
+                listaPDFs = [l.link_arquivo for l in obj.files]
+                self.axiom_instance.send_axiom(f"listaPDFs: {listaPDFs}")
+
+                summarizer = GerarDocumentoUtils(self.axiom_instance)
+
+                prompt_instance = Prompt()
+                prompt = prompt_instance.create_and_invoke_prompt(
+                    obj.prompt,
+                    dynamic_dict={"context": obj.user_text},
+                )
+
+                resposta_llm = (
+                    await summarizer.checar_se_resposta_vazia_do_documento_final(
+                        obj.llm_ultimas_requests, prompt.to_string()
+                    )
+                )
+                self.axiom_instance.send_axiom(f"resposta_llm: {resposta_llm}")
+
+                texto_completo_como_html = convert_markdown_to_HTML(
+                    resposta_llm
+                ).replace("resposta_segunda_etapa:", "<br><br>")
+
+                self.axiom_instance.send_axiom(
+                    f"texto_completo_como_html: {texto_completo_como_html}"
+                )
+
+                return Response({"resposta": texto_completo_como_html})
+        except Exception as e:
+            custom_exception_handler_without_api_handler(
+                e, serializer, self.axiom_instance
+            )
+            raise
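A hedged sketch of calling the new endpoint once it is wired up through setup/urls.py (it assumes the requests package and a locally running server; host, port and field values are hypothetical):

    import requests

    resp = requests.post(
        "http://localhost:7860/llm",
        json={
            "user_text": "What are the main points of this document?",
            "doc_id": "123",
            "form_response_id": "456",
            "version": "live",
        },
    )
    print(resp.json()["resposta"])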