luanpoppe committed
Commit d514965 · 1 Parent(s): 99fb68e
feat: criando classe de utilitários ("creating a utilities class")
Browse files
- .gitignore +2 -1
- _utils/Handle_Files_Class.py +28 -0
- _utils/Utils_Class.py +13 -0
- _utils/files/ragas_testset.csv +0 -18
- _utils/gerar_documento.py +25 -32
- _utils/gerar_documento_utils/GerarDocumento.py +18 -13
- _utils/gerar_documento_utils/utils.py +0 -20
- gerar_documento/views.py +7 -3
.gitignore CHANGED
@@ -172,4 +172,5 @@ cython_debug/
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
-# End of https://www.toptal.com/developers/gitignore/api/django
+# End of https://www.toptal.com/developers/gitignore/api/django
+vella_gcp_luan_credentials.json
_utils/Handle_Files_Class.py ADDED
@@ -0,0 +1,28 @@
+from dataclasses import dataclass
+from typing import List, Tuple
+
+from _utils.langchain_utils.Splitter_class import Splitter
+from _utils.models.gerar_documento import DocumentChunk
+
+
+@dataclass
+class HandleFilesClass:
+    async def get_full_text_and_all_PDFs_chunks(
+        self,
+        listaPDFs: List[str],
+        splitterObject: Splitter,
+        should_use_llama_parse: bool,
+        isBubble: bool,
+    ) -> Tuple[List[DocumentChunk], List[str]]:
+        all_PDFs_chunks: List[DocumentChunk] = []
+
+        pages: List[str] = []
+
+        # Load and process document
+        for pdf_path in listaPDFs:
+            chunks, pages = await splitterObject.load_and_split_document(
+                pdf_path, should_use_llama_parse, isBubble
+            )
+            all_PDFs_chunks = all_PDFs_chunks + chunks
+
+        return all_PDFs_chunks, pages
_utils/Utils_Class.py ADDED
@@ -0,0 +1,13 @@
+from dataclasses import dataclass
+from typing import List
+
+from _utils.Handle_Files_Class import HandleFilesClass
+
+
+@dataclass
+class UtilsClass:
+    lista_pdfs: List[str] | None = None
+    handle_files = HandleFilesClass()
+
+    def configure_lista_pdfs(self, lista_pdfs):
+        self.lista_pdfs = lista_pdfs
_utils/files/ragas_testset.csv DELETED
@@ -1,18 +0,0 @@
-position,user_input,reference,response,retrieved_contexts
-0,What are the global implications of the USA Supreme Court ruling on abortion?,"The global implications of the USA Supreme Court ruling on abortion are significant. The ruling has led to limited or no access to abortion for one in three women and girls of reproductive age in states where abortion access is restricted. These states also have weaker maternal health support, higher maternal death rates, and higher child poverty rates. Additionally, the ruling has had an impact beyond national borders due to the USA's geopolitical and cultural influence globally. Organizations and activists worldwide are concerned that the ruling may inspire anti-abortion legislative and policy attacks in other countries. The ruling has also hindered progressive law reform and the implementation of abortion guidelines in certain African countries. Furthermore, the ruling has created a chilling effect in international policy spaces, empowering anti-abortion actors to undermine human rights protections.","The global implications of the USA Supreme Court ruling on abortion can be significant, as it sets a precedent for other countries and influences the global discourse on reproductive rights. Here are some potential implications:
-
-1. Influence on other countries: The Supreme Court's ruling can serve as a reference point for other countries grappling with their own abortion laws. It can provide legal arguments and reasoning that advocates for reproductive rights can use to challenge restrictive abortion laws in their respective jurisdictions.
-
-2. Strengthening of global reproductive rights movements: A favorable ruling by the Supreme Court can energize and empower reproductive rights movements worldwide. It can serve as a rallying point for activists and organizations advocating for women's rights, leading to increased mobilization and advocacy efforts globally.
-
-3. Counteracting anti-abortion movements: Conversely, a ruling that restricts abortion rights can embolden anti-abortion movements globally. It can provide legitimacy to their arguments and encourage similar restrictive measures in other countries, potentially leading to a rollback of existing reproductive rights.
-
-4. Impact on international aid and policies: The Supreme Court's ruling can influence international aid and policies related to reproductive health. It can shape the priorities and funding decisions of donor countries and organizations, potentially leading to increased support for reproductive rights initiatives or conversely, restrictions on funding for abortion-related services.
-
-5. Shaping international human rights standards: The ruling can contribute to the development of international human rights standards regarding reproductive rights. It can influence the interpretation and application of existing human rights treaties and conventions, potentially strengthening the recognition of reproductive rights as fundamental human rights globally.
-
-6. Global health implications: The Supreme Court's ruling can have implications for global health outcomes, particularly in countries with restrictive abortion laws. It can impact the availability and accessibility of safe and legal abortion services, potentially leading to an increase in unsafe abortions and related health complications.
-
-It is important to note that the specific implications will depend on the nature of the Supreme Court ruling and the subsequent actions taken by governments, activists, and organizations both within and outside the United States.","[""- In 2022, the USA Supreme Court handed down a decision ruling that overturned 50 years of jurisprudence recognizing a constitutional right to abortion.\n- This decision has had a massive impact: one in three women and girls of reproductive age now live in states where abortion access is either totally or near-totally inaccessible.\n- The states with the most restrictive abortion laws have the weakest maternal health support, higher maternal death rates, and higher child poverty rates.\n- The USA Supreme Court ruling has also had impacts beyond national borders due to the geopolitical and cultural influence wielded by the USA globally and the aid it funds.\n- SRR organizations and activists across the world have expressed fear about the ruling laying the groundwork for anti-abortion legislative and policy attacks in other countries.\n- Advocates have also observed the ruling's impact on progressive law reform and the stalling of the adoption and enforcement of abortion guidelines in certain African countries.\n- The ruling has created a chilling effect in international policy spaces, emboldening anti-abortion state and non-state actors to undermine human rights protections.""
-'The USA Supreme Court ruling on abortion has sparked intense debates and discussions not only within the country but also around the world. Many countries look to the United States as a leader in legal and social issues, so the decision could potentially influence the policies and attitudes towards abortion in other nations.'
-""The ruling may also impact international organizations and non-governmental groups that work on reproductive rights and women's health issues. Depending on the outcome, there could be shifts in funding, advocacy efforts, and collaborations with American counterparts, leading to ripple effects in the global fight for reproductive justice.""]"
_utils/gerar_documento.py CHANGED
@@ -1,6 +1,7 @@
 import os
 from langchain_core.messages import HumanMessage
 from typing import Any, Union, cast
+from _utils.Utils_Class import UtilsClass
 from _utils.langchain_utils.LLM_class import LLM
 from _utils.bubble_integrations.enviar_resposta_final import enviar_resposta_final
 from _utils.custom_exception_handler import custom_exception_handler_wihout_api_handler
@@ -16,7 +17,6 @@ from _utils.gerar_documento_utils.contextual_retriever import (
 from _utils.gerar_documento_utils.utils import (
     generate_document_title,
     gerar_resposta_compilada,
-    get_full_text_and_all_PDFs_chunks,
     get_response_from_auxiliar_contextual_prompt,
 )
 from _utils.models.gerar_documento import (
@@ -49,20 +49,23 @@ async def gerar_documento(
     isBubble=False,
 ):
     try:
+        axiom = axiom_instance.send_axiom
+        utils = UtilsClass()
+        handle_files = utils.handle_files
         contextual_retriever = ContextualRetriever(serializer)
-
-        # Initialize enhanced summarizer
         summarizer = GerarDocumento(serializer, axiom_instance)
 
+        params = {
+            "listaPDFs": listaPDFs,
+            "splitterObject": summarizer.splitter,
+            "should_use_llama_parse": serializer.should_use_llama_parse,
+            "isBubble": isBubble,
+        }
+
         all_PDFs_chunks, full_text_as_array = (
-            await get_full_text_and_all_PDFs_chunks(
-                listaPDFs,
-                summarizer.splitter,
-                serializer.should_use_llama_parse,
-                isBubble,
-            )
+            await handle_files.get_full_text_and_all_PDFs_chunks(**params)
         )
+        axiom(f"INÍCIO DO TEXTO COMPLETO DOS PDFS: {full_text_as_array[0:5]}")
 
         is_contextualized_chunk = serializer.should_have_contextual_chunks
 
@@ -70,21 +73,15 @@
             response_auxiliar_summary = (
                 await get_response_from_auxiliar_contextual_prompt(full_text_as_array)
             )
-            axiom_instance.send_axiom(
-                f"RESUMO INICIAL DO PROCESSO: {response_auxiliar_summary}"
-            )
+            axiom(f"RESUMO INICIAL DO PROCESSO: {response_auxiliar_summary}")
 
-
+            axiom("COMEÇANDO A FAZER AS REQUISIÇÕES DO CONTEXTUAL")
             contextualized_chunks = await contextual_retriever.contextualize_all_chunks(
                 all_PDFs_chunks, response_auxiliar_summary, axiom_instance
            )
-            axiom_instance.send_axiom(
-                "TERMINOU DE FAZER TODAS AS REQUISIÇÕES DO CONTEXTUAL"
-            )
+            axiom("TERMINOU DE FAZER TODAS AS REQUISIÇÕES DO CONTEXTUAL")
             chunks_processados = contextualized_chunks
-            axiom_instance.send_axiom(
-                f"CHUNKS PROCESSADOS INICIALMENTE: {chunks_processados}"
-            )
+            axiom(f"CHUNKS PROCESSADOS INICIALMENTE: {chunks_processados}")
         else:
             chunks_processados = all_PDFs_chunks
 
@@ -93,16 +90,14 @@
             cast(str, response_auxiliar_summary)
         )
 
-        axiom_instance.send_axiom(
-            "COMEÇANDO REQUISIÇÃO PARA GERAR O QUERY DINAMICAMENTE DO VECTOR STORE"
-        )
+        axiom("COMEÇANDO REQUISIÇÃO PARA GERAR O QUERY DINAMICAMENTE DO VECTOR STORE")
         query_gerado_dinamicamente_para_o_vector_store = (
            await llm.google_gemini_ainvoke(
                 prompt_para_gerar_query_dinamico, "gemini-2.0-flash"
             )
        )
 
-        axiom_instance.send_axiom(
+        axiom(
            f"query_gerado_dinamicamente_para_o_vector_store: {query_gerado_dinamicamente_para_o_vector_store.content}",
         )
 
@@ -114,7 +109,7 @@
         )
 
         llm_ultimas_requests = serializer.llm_ultimas_requests
-
+        axiom("COMEÇANDO A FAZER ÚLTIMA REQUISIÇÃO")
         structured_summaries = await summarizer.gerar_documento_final(
             vector_store,
             bm25,
@@ -124,7 +119,7 @@
                 str, query_gerado_dinamicamente_para_o_vector_store.content
             ),  # prompt_auxiliar_SEM_CONTEXT,
         )
-
+        axiom("TERMINOU DE FAZER A ÚLTIMA REQUISIÇÃO")
 
         if not isinstance(structured_summaries, list):
             from rest_framework.response import Response
@@ -141,9 +136,7 @@
         texto_completo_como_html = convert_markdown_to_HTML(texto_completo).replace(
             "resposta_segunda_etapa:", "<br><br>"
         )
-        axiom_instance.send_axiom(
-            f"texto_completo_como_html: {texto_completo_como_html}"
-        )
+        axiom(f"texto_completo_como_html: {texto_completo_como_html}")
 
         if is_contextualized_chunk:
             prompt_titulo_do_documento = response_auxiliar_summary
@@ -154,7 +147,7 @@
         )
 
         if isBubble:
-
+            axiom("COMEÇANDO A REQUISIÇÃO FINAL PARA O BUBBLE")
             enviar_resposta_final(
                 serializer.doc_id,  # type: ignore
                 serializer.form_response_id,  # type: ignore
@@ -163,7 +156,7 @@
                 False,
                 cast(str, titulo_do_documento),
             )
-
+            axiom("TERMINOU A REQUISIÇÃO FINAL PARA O BUBBLE")
 
         return {
             "texto_completo": texto_completo_como_html,
_utils/gerar_documento_utils/GerarDocumento.py CHANGED
@@ -26,7 +26,7 @@ from _utils.models.gerar_documento import (
 from cohere import Client
 from _utils.langchain_utils.Splitter_class import Splitter
 import time
-
+from setup.tokens import openai_api_key, cohere_api_key
 from setup.logging import Axiom
 
 
@@ -81,10 +81,22 @@ class GerarDocumentoUtils:
         else:
             return documento_gerado
 
+    def create_retrieval_config(
+        self,
+        serializer: Union[
+            GerarDocumentoSerializerData, GerarDocumentoComPDFProprioSerializerData, Any
+        ],
+    ):
+        return RetrievalConfig(
+            num_chunks=serializer.num_chunks_retrieval,
+            embedding_weight=serializer.embedding_weight,
+            bm25_weight=serializer.bm25_weight,
+            context_window=serializer.context_window,
+            chunk_overlap=serializer.chunk_overlap,
+        )
+
 
 class GerarDocumento:
-    openai_api_key = os.environ.get("OPENAI_API_KEY", "")
-    cohere_api_key = os.environ.get("COHERE_API_KEY", "")
     resumo_gerado = ""
     gerar_documento_utils = GerarDocumentoUtils()
 
@@ -95,21 +107,14 @@
         ],
         axiom_instance: Axiom,
     ):
-        self.config = RetrievalConfig(
-            num_chunks=serializer.num_chunks_retrieval,
-            embedding_weight=serializer.embedding_weight,
-            bm25_weight=serializer.bm25_weight,
-            context_window=serializer.context_window,
-            chunk_overlap=serializer.chunk_overlap,
-        )
+        self.config = self.gerar_documento_utils.create_retrieval_config(serializer)
         self.logger = logging.getLogger(__name__)
         # self.prompt_auxiliar = prompt_auxiliar
         self.gpt_model = serializer.model
         self.gpt_temperature = serializer.gpt_temperature
         self.prompt_gerar_documento = serializer.prompt_gerar_documento
 
-        self.
-        self.cohere_client = Client(self.cohere_api_key)
+        self.cohere_client = Client(cohere_api_key)
         self.embeddings = HuggingFaceEmbeddings(model_name=serializer.hf_embedding)
         self.num_k_rerank = serializer.num_k_rerank
         self.model_cohere_rerank = serializer.model_cohere_rerank
@@ -219,7 +224,7 @@
             llm = ChatOpenAI(
                 temperature=self.gpt_temperature,
                 model=self.gpt_model,
-                api_key=SecretStr(self.openai_api_key),
+                api_key=SecretStr(openai_api_key),
             )
         elif llm_ultimas_requests == "deepseek-chat":
             llm = llm_instance.deepseek()
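The API keys now come from a `setup.tokens` module instead of per-class `os.environ.get` lookups. That module is not shown in this commit; a hedged sketch of what it is assumed to expose, with the names taken from the import above and the implementation guessed from the class attributes it replaces:

# setup/tokens.py -- assumed shape, not part of this commit
import os

openai_api_key = os.environ.get("OPENAI_API_KEY", "")
cohere_api_key = os.environ.get("COHERE_API_KEY", "")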
_utils/gerar_documento_utils/utils.py CHANGED
@@ -101,26 +101,6 @@ def split_text_by_tokens(full_text: str):
     return text_chunks
 
 
-async def get_full_text_and_all_PDFs_chunks(
-    listaPDFs: List[str],
-    splitterObject: Splitter,
-    should_use_llama_parse: bool,
-    isBubble: bool,
-) -> Tuple[List[DocumentChunk], List[str]]:
-    all_PDFs_chunks: List[DocumentChunk] = []
-
-    pages: List[str] = []
-
-    # Load and process document
-    for pdf_path in listaPDFs:
-        chunks, pages = await splitterObject.load_and_split_document(
-            pdf_path, should_use_llama_parse, isBubble
-        )
-        all_PDFs_chunks = all_PDFs_chunks + chunks
-
-    return all_PDFs_chunks, pages
-
-
 async def generate_document_title(resumo_para_gerar_titulo: str):
     prompt = f"Você é um assistente jurídico e irá receber abaixo o resumo de um documento jurídico. Quero que você gere um título para este documento. Mande como resposta apenas o título gerado, nada mais. Aqui está um título de exemplo pra você se basear ao criar um novo: <titulo_de_exemplo>Ação Penal por Furto Qualificado nº 0002269-86.2009.805.0032<titulo_de_exemplo>\n\nSegue abaixo o resumo do documento jurídico:\n{resumo_para_gerar_titulo}"
     response = await agemini_answer(prompt, "gemini-2.0-flash-lite")
gerar_documento/views.py CHANGED
@@ -1,5 +1,6 @@
 from typing import Any, Dict, cast
 from langchain.prompts import PromptTemplate
+from _utils.Utils_Class import UtilsClass
 from _utils.bubble_integrations.enviar_resposta_final import enviar_resposta_final
 from _utils.custom_exception_handler import custom_exception_handler_wihout_api_handler
 from _utils.gerar_documento_utils.GerarDocumento import GerarDocumento
@@ -7,7 +8,6 @@ from _utils.langchain_utils.LLM_class import LLM
 from _utils.gerar_documento_utils.utils import (
     generate_document_title,
     gerar_resposta_compilada,
-    get_full_text_and_all_PDFs_chunks,
     split_text_by_tokens,
 )
 from _utils.langchain_utils.Prompt_class import Prompt
@@ -138,6 +138,8 @@ class GerarEmentaView(AsyncAPIView):
 
         async def proccess_data_after_response():
             try:
+                util = UtilsClass()
+                handle_files = util.handle_files
                 data = cast(Dict[str, Any], serializer.validated_data)
                 self.serializer = data
 
@@ -148,7 +150,7 @@
                 self.axiom_instance.send_axiom(f"listaPDFs: {listaPDFs}")
 
                 all_PDFs_chunks, full_text_as_array = (
-                    await get_full_text_and_all_PDFs_chunks(
+                    await handle_files.get_full_text_and_all_PDFs_chunks(
                         listaPDFs,
                         Splitter(obj.chunk_size, obj.chunk_overlap),
                         False,
@@ -239,6 +241,8 @@ class GerarEmentaComPDFProprioView(AsyncAPIView):
             f"COMEÇOU NOVA REQUISIÇÃO - request.data: {request.data}"
         )
         serializer = GerarDocumentoComPDFProprioSerializer(data=request.data)
+        util = UtilsClass()
+        handle_files = util.handle_files
         if serializer.is_valid(raise_exception=True):
             data = cast(Dict[str, Any], serializer.validated_data)
             self.axiom_instance.send_axiom(f"data: {data}")
@@ -255,7 +259,7 @@
                 self.axiom_instance.send_axiom(f"listaPDFs: {listaPDFs}")
 
                 all_PDFs_chunks, full_text_as_array = (
-                    await get_full_text_and_all_PDFs_chunks(
+                    await handle_files.get_full_text_and_all_PDFs_chunks(
                         listaPDFs,
                         Splitter(serializer_obj.chunk_size, serializer_obj.chunk_overlap),
                         False,
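Both views now obtain the loader through `UtilsClass` but keep their positional call style, which stays equivalent to the keyword form used in `gerar_documento.py` because the method's signature is unchanged. A minimal sketch of how the positional arguments map onto the parameters (chunk sizes are illustrative, not from this commit):

handle_files = UtilsClass().handle_files
chunks, pages = await handle_files.get_full_text_and_all_PDFs_chunks(
    listaPDFs,            # -> listaPDFs
    Splitter(1000, 200),  # -> splitterObject (sizes illustrative)
    False,                # -> should_use_llama_parse
    False,                # -> isBubble
)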
|