luanpoppe committed
Commit 449ce0a · Parent: 99fb68e

feat: add first draft
.env.example CHANGED
@@ -11,4 +11,5 @@ LLAMA_CLOUD_API_KEY_PEIXE=""
 DEEPSEEKK_API_KEY=""
 GOOGLE_API_KEY_PEIXE=""
 SENTRY_DSN=""
-AMBIENTE="testes"
+AMBIENTE="testes"
+GOOGLE_APPLICATION_CREDENTIALS=""
.gitignore CHANGED
@@ -172,4 +172,5 @@ cython_debug/
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
-# End of https://www.toptal.com/developers/gitignore/api/django
+# End of https://www.toptal.com/developers/gitignore/api/django
+vella_gcp_luan_credentials.json
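The ignored vella_gcp_luan_credentials.json is presumably the service-account key that the new GOOGLE_APPLICATION_CREDENTIALS entry in .env.example points to; Google client libraries read that variable automatically. A minimal sketch of the wiring, assuming the key file sits in the project root:

import os

# Assumption: the key file lives in the project root; adjust the path for your setup.
os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", "vella_gcp_luan_credentials.json")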
_utils/gerar_documento.py CHANGED
@@ -54,89 +54,103 @@ async def gerar_documento(
     # Initialize enhanced summarizer
     summarizer = GerarDocumento(serializer, axiom_instance)
 
-    all_PDFs_chunks, full_text_as_array = await get_full_text_and_all_PDFs_chunks(
-        listaPDFs,
-        summarizer.splitter,
-        serializer.should_use_llama_parse,
-        isBubble,
-    )
+    all_PDFs_chunks, full_text_as_array, vertex_response = (
+        await get_full_text_and_all_PDFs_chunks(
+            listaPDFs,
+            summarizer.splitter,
+            serializer.should_use_llama_parse,
+            isBubble,
+        )
+    )
     axiom_instance.send_axiom(
         f"INÍCIO DO TEXTO COMPLETO DOS PDFS: {full_text_as_array[0:5]}"
     )
 
-    is_contextualized_chunk = serializer.should_have_contextual_chunks
-
-    if is_contextualized_chunk:
-        response_auxiliar_summary = (
-            await get_response_from_auxiliar_contextual_prompt(full_text_as_array)
-        )
-        axiom_instance.send_axiom(
-            f"RESUMO INICIAL DO PROCESSO: {response_auxiliar_summary}"
-        )
-
-        axiom_instance.send_axiom("COMEÇANDO A FAZER AS REQUISIÇÕES DO CONTEXTUAL")
-        contextualized_chunks = await contextual_retriever.contextualize_all_chunks(
-            all_PDFs_chunks, response_auxiliar_summary, axiom_instance
-        )
-        axiom_instance.send_axiom(
-            "TERMINOU DE FAZER TODAS AS REQUISIÇÕES DO CONTEXTUAL"
-        )
-        chunks_processados = contextualized_chunks
-        axiom_instance.send_axiom(
-            f"CHUNKS PROCESSADOS INICIALMENTE: {chunks_processados}"
-        )
-    else:
-        chunks_processados = all_PDFs_chunks
-
-    llm = LLM()
-    prompt_para_gerar_query_dinamico = prompt_gerar_query_dinamicamente(
-        cast(str, response_auxiliar_summary)
-    )
-
-    axiom_instance.send_axiom(
-        "COMEÇANDO REQUISIÇÃO PARA GERAR O QUERY DINAMICAMENTE DO VECTOR STORE"
-    )
-    query_gerado_dinamicamente_para_o_vector_store = (
-        await llm.google_gemini_ainvoke(
-            prompt_para_gerar_query_dinamico, "gemini-2.0-flash"
-        )
-    )
-
-    axiom_instance.send_axiom(
-        f"query_gerado_dinamicamente_para_o_vector_store: {query_gerado_dinamicamente_para_o_vector_store.content}",
-    )
-
-    # Create enhanced vector store and BM25 index
-    vector_store, bm25, chunk_ids = (
-        summarizer.vector_store.create_enhanced_vector_store(
-            chunks_processados, is_contextualized_chunk, axiom_instance
-        )
-    )
-
-    llm_ultimas_requests = serializer.llm_ultimas_requests
-    axiom_instance.send_axiom("COMEÇANDO A FAZER ÚLTIMA REQUISIÇÃO")
-    structured_summaries = await summarizer.gerar_documento_final(
-        vector_store,
-        bm25,
-        chunk_ids,
-        llm_ultimas_requests,
-        cast(
-            str, query_gerado_dinamicamente_para_o_vector_store.content
-        ),  # prompt_auxiliar_SEM_CONTEXT,
-    )
-    axiom_instance.send_axiom("TERMINOU DE FAZER A ÚLTIMA REQUISIÇÃO")
-
-    if not isinstance(structured_summaries, list):
-        from rest_framework.response import Response
-
-        return Response({"erro": structured_summaries})
-
-    texto_completo = summarizer.resumo_gerado + "\n\n"
-
-    for x in structured_summaries:
-        texto_completo = texto_completo + x["content"] + "\n"
-        x["source"]["text"] = x["source"]["text"][0:200]
-        x["source"]["context"] = x["source"]["context"][0:200]
+    if not vertex_response:
+        is_contextualized_chunk = serializer.should_have_contextual_chunks
+
+        if is_contextualized_chunk:
+            response_auxiliar_summary = (
+                await get_response_from_auxiliar_contextual_prompt(
+                    full_text_as_array
+                )
+            )
+            axiom_instance.send_axiom(
+                f"RESUMO INICIAL DO PROCESSO: {response_auxiliar_summary}"
+            )
+
+            axiom_instance.send_axiom(
+                "COMEÇANDO A FAZER AS REQUISIÇÕES DO CONTEXTUAL"
+            )
+            contextualized_chunks = (
+                await contextual_retriever.contextualize_all_chunks(
+                    all_PDFs_chunks, response_auxiliar_summary, axiom_instance
+                )
+            )
+            axiom_instance.send_axiom(
+                "TERMINOU DE FAZER TODAS AS REQUISIÇÕES DO CONTEXTUAL"
+            )
+            chunks_processados = contextualized_chunks
+            axiom_instance.send_axiom(
+                f"CHUNKS PROCESSADOS INICIALMENTE: {chunks_processados}"
+            )
+        else:
+            chunks_processados = all_PDFs_chunks
+
+        llm = LLM()
+        prompt_para_gerar_query_dinamico = prompt_gerar_query_dinamicamente(
+            cast(str, response_auxiliar_summary)
+        )
+
+        axiom_instance.send_axiom(
+            "COMEÇANDO REQUISIÇÃO PARA GERAR O QUERY DINAMICAMENTE DO VECTOR STORE"
+        )
+        query_gerado_dinamicamente_para_o_vector_store = (
+            await llm.google_gemini_ainvoke(
+                prompt_para_gerar_query_dinamico, "gemini-2.0-flash"
+            )
+        )
+
+        axiom_instance.send_axiom(
+            f"query_gerado_dinamicamente_para_o_vector_store: {query_gerado_dinamicamente_para_o_vector_store.content}",
+        )
+
+        # Create enhanced vector store and BM25 index
+        vector_store, bm25, chunk_ids = (
+            summarizer.vector_store.create_enhanced_vector_store(
+                chunks_processados, is_contextualized_chunk, axiom_instance
+            )
+        )
+
+        llm_ultimas_requests = serializer.llm_ultimas_requests
+        axiom_instance.send_axiom("COMEÇANDO A FAZER ÚLTIMA REQUISIÇÃO")
+        structured_summaries = await summarizer.gerar_documento_final(
+            vector_store,
+            bm25,
+            chunk_ids,
+            llm_ultimas_requests,
+            cast(
+                str, query_gerado_dinamicamente_para_o_vector_store.content
+            ),  # prompt_auxiliar_SEM_CONTEXT,
+        )
+        axiom_instance.send_axiom("TERMINOU DE FAZER A ÚLTIMA REQUISIÇÃO")
+
+        if not isinstance(structured_summaries, list):
+            from rest_framework.response import Response
+
+            return Response({"erro": structured_summaries})
+
+        texto_completo = summarizer.resumo_gerado + "\n\n"
+
+        for x in structured_summaries:
+            texto_completo = texto_completo + x["content"] + "\n"
+            x["source"]["text"] = x["source"]["text"][0:200]
+            x["source"]["context"] = x["source"]["context"][0:200]
+
+    else:
+        axiom_instance.send_axiom("FOI UTILIZADO O VERTEX AI DO GOOGLE")
+        # Use the Vertex answer as the document body; texto_completo would otherwise be unbound here
+        texto_completo = cast(str, vertex_response)
 
     texto_completo_como_html = convert_markdown_to_HTML(texto_completo).replace(
         "resposta_segunda_etapa:", "<br><br>"
_utils/gerar_documento_utils/prompts.py CHANGED
@@ -1,4 +1,14 @@
-def create_prompt_auxiliar_do_contextual_prompt(PROCESSO_JURIDICO: str):
+def create_prompt_auxiliar_do_contextual_prompt(PROCESSO_JURIDICO: str | None = None):
+    if PROCESSO_JURIDICO:
+        adicionar_ao_prompt = f"""
+1. **Análise Completa:** Leia e analise todo o conteúdo do processo fornecido.
+<processo_juridico>
+{PROCESSO_JURIDICO}
+</processo_juridico>"""
+    else:
+        adicionar_ao_prompt = """
+1. **Análise Completa:** Leia e analise todo o conteúdo do processo fornecido como PDF."""
+
     return f"""
 <prompt>
 <persona>
@@ -46,10 +56,7 @@ Seu objetivo é analisar o processo jurídico fornecido e gerar um relatório co
 <instrucoes>
 Siga estritamente os passos abaixo:
 
-1. **Análise Completa:** Leia e analise todo o conteúdo do processo fornecido.
-<processo_juridico>
-{PROCESSO_JURIDICO}
-</processo_juridico>
+{adicionar_ao_prompt}
 
 2. **Identificação e Listagem de Peças:** Identifique quais das peças listadas na `<tarefa>` estão presentes no texto. Liste **apenas** as encontradas na tag `<pecas_identificadas>`.
 
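A quick sketch of the two modes this refactor enables (names from the diff above; the sample text is a placeholder):

# With extracted text, the process is inlined into the prompt.
prompt_inline = create_prompt_auxiliar_do_contextual_prompt("...texto do processo...")

# With no text (e.g. scanned PDFs), the prompt instructs the model to read
# the process from the attached PDF, which the Vertex path uploads separately.
prompt_pdf = create_prompt_auxiliar_do_contextual_prompt()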
_utils/gerar_documento_utils/utils.py CHANGED
@@ -106,11 +106,13 @@ async def get_full_text_and_all_PDFs_chunks(
     splitterObject: Splitter,
     should_use_llama_parse: bool,
     isBubble: bool,
-) -> Tuple[List[DocumentChunk], List[str]]:
+) -> Tuple[List[DocumentChunk], List[str], Union[None, str]]:
     all_PDFs_chunks: List[DocumentChunk] = []
 
     pages: List[str] = []
 
+    vertex_response = None  # Only set when the PDFs must be sent to Google Vertex to generate the answer
+
     # Load and process document
     for pdf_path in listaPDFs:
         chunks, pages = await splitterObject.load_and_split_document(
@@ -118,7 +120,14 @@ async def get_full_text_and_all_PDFs_chunks(
         )
         all_PDFs_chunks = all_PDFs_chunks + chunks
 
-    return all_PDFs_chunks, pages
+    if len(pages) == 0 or len(all_PDFs_chunks) == 0:
+        llm = LLM()
+        prompt = create_prompt_auxiliar_do_contextual_prompt(None)
+        vertex_response = await llm.google_gemini_vertex_ainvoke(
+            prompt, listaPDFs, "gemini-2.0-flash"
+        )
+
+    return all_PDFs_chunks, pages, vertex_response
 
 
 async def generate_document_title(resumo_para_gerar_titulo: str):
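In short: when the splitter extracts no text (typically image-only, scanned PDFs), the function now answers directly through Vertex and signals that via the third return value. A minimal caller sketch under that assumption (the path and flags are placeholders):

chunks, pages, vertex_response = await get_full_text_and_all_PDFs_chunks(
    ["processo.pdf"],
    splitter,
    should_use_llama_parse=False,
    isBubble=False,
)
if vertex_response:
    texto_completo = vertex_response  # Gemini answered from the raw PDF
else:
    texto_completo = "\n".join(pages)  # normal chunking pipeline continues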
_utils/google_integration/google_cloud.py ADDED
@@ -0,0 +1,26 @@
+import os
+from google.cloud import storage
+
+GCP_PROJECT = "gen-lang-client-0350149082"
+
+
+def upload_to_gcs(LOCAL_PDF_PATH: str) -> str:
+    """Uploads a file to a GCS bucket and returns its gs:// URI."""
+    GCS_BUCKET_NAME = "vella-pdfs"
+
+    # Destination path inside the bucket
+    GCS_DESTINATION_BLOB_NAME = "gemini_uploads/" + os.path.basename(LOCAL_PDF_PATH)
+
+    storage_client = storage.Client(
+        project=GCP_PROJECT,
+    )
+    bucket = storage_client.bucket(GCS_BUCKET_NAME)
+    blob = bucket.blob(GCS_DESTINATION_BLOB_NAME)
+
+    print(
+        f"Uploading {LOCAL_PDF_PATH} to gs://{GCS_BUCKET_NAME}/{GCS_DESTINATION_BLOB_NAME}..."
+    )
+    blob.upload_from_filename(LOCAL_PDF_PATH)
+    gcs_uri = f"gs://{GCS_BUCKET_NAME}/{GCS_DESTINATION_BLOB_NAME}"
+    print(f"File uploaded to {gcs_uri}")
+    return gcs_uri
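Usage sketch for the new helper (bucket and prefix come from the module above; the local path is hypothetical):

from _utils.google_integration.google_cloud import upload_to_gcs

# Returns e.g. "gs://vella-pdfs/gemini_uploads/processo.pdf"
gcs_uri = upload_to_gcs("/tmp/processo.pdf")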
_utils/langchain_utils/LLM_class.py CHANGED
@@ -1,9 +1,10 @@
-from typing import Literal, cast
+from typing import List, Literal, cast
 from pydantic import SecretStr
-from setup.environment import default_model
+from _utils.google_integration.google_cloud import GCP_PROJECT, upload_to_gcs
 from setup.easy_imports import ChatOpenAI, ChatGoogleGenerativeAI
 import os
 from langchain_core.messages import HumanMessage
+from langchain_google_vertexai import ChatVertexAI
 
 deepseek_api_key = cast(str, os.environ.get("DEEPSEEKK_API_KEY"))
 google_api_key = cast(str, os.environ.get("GOOGLE_API_KEY_PEIXE"))
@@ -75,3 +76,44 @@ class LLM:
         raise Exception(
             "Failed to generate the final document after 5 retries and the fallback attempt with chat-gpt-4o-mini."
         ) from e
+
+    async def google_gemini_vertex_ainvoke(
+        self,
+        prompt: str,
+        list_of_pdfs: List[str],
+        model: Google_llms = "gemini-2.5-flash-preview-04-17",
+        max_retries: int = 3,
+    ) -> str | None:
+        message_parts = [
+            {"type": "text", "text": prompt},
+        ]
+        for pdf in list_of_pdfs:
+            pdf_gcs_uri = upload_to_gcs(pdf)
+            message_parts.append(
+                {
+                    # File reference via GCS URI
+                    "type": "media",
+                    "mime_type": "application/pdf",
+                    "file_uri": pdf_gcs_uri,
+                }
+            )
+
+        for attempt in range(max_retries):
+            try:
+                llm = ChatVertexAI(
+                    model_name=model,
+                    project=GCP_PROJECT,
+                    location="us-central1",
+                    temperature=0,
+                )
+                response = await llm.ainvoke(
+                    [HumanMessage(content=message_parts)]  # type: ignore
+                )
+
+                if isinstance(response.content, list):
+                    response.content = "\n".join(response.content)  # type: ignore
+
+                return response.content  # type: ignore
+            except Exception as e:
+                model = "gemini-2.0-flash"  # fall back to a stable model on retry
+                print(f"Attempt {attempt + 1} failed with error: {e}")
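A self-contained sketch of calling the new method (prompt and PDF path are placeholders; GCP credentials and bucket access are assumed to be configured as above):

import asyncio

from _utils.langchain_utils.LLM_class import LLM


async def main():
    llm = LLM()
    resposta = await llm.google_gemini_vertex_ainvoke(
        "Resuma o processo em anexo.", ["/tmp/processo.pdf"]
    )
    print(resposta)


asyncio.run(main())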
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ