luanpoppe committed
Commit 93c6cb3 · Parent: 451f8a3

fix: one chunks

_utils/gerar_documento.py CHANGED
@@ -55,13 +55,11 @@ async def gerar_documento(
     # Initialize enhanced summarizer
     summarizer = GerarDocumento(serializer)
 
-    all_PDFs_chunks, full_text_as_array, full_text_as_string = (
-        await get_full_text_and_all_PDFs_chunks(
-            listaPDFs,
-            summarizer.splitter,
-            serializer.should_use_llama_parse,
-            isBubble,
-        )
+    all_PDFs_chunks, full_text_as_array = await get_full_text_and_all_PDFs_chunks(
+        listaPDFs,
+        summarizer.splitter,
+        serializer.should_use_llama_parse,
+        isBubble,
     )
 
     is_contextualized_chunk = serializer.should_have_contextual_chunks
@@ -104,7 +102,7 @@ async def gerar_documento(
     # Create enhanced vector store and BM25 index
     vector_store, bm25, chunk_ids = (
         summarizer.vector_store.create_enhanced_vector_store(
-            chunks_processados, is_contextualized_chunk
+            chunks_processados, is_contextualized_chunk, axiom_instance
         )
     )
 
_utils/gerar_relatorio_modelo_usuario/utils.py CHANGED
@@ -158,7 +158,7 @@ async def get_full_text_and_all_PDFs_chunks(
         )
         all_PDFs_chunks = all_PDFs_chunks + chunks
 
-    return all_PDFs_chunks, pages, full_text_as_string
+    return all_PDFs_chunks, pages
 
 
 async def generate_document_title(resumo_para_gerar_titulo: str):
_utils/langchain_utils/Splitter_class.py CHANGED
@@ -41,7 +41,6 @@ class Splitter:
         # pages = get_pdf_from_bubble(
         #     pdf_path
         # )  # Gera uma lista de objetos Document, sendo cada item da lista referente a UMA PÁGINA inteira do PDF.
-        full_text_as_string = ""
 
         chunks_of_string_only: List[str] = []
 
@@ -137,7 +136,7 @@ class Splitter:
         #     char_count += len(text)
         print("TERMINOU DE ORGANIZAR PDFS EM CHUNKS")
 
-        return chunks, chunks_of_string_only, full_text_as_string
+        return chunks, chunks_of_string_only
 
     def load_and_split_text(self, text: str) -> List[DocumentChunk]:
         """Load Text and split into chunks with metadata - Criei essa função apenas para o ragas"""
_utils/langchain_utils/Vector_store_class.py CHANGED
@@ -6,6 +6,8 @@ from _utils.models.gerar_relatorio import (
 from setup.easy_imports import Chroma, BM25Okapi, HuggingFaceEmbeddings
 import logging
 
+from setup.logging import Axiom
+
 
 class VectorStore:
     def __init__(self, embedding_model):
@@ -14,7 +16,10 @@ class VectorStore:
         pass
 
     def create_enhanced_vector_store(
-        self, chunks: List[ContextualizedChunk], is_contextualized_chunk
+        self,
+        chunks: List[ContextualizedChunk],
+        is_contextualized_chunk,
+        axiom_instance: Axiom,
     ) -> Tuple[Chroma, BM25Okapi, List[str]]:
         """Create vector store and BM25 index with contextualized chunks"""
         try:
@@ -23,7 +28,7 @@ class VectorStore:
                 texts = [
                     f"""<one_chunk>
                     <document_id>{chunk.id_do_processo}</document_id>
-                    <document_context_title>Document_context{chunk.context}</document_context_title>
+                    <document_context_title>{chunk.context}</document_context_title>
                     <document_contextual_summary>{chunk.contextual_summary}</document_contextual_summary>
                     <document_content>Document_content: {chunk.content}</document_content>
                     </one_chunk>
@@ -31,6 +36,7 @@ class VectorStore:
                     """
                     for chunk in chunks
                 ]
+                axiom_instance.send_axiom(f"Chunks gerados: {texts}")
             else:
                 texts = [f"{chunk.content}" for chunk in chunks]
 