Spaces:
Running
Running
luanpoppe
committed on
Commit
·
32df555
1
Parent(s):
605a49c
fix: minor fix
Browse files
_utils/splitters/Splitter_class.py
CHANGED
@@ -39,6 +39,7 @@ class Splitter:
|
|
39 |
# pages = get_pdf_from_bubble(
|
40 |
# pdf_path
|
41 |
# ) # Gera uma lista de objetos Document, sendo cada item da lista referente a UMA PÁGINA inteira do PDF.
|
|
|
42 |
|
43 |
initial_chunks: List[str] = []
|
44 |
|
@@ -48,7 +49,6 @@ class Splitter:
|
|
48 |
page_boundaries, combined_text = (
|
49 |
combine_documents_without_losing_pagination(pages)
|
50 |
)
|
51 |
-
full_text_as_string = ""
|
52 |
for page in pages:
|
53 |
full_text_as_string = full_text_as_string + page.page_content
|
54 |
initial_chunks = initial_chunks + self.text_splitter.split_text(
|
|
|
39 |
# pages = get_pdf_from_bubble(
|
40 |
# pdf_path
|
41 |
# ) # Gera uma lista de objetos Document, sendo cada item da lista referente a UMA PÁGINA inteira do PDF.
|
42 |
+
full_text_as_string = ""
|
43 |
|
44 |
initial_chunks: List[str] = []
|
45 |
|
|
|
49 |
page_boundaries, combined_text = (
|
50 |
combine_documents_without_losing_pagination(pages)
|
51 |
)
|
|
|
52 |
for page in pages:
|
53 |
full_text_as_string = full_text_as_string + page.page_content
|
54 |
initial_chunks = initial_chunks + self.text_splitter.split_text(
|