Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -44,12 +44,33 @@ def get_pdf_text(pdf_docs : list) -> str:
|
|
44 |
return text
|
45 |
|
46 |
|
47 |
-
def get_text_chunks(text:str) ->list:
|
48 |
-
text_splitter = CharacterTextSplitter(
|
49 |
-
separator="\n", chunk_size=1500, chunk_overlap=300, length_function=len
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
)
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
53 |
|
54 |
|
55 |
def get_vectorstore(text_chunks : list) -> FAISS:
|
|
|
44 |
return text
|
45 |
|
46 |
|
47 |
+
#def get_text_chunks(text:str) ->list:
|
48 |
+
# text_splitter = CharacterTextSplitter(
|
49 |
+
# separator="\n", chunk_size=1500, chunk_overlap=300, length_function=len
|
50 |
+
# )
|
51 |
+
# chunks = text_splitter.split_text(text)
|
52 |
+
# return chunks
|
53 |
+
|
54 |
+
def get_text_chunks(pages: list, *, chunk_size: int = 1024, chunk_overlap: int = 64) -> list:
    """
    Split the input documents into smaller overlapping chunks.

    Parameters
    ----------
    pages : list
        The documents to split. They are passed unchanged to
        ``RecursiveCharacterTextSplitter.split_documents``, so they are
        presumably LangChain ``Document`` objects (e.g. one per PDF page)
        -- confirm against the caller.
    chunk_size : int, optional
        Maximum size of each chunk, forwarded to the splitter
        (default 1024).
    chunk_overlap : int, optional
        Amount of overlap between consecutive chunks, forwarded to the
        splitter (default 64).

    Returns
    -------
    list
        The chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    chunks = text_splitter.split_documents(pages)
    # Debug output: number of chunks produced for this input.
    print(len(chunks))
    return chunks
|
72 |
+
|
73 |
+
|
74 |
|
75 |
|
76 |
def get_vectorstore(text_chunks : list) -> FAISS:
|