JPLTedCas committed on
Commit
5635ea3
·
verified ·
1 Parent(s): 016d374

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -5
app.py CHANGED
@@ -44,12 +44,33 @@ def get_pdf_text(pdf_docs : list) -> str:
44
  return text
45
 
46
 
47
- def get_text_chunks(text:str) ->list:
48
- text_splitter = CharacterTextSplitter(
49
- separator="\n", chunk_size=1500, chunk_overlap=300, length_function=len
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  )
51
- chunks = text_splitter.split_text(text)
52
- return chunks
 
 
 
53
 
54
 
55
  def get_vectorstore(text_chunks : list) -> FAISS:
 
44
  return text
45
 
46
 
47
+ #def get_text_chunks(text:str) ->list:
48
+ # text_splitter = CharacterTextSplitter(
49
+ # separator="\n", chunk_size=1500, chunk_overlap=300, length_function=len
50
+ # )
51
+ # chunks = text_splitter.split_text(text)
52
+ # return chunks
53
+
54
+ def get_text_chunks(pages):
55
+ """
56
+ Split the input text into chunks.
57
+ Parameters
58
+ ----------
59
+ text : str
60
+ The input text to be split.
61
+ Returns
62
+ -------
63
+ list
64
+ List of text chunks.
65
+ """
66
+ text_splitter = RecursiveCharacterTextSplitter(
67
+ chunk_size=1024, chunk_overlap=64
68
  )
69
+ texts = text_splitter.split_documents(pages)
70
+ print(str(len(texts)))
71
+ return texts
72
+
73
+
74
 
75
 
76
  def get_vectorstore(text_chunks : list) -> FAISS: