Neurolingua committed on
Commit
aa84359
·
verified ·
1 Parent(s): 6156a6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -10,7 +10,7 @@ from langchain.vectorstores.chroma import Chroma
10
  from langchain.prompts import ChatPromptTemplate
11
  from langchain_community.llms.ollama import Ollama
12
  from get_embedding_function import get_embedding_function
13
- from langchain.document_loaders.pdf import PyPDFLoader
14
  from langchain.text_splitter import RecursiveCharacterTextSplitter
15
  from langchain.schema import Document
16
  import tempfile
@@ -166,16 +166,18 @@ def query_rag(query_text: str):
166
 
167
  def save_pdf_and_update_database(pdf_filepath):
168
  try:
169
- document_loader = PyPDFLoader(pdf_filepath)
 
170
  documents = document_loader.load()
171
-
172
  text_splitter = RecursiveCharacterTextSplitter(
173
  chunk_size=800,
174
  chunk_overlap=80,
175
  length_function=len,
 
176
  )
177
  chunks = text_splitter.split_documents(documents)
178
-
179
  add_to_chroma(chunks)
180
  print(f"PDF processed and data updated in Chroma.")
181
  except Exception as e:
 
10
  from langchain.prompts import ChatPromptTemplate
11
  from langchain_community.llms.ollama import Ollama
12
  from get_embedding_function import get_embedding_function
13
+ from langchain.document_loaders import PyPDFDirectoryLoader
14
  from langchain.text_splitter import RecursiveCharacterTextSplitter
15
  from langchain.schema import Document
16
  import tempfile
 
166
 
167
  def save_pdf_and_update_database(pdf_filepath):
168
  try:
169
+ # Assuming you're loading PDFs from a specific directory
170
+ document_loader = PyPDFDirectoryLoader(os.path.dirname(pdf_filepath))
171
  documents = document_loader.load()
172
+
173
  text_splitter = RecursiveCharacterTextSplitter(
174
  chunk_size=800,
175
  chunk_overlap=80,
176
  length_function=len,
177
+ is_separator_regex=False,
178
  )
179
  chunks = text_splitter.split_documents(documents)
180
+
181
  add_to_chroma(chunks)
182
  print(f"PDF processed and data updated in Chroma.")
183
  except Exception as e: