ArturG9 committed on
Commit
dfb65c1
·
verified ·
1 Parent(s): 8356c3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -6
app.py CHANGED
@@ -15,6 +15,7 @@ from langchain.prompts import PromptTemplate
15
  from langchain.vectorstores import Chroma
16
  from utils import load_txt_documents, split_docs, load_uploaded_documents, retriever_from_chroma
17
  from langchain.text_splitter import TokenTextSplitter, RecursiveCharacterTextSplitter
 
18
 
19
  script_dir = os.path.dirname(os.path.abspath(__file__))
20
  data_path = os.path.join(script_dir, "data/")
@@ -45,12 +46,8 @@ def get_vectorstore(text_chunks):
45
  return vectorstore
46
 
47
  def get_pdf_text(pdf_docs):
48
- text = ""
49
- for pdf in pdf_docs:
50
- pdf_reader = PdfReader(pdf)
51
- for page in pdf_reader.pages:
52
- text += page.extract_text()
53
- return text
54
 
55
  def get_text_chunks(text):
56
  text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
 
15
  from langchain.vectorstores import Chroma
16
  from utils import load_txt_documents, split_docs, load_uploaded_documents, retriever_from_chroma
17
  from langchain.text_splitter import TokenTextSplitter, RecursiveCharacterTextSplitter
18
+ from langchain_community.document_loaders.directory import DirectoryLoader
19
 
20
  script_dir = os.path.dirname(os.path.abspath(__file__))
21
  data_path = os.path.join(script_dir, "data/")
 
46
  return vectorstore
47
 
48
  def get_pdf_text(pdf_docs):
49
+ document_loader = DirectoryLoader(pdf_docs)
50
+ return document_loader.load()
 
 
 
 
51
 
52
  def get_text_chunks(text):
53
  text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(