Update app.py
Browse files
app.py
CHANGED
@@ -15,6 +15,7 @@ from langchain.prompts import PromptTemplate
|
|
15 |
from langchain.vectorstores import Chroma
|
16 |
from utils import load_txt_documents, split_docs, load_uploaded_documents, retriever_from_chroma
|
17 |
from langchain.text_splitter import TokenTextSplitter, RecursiveCharacterTextSplitter
|
|
|
18 |
|
19 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
20 |
data_path = os.path.join(script_dir, "data/")
|
@@ -45,12 +46,8 @@ def get_vectorstore(text_chunks):
|
|
45 |
return vectorstore
|
46 |
|
47 |
def get_pdf_text(pdf_docs):
|
48 |
-
|
49 |
-
|
50 |
-
pdf_reader = PdfReader(pdf)
|
51 |
-
for page in pdf_reader.pages:
|
52 |
-
text += page.extract_text()
|
53 |
-
return text
|
54 |
|
55 |
def get_text_chunks(text):
|
56 |
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
|
|
15 |
from langchain.vectorstores import Chroma
|
16 |
from utils import load_txt_documents, split_docs, load_uploaded_documents, retriever_from_chroma
|
17 |
from langchain.text_splitter import TokenTextSplitter, RecursiveCharacterTextSplitter
|
18 |
+
from langchain_community.document_loaders.directory import DirectoryLoader
|
19 |
|
20 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
21 |
data_path = os.path.join(script_dir, "data/")
|
|
|
46 |
return vectorstore
|
47 |
|
48 |
def get_pdf_text(pdf_docs):
    """Load documents from ``pdf_docs`` through a ``DirectoryLoader``.

    Args:
        pdf_docs: Passed unchanged as the sole argument to
            ``DirectoryLoader``. Presumably a directory path -- TODO
            confirm against the caller.

    Returns:
        Whatever ``DirectoryLoader(pdf_docs).load()`` returns.
    """
    # NOTE(review): despite the function name, the loader's result is
    # returned as-is rather than being joined into a single string here;
    # confirm that callers expect that.
    return DirectoryLoader(pdf_docs).load()
|
|
|
|
|
|
|
|
|
51 |
|
52 |
def get_text_chunks(text):
|
53 |
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|