Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,7 @@ import os
|
|
2 |
import streamlit as st
|
3 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
4 |
from langchain.vectorstores import Chroma
|
5 |
-
from langchain.text_splitter import
|
6 |
from langchain.chat_models import ChatOpenAI
|
7 |
from langchain.chains import ConversationalRetrievalChain, ConversationChain
|
8 |
from langchain.memory import ConversationBufferMemory
|
@@ -17,9 +17,9 @@ def create_sidebar():
|
|
17 |
|
18 |
st.markdown("""
|
19 |
### Tools Used
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
|
24 |
### Steps
|
25 |
1. Add API key
|
@@ -40,13 +40,24 @@ def save_uploaded_file(uploaded_file, path='./uploads/'):
|
|
40 |
def load_texts_from_papers(papers):
|
41 |
all_texts = []
|
42 |
for paper in papers:
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
return all_texts
|
51 |
|
52 |
@st.cache_resource
|
@@ -88,6 +99,7 @@ def main():
|
|
88 |
return_messages=True
|
89 |
)
|
90 |
)
|
|
|
91 |
else:
|
92 |
memory = ConversationBufferMemory(memory_key="chat_history")
|
93 |
qa_chain = ConversationChain(
|
|
|
2 |
import streamlit as st
|
3 |
from langchain.embeddings.openai import OpenAIEmbeddings
|
4 |
from langchain.vectorstores import Chroma
|
5 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter # Changed to RecursiveCharacterTextSplitter
|
6 |
from langchain.chat_models import ChatOpenAI
|
7 |
from langchain.chains import ConversationalRetrievalChain, ConversationChain
|
8 |
from langchain.memory import ConversationBufferMemory
|
|
|
17 |
|
18 |
st.markdown("""
|
19 |
### Tools Used
|
20 |
+
- OpenAI
|
21 |
+
- LangChain
|
22 |
+
- ChromaDB
|
23 |
|
24 |
### Steps
|
25 |
1. Add API key
|
|
|
40 |
def load_texts_from_papers(papers):
    """Extract and chunk text from uploaded PDF papers.

    For each uploaded file: persist it to disk, load it with PyPDFLoader,
    split the resulting documents into overlapping chunks, and collect the
    chunks. Failures on one paper are reported in the Streamlit UI and do
    not abort processing of the remaining papers.

    Args:
        papers: iterable of Streamlit UploadedFile objects (each has a
            ``.name`` attribute; assumed PDF content — TODO confirm callers
            only pass PDFs).

    Returns:
        list: all document chunks produced across every paper (empty list
        when nothing could be processed).
    """
    all_texts = []
    for paper in papers:
        file_path = None
        try:
            file_path = save_uploaded_file(paper)
            loader = PyPDFLoader(file_path)
            documents = loader.load()

            # RecursiveCharacterTextSplitter splits on paragraph, then
            # sentence, then word boundaries, which handles irregular
            # PDF text extraction better than a single fixed separator.
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000,
                chunk_overlap=200,
                length_function=len,
                is_separator_regex=False,
            )

            all_texts.extend(text_splitter.split_documents(documents))
        except Exception as e:
            # Best-effort batch: surface the error in the UI and continue
            # with the next paper instead of crashing the app.
            st.error(f"Error processing {paper.name}: {str(e)}")
        finally:
            # Bug fix: the original removed the temp file only on the
            # success path, leaking the saved upload whenever loading or
            # splitting raised. Always clean up here.
            if file_path is not None and os.path.exists(file_path):
                os.remove(file_path)
    return all_texts
|
62 |
|
63 |
@st.cache_resource
|
|
|
99 |
return_messages=True
|
100 |
)
|
101 |
)
|
102 |
+
st.success("PDF processed successfully!")
|
103 |
else:
|
104 |
memory = ConversationBufferMemory(memory_key="chat_history")
|
105 |
qa_chain = ConversationChain(
|