rajesh1729 committed on
Commit
e5702bf
·
verified ·
1 Parent(s): 53d8e52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -11
app.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import streamlit as st
3
  from langchain.embeddings.openai import OpenAIEmbeddings
4
  from langchain.vectorstores import Chroma
5
- from langchain.text_splitter import CharacterTextSplitter
6
  from langchain.chat_models import ChatOpenAI
7
  from langchain.chains import ConversationalRetrievalChain, ConversationChain
8
  from langchain.memory import ConversationBufferMemory
@@ -17,9 +17,9 @@ def create_sidebar():
17
 
18
  st.markdown("""
19
  ### Tools Used
20
- OpenAI
21
- LangChain
22
- ChromaDB
23
 
24
  ### Steps
25
  1. Add API key
@@ -40,13 +40,24 @@ def save_uploaded_file(uploaded_file, path='./uploads/'):
40
  def load_texts_from_papers(papers):
41
  all_texts = []
42
  for paper in papers:
43
- file_path = save_uploaded_file(paper)
44
- loader = PyPDFLoader(file_path)
45
- documents = loader.load()
46
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
47
- texts = text_splitter.split_documents(documents)
48
- all_texts.extend(texts)
49
- os.remove(file_path)
 
 
 
 
 
 
 
 
 
 
 
50
  return all_texts
51
 
52
  @st.cache_resource
@@ -88,6 +99,7 @@ def main():
88
  return_messages=True
89
  )
90
  )
 
91
  else:
92
  memory = ConversationBufferMemory(memory_key="chat_history")
93
  qa_chain = ConversationChain(
 
2
  import streamlit as st
3
  from langchain.embeddings.openai import OpenAIEmbeddings
4
  from langchain.vectorstores import Chroma
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter # Changed to RecursiveCharacterTextSplitter
6
  from langchain.chat_models import ChatOpenAI
7
  from langchain.chains import ConversationalRetrievalChain, ConversationChain
8
  from langchain.memory import ConversationBufferMemory
 
17
 
18
  st.markdown("""
19
  ### Tools Used
20
+ - OpenAI
21
+ - LangChain
22
+ - ChromaDB
23
 
24
  ### Steps
25
  1. Add API key
 
40
  def load_texts_from_papers(papers):
41
  all_texts = []
42
  for paper in papers:
43
+ try:
44
+ file_path = save_uploaded_file(paper)
45
+ loader = PyPDFLoader(file_path)
46
+ documents = loader.load()
47
+
48
+ # Using RecursiveCharacterTextSplitter with proper encoding handling
49
+ text_splitter = RecursiveCharacterTextSplitter(
50
+ chunk_size=1000,
51
+ chunk_overlap=200,
52
+ length_function=len,
53
+ is_separator_regex=False,
54
+ )
55
+
56
+ texts = text_splitter.split_documents(documents)
57
+ all_texts.extend(texts)
58
+ os.remove(file_path)
59
+ except Exception as e:
60
+ st.error(f"Error processing {paper.name}: {str(e)}")
61
  return all_texts
62
 
63
  @st.cache_resource
 
99
  return_messages=True
100
  )
101
  )
102
+ st.success("PDF processed successfully!")
103
  else:
104
  memory = ConversationBufferMemory(memory_key="chat_history")
105
  qa_chain = ConversationChain(