Spaces:

mqcm2
/

test1

Paused

App Files Files Community

Muhammad Qasim committed on Sep 5, 2024

Commit

520da56

1 Parent(s): dd251ef

version updated

Browse files

Files changed (3) hide show

.env.example +2 -0
README.md +6 -0
app.py +12 -4

.env.example ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ HUGGINGFACEHUB_API_TOKEN=
2	+ OPENAI_API_KEY=

README.md CHANGED Viewed

@@ -38,6 +38,12 @@ Before using the chatbot, ensure you have the following installed:
     pip install -r requirements.txt
     ```
 ## Usage 📋
 1. Run the chatbot using the following command:

     pip install -r requirements.txt
     ```
+4. Copy .env.example to .env and set your OpenAI & Hugging Face API keys:
+    ```shell
+    cp .env.example .env
+    ```
 ## Usage 📋
 1. Run the chatbot using the following command:

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
 from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
 from langchain.vectorstores import FAISS
-from langchain.chat_models import Chat
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
@@ -12,7 +12,8 @@ from htmlTemplates import css, bot_template, user_template, hide_st_style, foote
 from langchain.llms import HuggingFaceHub
 from matplotlib import style
-def get_pdf_text(pdf_docs):
     text = ""
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
@@ -20,6 +21,7 @@ def get_pdf_text(pdf_docs):
             text += page.extract_text()
     return text
 def get_text_chunks(text):
     text_splitter = CharacterTextSplitter(
         separator="\n",
@@ -30,12 +32,14 @@ def get_text_chunks(text):
     chunks = text_splitter.split_text(text)
     return chunks
 def get_vectorstore(text_chunks):
     embeddings = OpenAIEmbeddings()
     # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
     return vectorstore
 def get_conversation_chain(vectorstore):
     llm = ChatOpenAI()
     # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
@@ -49,6 +53,7 @@ def get_conversation_chain(vectorstore):
     )
     return conversation_chain
 def handle_userinput(user_question):
     if st.session_state.conversation is None:
         st.error("Please upload PDF data before starting the chat.")
@@ -65,10 +70,11 @@ def handle_userinput(user_question):
             st.write(bot_template.replace(
                 "{{MSG}}", message.content), unsafe_allow_html=True)
 def main():
     load_dotenv()
     st.set_page_config(page_title="Talk with PDF",
-                    page_icon="icon.png")
     st.write(css, unsafe_allow_html=True)
     if "conversation" not in st.session_state:
@@ -92,7 +98,8 @@ def main():
                     raw_text = get_pdf_text(pdf_docs)
                     text_chunks = get_text_chunks(raw_text)
                     vectorstore = get_vectorstore(text_chunks)
-                    st.session_state.conversation = get_conversation_chain(vectorstore)
                     st.success("Your Data has been processed successfully")
     if user_question:
@@ -101,5 +108,6 @@ def main():
     st.markdown(hide_st_style, unsafe_allow_html=True)
     st.markdown(footer, unsafe_allow_html=True)
 if __name__ == '__main__':
     main()

 from langchain.text_splitter import CharacterTextSplitter
 from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
 from langchain.vectorstores import FAISS
+from langchain.chat_models import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from langchain.llms import HuggingFaceHub
 from matplotlib import style
+def get_pdf_text(pdf_docs):
     text = ""
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
             text += page.extract_text()
     return text
 def get_text_chunks(text):
     text_splitter = CharacterTextSplitter(
         separator="\n",
     chunks = text_splitter.split_text(text)
     return chunks
 def get_vectorstore(text_chunks):
     embeddings = OpenAIEmbeddings()
     # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
     return vectorstore
 def get_conversation_chain(vectorstore):
     llm = ChatOpenAI()
     # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
     )
     return conversation_chain
 def handle_userinput(user_question):
     if st.session_state.conversation is None:
         st.error("Please upload PDF data before starting the chat.")
             st.write(bot_template.replace(
                 "{{MSG}}", message.content), unsafe_allow_html=True)
 def main():
     load_dotenv()
     st.set_page_config(page_title="Talk with PDF",
+                       page_icon="icon.png")
     st.write(css, unsafe_allow_html=True)
     if "conversation" not in st.session_state:
                     raw_text = get_pdf_text(pdf_docs)
                     text_chunks = get_text_chunks(raw_text)
                     vectorstore = get_vectorstore(text_chunks)
+                    st.session_state.conversation = get_conversation_chain(
+                        vectorstore)
                     st.success("Your Data has been processed successfully")
     if user_question:
     st.markdown(hide_st_style, unsafe_allow_html=True)
     st.markdown(footer, unsafe_allow_html=True)
 if __name__ == '__main__':
     main()