working chatbot with lfs files

Files changed (11) hide show

.gitattributes +1 -0
.gitignore +4 -0
README.md +13 -0
app.py +68 -0
data/75627.pdf +3 -0
data/Leitlinien Künstliche Intelligenz - FR.pdf +3 -0
data/Livre-blanc-le-RGPD-en-10-points.pdf +3 -0
data/RS-235.1-01092023-FR +0 -0
data/Tableau_comparatif_de_la_r_vision_de_la_LPD_1602156903.pdf +3 -0
data/cybersecurite_f.pdf +3 -0
requirements.txt +6 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.pdf filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+.DS_Store
+.python-version
+__pycache__/
+.env

README.md ADDED Viewed

	@@ -0,0 +1,13 @@

+# Swiss Law Chatbot
+## Installation
+```
+pip install -r requirements.txt
+```
+## Running in development
+```
+export OPENAI_API_KEY="sk-..."  # replace with your key
+python app.py
+```

app.py ADDED Viewed

	@@ -0,0 +1,68 @@

+from langchain.chat_models import ChatOpenAI
+from langchain.llms import OpenAI
+from langchain.memory import ConversationSummaryMemory
+from langchain.schema import HumanMessage, SystemMessage
+from langchain.chains import ConversationalRetrievalChain
+from langchain.schema import AIMessage, HumanMessage
+from langchain.document_loaders import PyPDFDirectoryLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
+import openai
+import gradio as gr
+import os
+#os.environ["OPENAI_API_KEY"] = "sk-..."  # Replace with your key
+# use the following line to load a directory of PDFs
+loader = PyPDFDirectoryLoader("data/")
+data = loader.load_and_split()
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size=500,
+    chunk_overlap=0
+)
+all_splits = text_splitter.split_documents(data)
+vectorstore = Chroma.from_documents(
+    documents=all_splits,
+    embedding=OpenAIEmbeddings()
+)
+llm = ChatOpenAI(temperature=1.0, model="gpt-4-1106-preview")
+memory = ConversationSummaryMemory(
+    llm=llm,
+    memory_key="chat_history",
+    return_messages=True
+)
+retriever = vectorstore.as_retriever()
+# Initialize the Conversational Retrieval Chain
+qa_chain = ConversationalRetrievalChain.from_llm(
+    llm,
+    retriever=retriever,
+    memory=memory
+)
+def predict(message, history):
+    # Convert the history into LangChain format
+    history_langchain_format = []
+    for human, ai in history:
+        history_langchain_format.append(HumanMessage(content=human))
+        history_langchain_format.append(AIMessage(content=ai))
+    # Add the current user message
+    history_langchain_format.append(HumanMessage(content=message))
+    # Get a response from the Conversational Retrieval Chain
+    response = qa_chain.run(question=message)
+    print(response)
+    # Extract and return the content of the response
+    return response  # or modify as needed based on the response structure
+demo = gr.ChatInterface(predict)
+demo.launch()

data/75627.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:61941ba3bfb58849f0412b7ba77d6b4e8d02fd845b104dc8e57877670b9c1f1a
+size 140220

data/Leitlinien Künstliche Intelligenz - FR.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e638c3daa54be5386feabb180ba146d21afbc94e83c73ecb1cd2244242c51cbe
+size 404320

data/Livre-blanc-le-RGPD-en-10-points.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:84daa41c6feb8a2bba2b7cf630e64e7b53980ad3bef797764979adbf6eeddb47
+size 420657

data/RS-235.1-01092023-FR ADDED Viewed

Binary file (420 kB). View file

data/Tableau_comparatif_de_la_r_vision_de_la_LPD_1602156903.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:541dfa3502089387ec03659d6b5176ac05080f6cc776ff7fa5f5d6fd8df1a72f
+size 758883

data/cybersecurite_f.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a32ce69303557d79d4afafcb59bc80c7c9abc07b02a05e7e18fd198b4b1c6be
+size 300655

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio
+openai
+langchain
+chromadb
+pypdf
+tiktoken