Kuba Fietkiewicz committed on
Commit
cceecbe
·
0 Parent(s):

working chatbot with lfs files

Browse files
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.pdf filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .DS_Store
2
+ .python-version
3
+ __pycache__/
4
+ .env
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Swiss Law Chatbot
2
+
3
+ ## Installation
4
+ ```
5
+ pip install -r requirements.txt
6
+ ```
7
+
8
+
9
+ ## Running in development
10
+ ```
11
+ export OPENAI_API_KEY=<your-openai-api-key>
12
+ python app.py
13
+ ```
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chat_models import ChatOpenAI
2
+ from langchain.llms import OpenAI
3
+ from langchain.memory import ConversationSummaryMemory
4
+ from langchain.schema import HumanMessage, SystemMessage
5
+ from langchain.chains import ConversationalRetrievalChain
6
+ from langchain.schema import AIMessage, HumanMessage
7
+ from langchain.document_loaders import PyPDFDirectoryLoader
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ from langchain.embeddings import OpenAIEmbeddings
10
+ from langchain.vectorstores import Chroma
11
+ import openai
12
+ import gradio as gr
13
+ import os
14
+
15
+ #os.environ["OPENAI_API_KEY"] = "sk-..." # Replace with your key
16
+
17
+ # use the following line to load a directory of PDFs
18
+ loader = PyPDFDirectoryLoader("data/")
19
+ data = loader.load_and_split()
20
+
21
+ text_splitter = RecursiveCharacterTextSplitter(
22
+ chunk_size=500,
23
+ chunk_overlap=0
24
+ )
25
+ all_splits = text_splitter.split_documents(data)
26
+
27
+ vectorstore = Chroma.from_documents(
28
+ documents=all_splits,
29
+ embedding=OpenAIEmbeddings()
30
+ )
31
+
32
+ llm = ChatOpenAI(temperature=1.0, model="gpt-4-1106-preview")
33
+ memory = ConversationSummaryMemory(
34
+ llm=llm,
35
+ memory_key="chat_history",
36
+ return_messages=True
37
+ )
38
+
39
+ retriever = vectorstore.as_retriever()
40
+ # Initialize the Conversational Retrieval Chain
41
+ qa_chain = ConversationalRetrievalChain.from_llm(
42
+ llm,
43
+ retriever=retriever,
44
+ memory=memory
45
+ )
46
+
47
+ def predict(message, history):
48
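+ # Convert the history into LangChain format (NOTE: this list is currently unused; qa_chain is called with only the question and relies on its own memory)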
49
+ history_langchain_format = []
50
+ for human, ai in history:
51
+ history_langchain_format.append(HumanMessage(content=human))
52
+ history_langchain_format.append(AIMessage(content=ai))
53
+
54
+ # Add the current user message
55
+ history_langchain_format.append(HumanMessage(content=message))
56
+
57
+ # Get a response from the Conversational Retrieval Chain
58
+ response = qa_chain.run(question=message)
59
+
60
+ print(response)
61
+
62
+ # Return the chain's response as-is
63
+ return response # or modify as needed based on the response structure
64
+
65
+
66
+
67
+ demo = gr.ChatInterface(predict)
68
+ demo.launch()
data/75627.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61941ba3bfb58849f0412b7ba77d6b4e8d02fd845b104dc8e57877670b9c1f1a
3
+ size 140220
data/Leitlinien Künstliche Intelligenz - FR.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e638c3daa54be5386feabb180ba146d21afbc94e83c73ecb1cd2244242c51cbe
3
+ size 404320
data/Livre-blanc-le-RGPD-en-10-points.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84daa41c6feb8a2bba2b7cf630e64e7b53980ad3bef797764979adbf6eeddb47
3
+ size 420657
data/RS-235.1-01092023-FR ADDED
Binary file (420 kB). View file
 
data/Tableau_comparatif_de_la_r_vision_de_la_LPD_1602156903.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:541dfa3502089387ec03659d6b5176ac05080f6cc776ff7fa5f5d6fd8df1a72f
3
+ size 758883
data/cybersecurite_f.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a32ce69303557d79d4afafcb59bc80c7c9abc07b02a05e7e18fd198b4b1c6be
3
+ size 300655
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ openai
3
+ langchain
4
+ chromadb
5
+ pypdf
6
+ tiktoken