Spaces: Runtime error

Kuba Fietkiewicz committed · Commit cceecbe
Parent(s):
working chatbot with lfs files
Browse files
- .gitattributes +1 -0
- .gitignore +4 -0
- README.md +13 -0
- app.py +68 -0
- data/75627.pdf +3 -0
- data/Leitlinien Künstliche Intelligenz - FR.pdf +3 -0
- data/Livre-blanc-le-RGPD-en-10-points.pdf +3 -0
- data/RS-235.1-01092023-FR +0 -0
- data/Tableau_comparatif_de_la_r_vision_de_la_LPD_1602156903.pdf +3 -0
- data/cybersecurite_f.pdf +3 -0
- requirements.txt +6 -0
.gitattributes
ADDED
@@ -0,0 +1 @@
+*.pdf filter=lfs diff=lfs merge=lfs -text
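This attribute routes every `*.pdf` through Git LFS, so a plain clone of the Space holds only small pointer files until `git lfs pull` fetches the actual documents. A minimal pre-flight check (a hypothetical helper, not part of app.py) that the PDF loader will see real PDF bytes rather than unresolved pointers:

```python
# Hypothetical pre-flight check: real PDFs start with the "%PDF-" magic bytes,
# while unresolved Git LFS pointers are small text files starting with
# "version https://git-lfs.github.com/spec/v1".
import pathlib

for pdf in pathlib.Path("data").glob("*.pdf"):
    if not pdf.read_bytes().startswith(b"%PDF-"):
        print(f"{pdf}: looks like an LFS pointer, run `git lfs pull` first")
```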
.gitignore
ADDED
@@ -0,0 +1,4 @@
+.DS_Store
+.python-version
+__pycache__/
+.env
README.md
ADDED
@@ -0,0 +1,13 @@
+# Swiss Law Chatbot
+
+## Installation
+```
+pip install -r requirements.txt
+```
+
+
+## Running in development
+```
+export OPENAI_API_KEY=
+python app.py
+```
app.py
ADDED
@@ -0,0 +1,68 @@
+from langchain.chat_models import ChatOpenAI
+from langchain.llms import OpenAI
+from langchain.memory import ConversationSummaryMemory
+from langchain.schema import HumanMessage, SystemMessage
+from langchain.chains import ConversationalRetrievalChain
+from langchain.schema import AIMessage, HumanMessage
+from langchain.document_loaders import PyPDFDirectoryLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
+import openai
+import gradio as gr
+import os
+
+# os.environ["OPENAI_API_KEY"] = "sk-..."  # Replace with your key
+
+# use the following line to load a directory of PDFs
+loader = PyPDFDirectoryLoader("data/")
+data = loader.load_and_split()
+
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size=500,
+    chunk_overlap=0
+)
+all_splits = text_splitter.split_documents(data)
+
+vectorstore = Chroma.from_documents(
+    documents=all_splits,
+    embedding=OpenAIEmbeddings()
+)
+
+llm = ChatOpenAI(temperature=1.0, model="gpt-4-1106-preview")
+memory = ConversationSummaryMemory(
+    llm=llm,
+    memory_key="chat_history",
+    return_messages=True
+)
+
+retriever = vectorstore.as_retriever()
+# Initialize the Conversational Retrieval Chain
+qa_chain = ConversationalRetrievalChain.from_llm(
+    llm,
+    retriever=retriever,
+    memory=memory
+)
+
+def predict(message, history):
+    # Convert the history into LangChain format
+    history_langchain_format = []
+    for human, ai in history:
+        history_langchain_format.append(HumanMessage(content=human))
+        history_langchain_format.append(AIMessage(content=ai))
+
+    # Add the current user message
+    history_langchain_format.append(HumanMessage(content=message))
+
+    # Get a response from the Conversational Retrieval Chain
+    response = qa_chain.run(question=message)
+
+    print(response)
+
+    # Extract and return the content of the response
+    return response  # or modify as needed based on the response structure
+
+
+
+demo = gr.ChatInterface(predict)
+demo.launch()
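Because the chain is built with a `ConversationSummaryMemory`, `qa_chain.run(question=...)` keeps the chat history internally and returns the answer string directly; the `history_langchain_format` list assembled in `predict` is never passed to the chain. A minimal smoke test of the chain outside Gradio, assuming `OPENAI_API_KEY` is set (the sample question is only an illustration):

```python
# Quick sanity check of the retrieval chain without launching the Gradio UI.
# Assumes the module above has already built `vectorstore` and `qa_chain`.
if __name__ == "__main__":
    sample_question = "Quelles sont les principales nouveautés de la LPD révisée ?"  # illustrative only
    print(qa_chain.run(question=sample_question))
```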
data/75627.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61941ba3bfb58849f0412b7ba77d6b4e8d02fd845b104dc8e57877670b9c1f1a
+size 140220
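Each of these `+3 -0` entries is a Git LFS pointer file: a `version` line, the `oid sha256:` digest of the real content, and its `size` in bytes. A small sketch (hypothetical helper names) that parses a pointer and verifies a locally fetched copy of the file against it:

```python
# Hypothetical helpers: parse a Git LFS pointer and compare its recorded
# sha256 digest and byte size with a locally fetched copy of the file.
import hashlib
import pathlib


def read_pointer(pointer_path):
    fields = dict(line.split(" ", 1)
                  for line in pathlib.Path(pointer_path).read_text().splitlines()
                  if line)
    return fields["oid"].removeprefix("sha256:"), int(fields["size"])


def matches_pointer(file_path, pointer_path):
    oid, size = read_pointer(pointer_path)
    data = pathlib.Path(file_path).read_bytes()
    return len(data) == size and hashlib.sha256(data).hexdigest() == oid
```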
data/Leitlinien Künstliche Intelligenz - FR.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e638c3daa54be5386feabb180ba146d21afbc94e83c73ecb1cd2244242c51cbe
+size 404320
data/Livre-blanc-le-RGPD-en-10-points.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84daa41c6feb8a2bba2b7cf630e64e7b53980ad3bef797764979adbf6eeddb47
+size 420657
data/RS-235.1-01092023-FR
ADDED
Binary file (420 kB)
data/Tableau_comparatif_de_la_r_vision_de_la_LPD_1602156903.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:541dfa3502089387ec03659d6b5176ac05080f6cc776ff7fa5f5d6fd8df1a72f
+size 758883
data/cybersecurite_f.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a32ce69303557d79d4afafcb59bc80c7c9abc07b02a05e7e18fd198b4b1c6be
+size 300655
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+gradio
+openai
+langchain
+chromadb
+pypdf
+tiktoken