File size: 5,828 Bytes
f4d849c
 
 
 
 
 
8a0aab3
e7de495
8a0aab3
 
 
e7de495
 
 
 
 
8a0aab3
 
 
 
 
 
 
 
 
 
 
 
 
 
e7de495
 
 
 
a0d14b1
 
 
 
 
 
 
 
e7de495
 
a0d14b1
e7de495
 
a0d14b1
e7de495
 
 
 
 
 
 
 
 
 
 
 
 
a0d14b1
 
e7de495
 
a244b8c
e7de495
 
 
a0d14b1
9246260
a0d14b1
e7de495
 
80d7a37
e7de495
4a39d00
 
a0d14b1
 
4a39d00
8a0aab3
e7de495
a0d14b1
e7de495
a0d14b1
e7de495
 
 
 
 
 
 
a0d14b1
e7de495
d6f7b0b
e7de495
a0d14b1
 
af7276c
 
2e3a206
3e45cfb
 
bd5c172
3e45cfb
 
 
 
 
64ff1c4
af7276c
3e45cfb
af7276c
 
 
15c464b
a0d14b1
af7276c
e7de495
 
 
 
bb09abf
e7de495
bb09abf
e7de495
bb09abf
7803fd6
e7de495
bb09abf
e7de495
bb09abf
7803fd6
bb09abf
 
e7de495
 
bb09abf
e7de495
bb09abf
4a39d00
e7de495
bb09abf
 
 
7803fd6
bb09abf
 
18f4e61
bb09abf
 
 
 
 
18f4e61
7803fd6
2bd73c6
bb09abf
d6f7b0b
 
bb09abf
7803fd6
a0d14b1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# ===========================================
# title: ๐™ณ๐šŠ๐šข๐šœ๐š˜๐š๐š-๐™ฐ๐šœ๐šœ๐š’๐šœ๐š๐šŠ๐š—๐š| ๐—ฟ๐—ฐ๐˜ƒ๐—ฒ๐—ฟ๐—ฒ๐—ฑ-๐—ณ๐˜‚๐—ป๐—ฐ๐˜_๐Ÿฐ-๐—ฅ๐—”๐—ค๐—”
# file: app.py
# NOTE: chainlit==0.6.2
# ===========================================

# --LLMs
from langchain.chat_models import ChatOpenAI

# --chains, components
from langchain.chains import RetrievalQA
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

# --embeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings

# --document Loaders, processing
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# --vector Stores, storage
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore

# --other libraries
import chainlit as cl

# System prompt that scopes the assistant to Daysoff firmahytteordning and
# personvern FAQs, answering in Norwegian by default.
system_template = """
Use the following pieces of context to answer the user's question.
You are an AI customer service assistant for Daysoff and respond to queries in Norwegian language (by default).
Your expertise is in providing fast, accurate and on-brand answers that covers frequently asked questions about 
Daysoff firmahytteordning and personvernspolicy.

If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
If the question is not about either of these two topics, politely inform them that you are tuned to only answer 
questions about this and that all other queries are best directed to [email protected].

You can make inferences based on the context as long as it still faithfully represents the feedback.

Example of how your response should be, using a warm, direct, and professional tone:

```
foo
```

Begin!
----------------
{context}"""

# Assemble the chat prompt: system instructions first, then the user's question.
_system_msg = SystemMessagePromptTemplate.from_template(system_template)
_human_msg = HumanMessagePromptTemplate.from_template("{question}")
messages = [_system_msg, _human_msg]
prompt = ChatPromptTemplate(messages=messages)
# Shared kwargs for RetrievalQA chain construction.
chain_type_kwargs = {"prompt": prompt}



@cl.author_rename
def rename(orig_author: str):
    """Replace Chainlit's default author label with a friendlier one."""
    if orig_author == "Just a moment":
        return "Thinking.."
    return orig_author

@cl.on_chat_start
async def start():
    """Build the FAISS-backed RetrievalQA chain for this session and greet the user.

    Loads the FAQ knowledge base from CSV, tags each row with its index and
    Info_Url, embeds the chunks (with a local disk cache), and stores the
    finished chain in the Chainlit user session for `main` to use.
    """

    msg = cl.Message(content="Building vector store...")
    await msg.send()

    # Each CSV row becomes one document; "Answer" is used as the source column.
    loader = CSVLoader(file_path="./data/knowledge_base.csv", source_column="Answer")
    data = loader.load()

    # Re-point "source" at the row's Info_Url and record the 1-based row index,
    # so the message handler can attach links for specific rows later.
    for i, doc in enumerate(data):
        doc.metadata["row_index"] = i + 1
        doc.metadata["source"] = doc.metadata.get("Info_Url", "")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    documents = text_splitter.transform_documents(data)

    # Cache embeddings on disk so restarts don't re-call the embeddings API.
    store = LocalFileStore("./cache/")

    core_embeddings_model = OpenAIEmbeddings()
    embedder = CacheBackedEmbeddings.from_bytes_store(
        core_embeddings_model, store, namespace=core_embeddings_model.model
    )
    # FAISS indexing is CPU-bound; run it off the event loop.
    docsearch = await cl.make_async(FAISS.from_documents)(documents, embedder)

    chain = RetrievalQA.from_chain_type(
        ChatOpenAI(model="gpt-4", temperature=0.0, streaming=True),
        chain_type="stuff",
        return_source_documents=True,
        retriever=docsearch.as_retriever(),
        # Reuse the module-level kwargs instead of duplicating {"prompt": prompt}.
        chain_type_kwargs=chain_type_kwargs,
    )
    markdown_table = """
    ### Daysoff Firmahytteordning & Personvern FAQ
    
    | Type 🧐          | Populære spørsmål 👋🏼                                                         
    |------------------|-----------------------------------------------------------|
    | Firmahytteordning | • Jeg har lagt inn en bestilling. Hva skjer videre?      |               
    | Firmahytteordning | • Hva er betingelser for utleie?                         |                 
    | Firmahytteordning | • Kan jeg avbestille min reservasjon?                    |                  
    | Personvern        | • Med hvem deler dere mine personlige opplysninger?      |                
    | Personvern        | • Hvordan beskytter dere dataene mine?                   |                
    | Firmahytteordning | • Kan jeg ta med hund eller katt?                        |                 
    """

    msg = cl.Message(
        content=f"FAISS ready. Her er noen eksempler på spørsmål jeg kan svare på: 👇🏼\n\n{markdown_table}"
    )
    await msg.send()

    # Make the chain available to the on_message handler.
    cl.user_session.set("chain", chain)

@cl.on_message
async def main(message):
    """Answer a user message through the session's RetrievalQA chain.

    The answer streams to the UI via the callback handler; afterwards a
    message is sent that appends Info_Url sources for selected
    knowledge-base rows (2, 8, 14), de-duplicated.
    """

    chain = cl.user_session.get("chain")

    # Stream tokens to the UI as soon as the final answer starts.
    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True,
        answer_prefix_tokens=["FINAL", "ANSWER"]
    )

    cb.answer_reached = True

    res = await chain.acall(message, callbacks=[cb])
    # BUG FIX: an unconditional `return` here previously made everything
    # below unreachable, so sources were never attached.

    answer = res["result"]
    source_elements = []
    visited_sources = set()

    # --documents, user session
    docs = res.get("source_documents", [])
    metadatas = [doc.metadata for doc in docs]

    # --append source(s), specific rows only
    # BUG FIX: this check used to sit OUTSIDE the loop, so only the last
    # document was ever inspected (and an empty doc list raised NameError).
    for doc, metadata in zip(docs, metadatas):
        row_index = metadata.get("row_index", -1)
        source = metadata.get("source", "")

        if row_index in [2, 8, 14] and source and source not in visited_sources:
            visited_sources.add(source)
            source_elements.append(
                cl.Text(content="https://www.daysoff.no" + source, name="Info_Url")
            )

    if source_elements:
        answer += f"\nSources: {', '.join([e.content for e in source_elements])}"
        await cl.Message(content=answer, elements=source_elements).send()
        return

    else:
        await cl.Message(content="No sources found").send()