File size: 4,928 Bytes
e7de495
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8789528
e7de495
 
 
 
4e3d9f3
 
 
1e4eb99
4e3d9f3
 
 
 
e7de495
 
ce8b241
e7de495
 
ce8b241
e7de495
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a244b8c
e7de495
 
 
 
b5a6292
e7de495
 
3be33ef
e7de495
4a39d00
 
8a0e8f1
4a39d00
 
e7de495
 
 
 
 
 
 
 
 
d6f7b0b
e7de495
d6f7b0b
e7de495
 
 
 
d6f7b0b
 
 
 
 
 
 
 
 
e312d55
9c81092
8615560
b5a6292
 
e7de495
 
 
 
ebad9f4
e7de495
ebad9f4
e7de495
7803fd6
 
e7de495
ab828e9
e7de495
d6f7b0b
 
 
7803fd6
9c81092
e7de495
 
d6f7b0b
e7de495
4a39d00
e7de495
8a0e8f1
7803fd6
beab8b0
 
18f4e61
8615560
 
 
 
 
18f4e61
7803fd6
2bd73c6
d6f7b0b
8615560
 
 
 
d6f7b0b
 
7803fd6
d6f7b0b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import CacheBackedEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.storage import LocalFileStore
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
import chainlit as cl


# Shared splitter that init() applies to the loaded FAQ documents before they
# are embedded (configured with chunk_size=1000 and chunk_overlap=100).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# System prompt for the question-answering chain. `{context}` is the only
# template placeholder in this string; presumably the "stuff" chain built in
# init() fills it with the retrieved documents -- confirm against LangChain.
# NOTE(review): the contact address below reads "[email protected]", which looks
# like an e-mail obfuscation artifact rather than a real address -- confirm and
# restore the intended customer-service address.
# NOTE(review): the "Example of how your response should be direct" section
# still contains the placeholder text "foo" -- confirm whether a real example
# was intended.
system_template = """
Use the following pieces of context to answer the user's question.
Please respond as if you are a human female customer service representative for Daysoff, 
a Norwegian company that provides welfare services by offering access to cottages and 
apartments for employees of member companies.
By default, you respond (in Norwegian language) using a warm, direct, and professional tone. 
Your expertise covers FAQs, and privacy policies. 
If you don't know the answer, just say that you don't know, don't try to make up an answer:
politely redirect the user to customer service at [email protected] and remind them to always 
include their booking id (bestillingskode). 
You can make inferences based on the context as long as it still faithfully represents the feedback.

Example of how your response should be direct:

```
foo      
```

Begin!
----------------
{context}"""

# Chat prompt: the system instructions above followed by the user's question,
# which is substituted into the `{question}` placeholder.
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate(messages=messages)
# Keyword arguments carrying the custom prompt, in the same shape that
# RetrievalQA.from_chain_type receives as `chain_type_kwargs` in init().
chain_type_kwargs = {"prompt": prompt}

@cl.author_rename
def rename(orig_author: str):
    """Map an author label to the name that should be displayed instead.

    The label "Just a moment" is shown as "Thinking.."; every other label is
    returned unchanged.
    """
    if orig_author == "Just a moment":
        return "Thinking.."
    return orig_author

@cl.on_chat_start
async def init():
    """Build the FAQ vector store and QA chain for a new chat session.

    Loads ./data/total_faq.csv, tags every row with a 1-based ``row_index``,
    splits the rows into chunks, embeds them (with an on-disk embedding cache
    under ./cache/) into a FAISS index, and stores the resulting RetrievalQA
    chain in the user session under the key "chain".
    """
    msg = cl.Message(content="Building vector store...")
    await msg.send()

    loader = CSVLoader(file_path="./data/total_faq.csv", source_column="Answer")
    data = loader.load()

    # NOTE(review): the loader is only told about `source_column`; confirm that
    # loaded documents really carry an "Info_Url" metadata key. If they do not,
    # "source" is always "" and no source links are ever shown.
    for row_index, doc in enumerate(data, start=1):
        doc.metadata["row_index"] = row_index
        doc.metadata["source"] = doc.metadata.get("Info_Url", "")

    documents = text_splitter.transform_documents(data)
    store = LocalFileStore("./cache/")
    core_embeddings_model = OpenAIEmbeddings()
    embedder = CacheBackedEmbeddings.from_bytes_store(
        core_embeddings_model, store, namespace=core_embeddings_model.model
    )
    # Index construction is synchronous; wrap it so the event loop is not blocked.
    docsearch = await cl.make_async(FAISS.from_documents)(documents, embedder)

    chain = RetrievalQA.from_chain_type(
        ChatOpenAI(model="gpt-4", temperature=0.0, streaming=True),
        chain_type="stuff",
        return_source_documents=True,
        retriever=docsearch.as_retriever(),
        chain_type_kwargs=chain_type_kwargs,
    )

    # Built line by line without leading indentation: Markdown treats lines
    # indented by four or more spaces as a code block, not as a table.
    markdown_table = "\n".join(
        [
            "",
            "### Eksempler på spørsmål",
            "",
            "| # | Spørsmål                               |",
            "|---|----------------------------------------|",
            "| 1 | Hvordan registrerer jeg meg som bruker?|",
            "| 2 | Kan jeg ha med kjæledyr på hytta?      |",
            "| 3 | Adferdsmessig annonsering?             |",
            "| 4 | Hvordan blir dataene mine beskyttet?   |",
            "",
        ]
    )

    # NOTE(review): this re-sends the same message object with new content;
    # confirm whether Chainlit's `msg.update()` is the intended call here.
    msg.content = f"FAISS ready. Bare spør ivei..🤓\n\n{markdown_table}"
    await msg.send()

    cl.user_session.set("chain", chain)

# 1-based FAQ rows (the "row_index" metadata set in init()) whose URL is
# offered to the user as a source link.
_LINKED_ROW_INDEXES = frozenset({2, 8, 14})
_SOURCE_BASE_URL = "https://www.daysoff.no"


@cl.on_message
async def main(message):
    """Answer one user message with the session's RetrievalQA chain.

    Args:
        message: The incoming user message. Either a plain string or an object
            exposing the text as ``.content`` is accepted, since Chainlit
            versions differ in what they pass to the handler.

    The reply is the chain's "result", followed by a "Sources:" line when any
    retrieved document comes from one of the linked FAQ rows and has a
    non-empty "source". If the reply is identical to the previous reply in
    this session, "No new information available." is sent instead.
    """
    chain = cl.user_session.get("chain")

    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True,
        answer_prefix_tokens=["FINAL", "ANSWER"],
    )
    cb.answer_reached = True
    cb.suppress_intermediate = True

    # Hand the chain the question text, not the message wrapper.
    query = getattr(message, "content", message)
    res = await chain.acall(query, callbacks=[cb])

    answer = res["result"]

    # Collect each distinct source URL once, in retrieval order.
    seen_sources = set()
    source_elements = []
    for doc in res.get("source_documents", []):
        row_index = doc.metadata.get("row_index", -1)
        source = doc.metadata.get("source", "")
        if row_index not in _LINKED_ROW_INDEXES or not source:
            continue
        if source in seen_sources:
            continue
        seen_sources.add(source)
        source_elements.append(
            cl.Text(content=_SOURCE_BASE_URL + source, name="Info_Url")
        )

    if source_elements:
        answer += "\nSources: " + ", ".join(e.content for e in source_elements)

    # The previous reply lives in the user session so it survives between
    # handler invocations; it is None for the first message of a session.
    if answer == cl.user_session.get("last_response"):
        await cl.Message(content="No new information available.").send()
        return

    await cl.Message(content=answer, elements=source_elements or None).send()
    cl.user_session.set("last_response", answer)