Upload query_data.py
query_data.py +107 -0
query_data.py
ADDED
@@ -0,0 +1,107 @@
import pickle

from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.prompts.prompt import PromptTemplate
from langchain.vectorstores.base import VectorStoreRetriever

_template = """Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question.
You can assume the question is about the most recent state of the union address.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

template = """You are an AI assistant for answering questions about the most recent state of the union address.
You are given the following extracted parts of a long document and a question. Provide a conversational answer.
If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
If the question is not about the most recent state of the union, politely inform the user that you are tuned to only answer questions about the most recent state of the union.
Lastly, answer the question as if you were a pirate from the south seas who is just coming back from a pirate expedition where you found a treasure chest full of gold doubloons.
Question: {question}
=========
{context}
=========
Answer in Markdown:"""
QA_PROMPT = PromptTemplate(template=template,
                           input_variables=["question", "context"])


def load_retriever():
    # Load a vectorstore that was pickled to disk by a separate ingestion
    # step, then wrap it in a retriever for the chains below.
    with open("vectorstore.pkl", "rb") as f:
        vectorstore = pickle.load(f)
    retriever = VectorStoreRetriever(vectorstore=vectorstore)
    return retriever
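
# Hypothetical ingestion sketch: load_retriever() assumes "vectorstore.pkl"
# already exists. A minimal way to produce one might look like the helper
# below, assuming FAISS (with faiss installed), OpenAI embeddings, and a
# local "state_of_the_union.txt"; the helper name, loader, splitter
# settings, and file names are all illustrative assumptions, not part of
# this tutorial's actual ingest step.
def build_vectorstore(path="state_of_the_union.txt"):
    from langchain.document_loaders import TextLoader
    from langchain.embeddings import OpenAIEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.vectorstores import FAISS

    # Load the raw text, split it into overlapping chunks, embed them,
    # and pickle the resulting FAISS vectorstore for load_retriever().
    documents = TextLoader(path).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(documents)
    vectorstore = FAISS.from_documents(chunks, OpenAIEmbeddings())
    with open("vectorstore.pkl", "wb") as f:
        pickle.dump(vectorstore, f)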


def get_basic_qa_chain():
    # Default setup: chat model + pickled retriever + buffer memory.
    llm = ChatOpenAI(model_name="gpt-4", temperature=0)
    retriever = load_retriever()
    memory = ConversationBufferMemory(
        memory_key="chat_history", return_messages=True)
    model = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory)
    return model


def get_custom_prompt_qa_chain():
    llm = ChatOpenAI(model_name="gpt-4", temperature=0)
    retriever = load_retriever()
    memory = ConversationBufferMemory(
        memory_key="chat_history", return_messages=True)
    # Swap in the custom QA prompt via combine_docs_chain_kwargs.
    # see: https://github.com/langchain-ai/langchain/issues/6635
    # see: https://github.com/langchain-ai/langchain/issues/1497
    model = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
        combine_docs_chain_kwargs={"prompt": QA_PROMPT})
    return model


def get_condense_prompt_qa_chain():
    llm = ChatOpenAI(model_name="gpt-4", temperature=0)
    retriever = load_retriever()
    memory = ConversationBufferMemory(
        memory_key="chat_history", return_messages=True)
    # Also override the question-condensing prompt.
    # see: https://github.com/langchain-ai/langchain/issues/5890
    model = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
        combine_docs_chain_kwargs={"prompt": QA_PROMPT})
    return model


def get_qa_with_sources_chain():
    llm = ChatOpenAI(model_name="gpt-4", temperature=0)
    retriever = load_retriever()
    history = []
    model = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        return_source_documents=True)

    def model_func(question):
        # bug: return_source_documents doesn't work with the built-in
        # memory, so track chat history by hand for the tutorial.
        # see: https://github.com/langchain-ai/langchain/issues/5630
        new_input = {"question": question['question'], "chat_history": history}
        result = model(new_input)
        history.append((question['question'], result['answer']))
        return result

    return model_func


chain_options = {
    "basic": get_basic_qa_chain,
    "with_sources": get_qa_with_sources_chain,
    "custom_prompt": get_custom_prompt_qa_chain,
    "condense_prompt": get_condense_prompt_qa_chain
}
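
# Illustrative usage sketch, assuming OPENAI_API_KEY is set and
# vectorstore.pkl exists; the question strings are arbitrary examples.
if __name__ == "__main__":
    # Build the basic chain and ask one question; memory tracks history.
    chain = chain_options["basic"]()
    result = chain({"question": "What did the president say about the economy?"})
    print(result["answer"])

    # "with_sources" returns a plain function (see above) whose result
    # also carries the retrieved source documents.
    qa = chain_options["with_sources"]()
    result = qa({"question": "What did the president say about the economy?"})
    for doc in result["source_documents"]:
        print(doc.metadata)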