Spaces:
Sleeping
Sleeping
File size: 5,855 Bytes
490a9f7 ed7625a a6e5795 ed7625a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import re
import time
from urllib.parse import quote

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import OpenAI
from langchain.prompts.prompt import PromptTemplate
from langchain.vectorstores import FAISS
# class CustomRetrievalQAWithSourcesChain(RetrievalQAWithSourcesChain):
# def _get_docs(self, inputs: Dict[str, Any]) -> List[Document]:
# # Call the parent class's method to get the documents
# docs = super()._get_docs(inputs)
# # Modify the document metadata
# for doc in docs:
# doc.metadata['source'] = doc.metadata.pop('path')
# return docs
# Settings handed to HuggingFaceEmbeddings below.
model_name = "intfloat/e5-large-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}


@st.cache_resource
def _load_embeddings_and_index():
    """Build the embedding model and load the local FAISS index once.

    Streamlit re-executes this script from the top on every user
    interaction. Caching the loaded objects as a resource avoids
    re-instantiating the embedding model and re-reading the index on each
    rerun.

    Returns:
        A ``(embeddings, index)`` tuple: the ``HuggingFaceEmbeddings``
        instance and the ``FAISS`` store loaded from
        ``"IPCC_index_e5_1000_pdf"`` with it.
    """
    embedder = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )
    return embedder, FAISS.load_local("IPCC_index_e5_1000_pdf", embedder)


# Module-level names kept so the rest of the script can keep using them.
embeddings, db = _load_embeddings_and_index()
# Number of passages requested from the index; one citation slot is rendered
# into the prompt for each passage actually returned.
_TOP_K = 5

# Base URL used for the per-source PDF links in the prompt.
# NOTE(review): the path is fixed to the AR6 WG3 report -- confirm that every
# indexed source lives under it.
_REPORT_URL = "https://www.ipcc.ch/report/ar6/wg3/downloads/report/"

# Fixed prompt sections. The citation and hyperlink lines are generated per
# retrieved passage and spliced in between them.
# NOTE(review): the listing this was taken from had lost its indentation, so
# any leading whitespace the original template lines carried is not
# reproduced here.
_PROMPT_INTRO = """ You are ChatClimate, take a deep breath and provide an answer to educated general audience based on the context, and Format your answer in Markdown. :"
Context: {context}
Question: {question}
Answer:
check if you use the info below, if you used please add used source for in-text reference, if not used, do not add them .
"""
_PROMPT_SOURCES_HEADING = """Check if you use the source in your ansewer, make sure list used sources you refer to and their hyperlinks as below in a section named "sources":
"""
_PROMPT_OUTRO = """at the end of your answer, make sure to add a short highlight of your answer in humor and make sure no more than 5 words.
Highlight:
"""


def _build_prompt_parts(docs):
    """Return ``(template, partial_variables)`` for the retrieved passages.

    For the i-th passage (1-based) the template gets a ``{source<i>}`` /
    ``{page<i>}`` citation line and a matching hyperlink line, and the
    returned mapping binds those placeholders to the passage's ``source`` and
    ``page`` metadata. A missing metadata field is replaced by ``"unknown"``
    instead of raising ``KeyError``.
    """
    citation_lines = []
    link_lines = []
    values = {}
    for number, doc in enumerate(docs, start=1):
        meta = doc.metadata
        values[f"source{number}"] = meta.get("source", "unknown")
        values[f"page{number}"] = meta.get("page", "unknown")

        # Doubled braces leave literal {sourceN}/{pageN} placeholders in the
        # template; the actual values are bound through partial_variables.
        citation = f"[{{source{number}}} page {{page{number}}}]"
        citation_lines.append(citation + "\n")
        link_lines.append(
            f"{citation}({_REPORT_URL}{{source{number}}}.pdf"
            f"#page={{page{number}}})\n"
        )

    template = (
        _PROMPT_INTRO
        + "".join(citation_lines)
        + _PROMPT_SOURCES_HEADING
        + "".join(link_lines)
        + _PROMPT_OUTRO
    )
    return template, values


def generate_response(input_text):
    """Answer ``input_text`` with a RetrievalQA chain over the FAISS index.

    The index is searched for up to ``_TOP_K`` passages; their ``source`` and
    ``page`` metadata are written into the prompt as citation and hyperlink
    lines. A ``ChatOpenAI`` model (``gpt-3.5-turbo``) is then run through a
    "stuff" ``RetrievalQA`` chain using that prompt.

    Reads the module-level ``db`` and ``openai_api_key`` at call time.

    Args:
        input_text: The user's question.

    Returns:
        Whatever the chain returns for ``{'query': input_text}``; the caller
        reads its ``"result"`` entry, and source documents are requested via
        ``return_source_documents=True``.
    """
    docs = db.similarity_search(input_text, k=_TOP_K)
    # Sized from the hits actually returned, so fewer than _TOP_K passages no
    # longer raises IndexError.
    template, citation_values = _build_prompt_parts(docs)
    climate_prompt = PromptTemplate(
        input_variables=["question", "context"],
        partial_variables=citation_values,
        template=template,
    )

    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0.1,
        max_tokens=2000,
        openai_api_key=openai_api_key,
    )

    # The chain's retriever does its own lookup with the same k; the search
    # above is only used to fill in the citation metadata.
    retriever = db.as_retriever(search_kwargs={"k": _TOP_K})
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        chain_type="stuff",  # alternatives: "map_reduce", "refine", "map_rerank"
        return_source_documents=True,
        verbose=True,
        chain_type_kwargs={"prompt": climate_prompt},
    )
    return qa_chain({'query': input_text})
# --- Sidebar: API key entry -------------------------------------------------
with st.sidebar:
    openai_api_key = st.text_input(
        "OpenAI API Key",
        key="chatbot_api_key",
        type="password",
    )
    # A bare string expression is rendered by Streamlit "magic" as markdown.
    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"

# --- Page header -------------------------------------------------------------
# NOTE(review): the leading characters of both strings look like mis-decoded
# emoji -- confirm against the original file's encoding.
st.title("π¬ππ‘οΈAsk question about Climate Change")
st.caption("π A Climate Change chatbot powered by OpenAI LLM")
#col1, col2, = st.columns(2)

# --- Conversation history ------------------------------------------------------
# Seed the history with a greeting the first time the session runs.
if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {
            "role": "assistant",
            "content": "I'm a Chatbot who can answer your questions about the climate change!",
        }
    ]

# Replay every stored message so the transcript survives script reruns.
for msg in st.session_state.messages:
    bubble = st.chat_message(msg["role"])
    bubble.write(msg["content"])
# Handle one chat turn: runs only when the user has submitted a question.
if prompt := st.chat_input():
    # Without an API key the model cannot be called; ask for it and halt.
    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()

    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)

    result = generate_response(prompt)
    result_r = result["result"]

    # Pull the text that follows "Highlight:" in the answer. Any amount of
    # whitespace (including none) is accepted after the colon.
    match = re.search(r"Highlight:\s*(.+)", result_r)
    highlighted_text = match.group(1).strip() if match else ""
    if not highlighted_text:
        # Fallback caption when the answer has no usable highlight.
        highlighted_text = "hello world"

    st.session_state.messages.append({"role": "assistant", "content": result_r})
    st.chat_message("assistant").write(result_r)

    # Percent-encode the caption so spaces and characters such as "?", "#"
    # or "/" cannot break or alter the image URL.
    st.image("https://cataas.com/cat/says/" + quote(highlighted_text, safe=""))