Spaces:

zliang
/

ClimateChat

Sleeping

App Files Files Community

ClimateChat / app.py

zliang

Update app.py

879a13d almost 2 years ago

raw

history blame

6.56 kB

	import openai
	import streamlit as st
	from langchain.llms import OpenAI
	from langchain.chat_models import ChatOpenAI
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.chains import RetrievalQA

	from langchain.prompts.prompt import PromptTemplate

	from langchain.vectorstores import FAISS
	import re
	import time


	# Settings for the e5-large-v2 sentence-embedding model: CPU inference,
	# embeddings are left un-normalized.
	model_name = "intfloat/e5-large-v2"
	model_kwargs = {'device': 'cpu'}
	encode_kwargs = {'normalize_embeddings': False}


	@st.cache_resource
	def _load_knowledge_base():
	    """Build the embedding model and load the IPCC FAISS index once.

	    Streamlit re-executes this script on every user interaction; caching
	    the two heavy objects avoids re-instantiating the embedding model and
	    re-reading the index from disk on each rerun.

	    Returns:
	        A ``(embeddings, db)`` tuple: the HuggingFaceEmbeddings instance and
	        the FAISS vector store loaded from "IPCC_index_e5_1000_pdf".
	    """
	    embedding_model = HuggingFaceEmbeddings(
	        model_name=model_name,
	        model_kwargs=model_kwargs,
	        encode_kwargs=encode_kwargs,
	    )
	    # load IPCC database
	    vector_store = FAISS.load_local("IPCC_index_e5_1000_pdf", embedding_model)
	    return embedding_model, vector_store


	# Module-level names are kept so the rest of the script can keep using them.
	embeddings, db = _load_knowledge_base()



	def generate_response(input_text):
	    """Answer a question with the IPCC index and an OpenAI chat model.

	    The five nearest documents for ``input_text`` are looked up in ``db``;
	    their ``source`` and ``page`` metadata are baked into the prompt as
	    citation links. A "stuff" RetrievalQA chain is then run on the question.

	    Args:
	        input_text: The user's question.

	    Returns:
	        The output of the RetrievalQA chain called with
	        ``{'query': input_text}``. The chain is built with
	        ``return_source_documents=True``.

	    Raises:
	        IndexError: If the similarity search yields fewer than five documents.
	        KeyError: If a document's metadata lacks "source" or "page".
	    """
	    hits = db.similarity_search(input_text, k=5)
	    # Index explicitly (rather than slicing) so that a short result list
	    # still fails loudly instead of producing an incomplete prompt.
	    top_metadata = [hits[rank].metadata for rank in range(5)]

	    # source1..source5 first, then page1..page5.
	    citation_fields = {}
	    for field in ("source", "page"):
	        for position, metadata in enumerate(top_metadata, start=1):
	            citation_fields[f"{field}{position}"] = metadata[field]

	    # The continuation lines of this literal are kept verbatim: their leading
	    # whitespace is part of the prompt text sent to the model.
	    prompt_text = """ You are ChatClimate, take a deep breath and use the following pieces of context to provide an answer for general audience. Format your answer in Markdown:"


	Context: {context}

	Question: {question}

	Answer:



	Check if you use the source in your ansewer, make sure list used sources you refer to and their hyperlinks as below in a section named "sources":

	[{source1} page {page1}](https://www.ipcc.ch/report/ar6/wg3/downloads/report/{source1}.pdf#page={page1})
	[{source2} page {page2}](https://www.ipcc.ch/report/ar6/wg3/downloads/report/{source2}.pdf#page={page2})
	[{source3} page {page3}](https://www.ipcc.ch/report/ar6/wg3/downloads/report/{source3}.pdf#page={page3})
	[{source4} page {page4}](https://www.ipcc.ch/report/ar6/wg3/downloads/report/{source4}.pdf#page={page4})
	[{source5} page {page5}](https://www.ipcc.ch/report/ar6/wg3/downloads/report/{source5}.pdf#page={page5})


	If you don't know the answer, just say that you don't know, don't try to make up an answer.

	at the end of your answer, make sure to add a short highlight of your answer in humor and make sure no more than 5 words.

	Highlight:
	"""
	    qa_prompt = PromptTemplate(
	        input_variables=["question", "context"],
	        partial_variables=citation_fields,
	        template=prompt_text,
	    )

	    # openai_api_key is the module-level value read from the sidebar input.
	    chat_model = ChatOpenAI(
	        model_name="gpt-3.5-turbo",
	        temperature=0.1,
	        max_tokens=2000,
	        openai_api_key=openai_api_key,
	    )

	    # NOTE(review): the chain's retriever runs its own k=5 search; presumably
	    # it returns the same documents as the lookup above - confirm.
	    chain = RetrievalQA.from_chain_type(
	        chat_model,
	        retriever=db.as_retriever(search_kwargs={"k": 5}),
	        chain_type="stuff",  # alternatives: "map_reduce", "refine", "map_rerank"
	        return_source_documents=True,
	        verbose=True,
	        chain_type_kwargs={"prompt": qa_prompt},
	    )
	    return chain({'query': input_text})


	# Sidebar: API key entry plus a short introduction to the app.
	# NOTE(review): which statements belong inside the sidebar is reconstructed;
	# confirm that the welcome text is meant for the sidebar and the title and
	# caption for the main page.
	with st.sidebar:
	    # Kept at module scope: generate_response() reads this name when it
	    # builds the chat model.
	    openai_api_key = st.text_input("OpenAI API Key", key="chatbot_api_key", type="password")
	    # A bare string expression; Streamlit's "magic" renders it as Markdown.
	    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
	    st.markdown("## 🌍 Welcome to ClimateChat! 🌍")
	    # Adjacent literals with an explicit space replace the former backslash
	    # continuation, which spliced the next line's leading whitespace into
	    # the text between the two sentences.
	    st.markdown(
	        "ClimateChat harnesses the latest [IPCC reports](https://www.ipcc.ch/report/ar6/wg3/) and the power of Large Language Models to answer your questions about climate change. When you interact with ClimateChat not only will you receive clear, concise, and accurate answers, but each response is coupled with sources and hyperlinks for further exploration and verification. "
	        "Our objective is to make climate change information accessible, understandable, and actionable for everyone, everywhere."
	    )
	st.title("💬🌍🌡️ClimateChat")
	st.caption("💬 A Climate Change chatbot powered by OpenAI LLM and IPCC documents")


	# First run of a session: start the conversation with an assistant greeting.
	if "messages" not in st.session_state:
	    greeting = {"role": "assistant", "content": "Any question about the climate change?"}
	    st.session_state.messages = [greeting]

	# Replay the stored conversation so it is still shown after each rerun.
	for past_message in st.session_state["messages"]:
	    bubble = st.chat_message(past_message["role"])
	    bubble.write(past_message["content"])

	# Handle a newly submitted question: record it, query the chain, and show
	# the answer with a simulated typing effect.
	if prompt := st.chat_input():
	    # The chat model cannot be built without a key; ask for one and end
	    # this script run.
	    if not openai_api_key:
	        st.info("Please add your OpenAI API key to continue.")
	        st.stop()

	    st.session_state.messages.append({"role": "user", "content": prompt})
	    st.chat_message("user").write(prompt)
	    result = generate_response(prompt)
	    result_r = result["result"]

	    # Text after "Highlight: " on the same line, with a fixed fallback. It is
	    # only consumed by the disabled image call at the bottom of this block.
	    match = re.search(r"Highlight: (.+)", result_r)
	    highlighted_text = match.group(1) if match else "hello world"

	    # Display assistant response in chat message container
	    with st.chat_message("assistant"):
	        message_placeholder = st.empty()
	        full_response = ""
	        # Each chunk is one word together with the whitespace that precedes
	        # it, so line breaks in the Markdown answer survive while "typing"
	        # (a plain split() would collapse them into single spaces).
	        for chunk in re.findall(r"\s*\S+", result_r):
	            full_response += chunk
	            time.sleep(0.05)
	            # Add a blinking cursor to simulate typing
	            message_placeholder.write(full_response + "▌")
	        # Replace the animated text with the untouched answer.
	        message_placeholder.write(result_r)
	    # Add assistant response to chat history
	    st.session_state.messages.append({"role": "assistant", "content": result_r})

	    # Disabled: cat picture captioned with the highlight.
	    #st.image("https://cataas.com/cat/says/"+highlighted_text)