Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
import time
import urllib.parse

import openai
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import OpenAI
from langchain.prompts.prompt import PromptTemplate
from langchain.vectorstores import FAISS
|
13 |
+
# class CustomRetrievalQAWithSourcesChain(RetrievalQAWithSourcesChain):
|
14 |
+
# def _get_docs(self, inputs: Dict[str, Any]) -> List[Document]:
|
15 |
+
# # Call the parent class's method to get the documents
|
16 |
+
# docs = super()._get_docs(inputs)
|
17 |
+
# # Modify the document metadata
|
18 |
+
# for doc in docs:
|
19 |
+
# doc.metadata['source'] = doc.metadata.pop('path')
|
20 |
+
# return docs
|
21 |
+
|
22 |
+
# --- Embedding model + vector store --------------------------------------
# e5-large-v2 sentence embeddings; queries must be embedded with the same
# model that built the FAISS index.
model_name = "intfloat/e5-large-v2"
model_kwargs = {'device': 'cuda'}  # NOTE(review): assumes a GPU host — confirm the deployment target
encode_kwargs = {'normalize_embeddings': False}

# Embedder used at query time for similarity search against the index.
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Pre-built FAISS index over IPCC report PDF chunks, loaded from local disk.
db = FAISS.load_local("IPCC_index_e5_1000_pdf", embeddings)
|
32 |
+
|
33 |
+
|
34 |
+
|
35 |
+
def generate_response(input_text):
    """Answer a climate question with retrieval-augmented QA over the IPCC index.

    Retrieves the 5 most similar chunks from the FAISS store, injects their
    source/page metadata into the prompt (so the model can add in-text
    citations and hyperlinks), and runs a "stuff" RetrievalQA chain over
    gpt-3.5-turbo.

    Parameters
    ----------
    input_text : str
        The user's question.

    Returns
    -------
    dict
        Chain output with keys 'result' (the answer text) and
        'source_documents' (the retrieved chunks).
    """
    docs = db.similarity_search(input_text, k=5)

    # Map the top-5 hits' metadata onto the template placeholders
    # source1..source5 / page1..page5 (replaces five hand-written
    # json1..json5 temporaries).
    partials = {}
    for i, doc in enumerate(docs, start=1):
        partials[f"source{i}"] = doc.metadata["source"]
        partials[f"page{i}"] = doc.metadata["page"]

    # Fixed typo "ansewer" -> "answer" in the instruction text below.
    climate_TEMPLATE = """ You are ChatClimate, take a deep breath and provide an answer to educated general audience based on the context, and Format your answer in Markdown. :"

Context: {context}

Question: {question}

Answer:


check if you use the info below, if you used please add used source for in-text reference, if not used, do not add them .


[{source1} page {page1}]
[{source2} page {page2}]
[{source3} page {page3}]
[{source4} page {page4}]
[{source5} page {page5}]

Check if you use the source in your answer, make sure list used sources you refer to and their hyperlinks as below in a section named "sources":

[{source1} page {page1}](https://www.ipcc.ch/report/ar6/wg3/downloads/report/{source1}.pdf#page={page1})
[{source2} page {page2}](https://www.ipcc.ch/report/ar6/wg3/downloads/report/{source2}.pdf#page={page2})
[{source3} page {page3}](https://www.ipcc.ch/report/ar6/wg3/downloads/report/{source3}.pdf#page={page3})
[{source4} page {page4}](https://www.ipcc.ch/report/ar6/wg3/downloads/report/{source4}.pdf#page={page4})
[{source5} page {page5}](https://www.ipcc.ch/report/ar6/wg3/downloads/report/{source5}.pdf#page={page5})



at the end of your answer, make sure to add a short highlight of your answer in humor and make sure no more than 5 words.

Highlight:
"""
    climate_PROMPT = PromptTemplate(
        input_variables=["question", "context"],
        partial_variables=partials,
        template=climate_TEMPLATE,
    )

    # openai_api_key is a module-level global populated by the sidebar text
    # input; the UI stops before calling this function when it is empty.
    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0.1,
        max_tokens=2000,
        openai_api_key=openai_api_key,
    )

    retriever = db.as_retriever(search_kwargs={"k": 5})

    # The chain is rebuilt per call because the prompt's partial variables
    # depend on this query's retrieved documents.
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        chain_type="stuff",  # alternatives: "map_reduce", "refine", "map_rerank"
        return_source_documents=True,
        verbose=True,
        chain_type_kwargs={"prompt": climate_PROMPT},
    )

    return qa_chain({'query': input_text})
|
105 |
+
|
106 |
+
|
107 |
+
# --- Streamlit chat UI ----------------------------------------------------
with st.sidebar:
    openai_api_key = st.text_input("OpenAI API Key", key="chatbot_api_key", type="password")
    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"

# Repaired mojibake emoji (UTF-8 bytes previously decoded with a Thai
# codepage); caption emoji is a best-effort reconstruction — confirm.
st.title("💬🌍🌡️Ask question about Climate Change")
st.caption("🌍 A Climate Change chatbot powered by OpenAI LLM")

# Seed the conversation with a greeting on first load.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": "I'm a Chatbot who can answer your questions about the climate change!"}]

# Replay the chat history on every rerun (Streamlit re-executes the script).
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

if prompt := st.chat_input():
    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()

    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)

    result = generate_response(prompt)
    result_r = result["result"]

    # Pull the model's one-line "Highlight:" for the cat-meme caption;
    # fall back to a default when the model omitted it.
    match = re.search(r"Highlight: (.+)", result_r)
    highlighted_text = match.group(1) if match else "hello world"

    st.session_state.messages.append({"role": "assistant", "content": result_r})
    st.chat_message("assistant").write(result_r)

    # URL-encode the caption — raw spaces/punctuation would otherwise
    # produce a broken image URL.
    st.image("https://cataas.com/cat/says/" + urllib.parse.quote(highlighted_text))