Spaces:

zliang
/

ClimateChat

Build error

File size: 7,640 Bytes

import openai
import streamlit as st
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA

from langchain.prompts.prompt import PromptTemplate

from langchain.vectorstores import FAISS
import re
import time


# Embedding backend: the e5-large-v2 model from the Hugging Face hub.
# It is run on the CPU and its output vectors are left un-normalised.
model_name = "intfloat/e5-large-v2"
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=False)
embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

# Open the pre-built FAISS index holding the IPCC documents; queries against
# it are embedded with the `embeddings` object created above.
db = FAISS.load_local(
    "IPCC_index_e5_1000_pdf",
    embeddings,
)



# Base URL under which the cited IPCC AR6 WG3 report PDFs are published.
_IPCC_REPORT_BASE_URL = "https://www.ipcc.ch/report/ar6/wg3/downloads/report/"


def _format_source_links(docs):
    """Render one Markdown hyperlink line per retrieved document.

    Args:
        docs: Iterable of objects exposing a ``metadata`` mapping. The
            ``"source"`` and ``"page"`` entries are used when present.

    Returns:
        The link lines joined with newlines (each indented by eight spaces so
        they sit inside the prompt's ``<sources>`` section), or an empty
        string when no document carries a ``"source"`` entry.
    """
    lines = []
    for doc in docs:
        metadata = getattr(doc, "metadata", None) or {}
        source = metadata.get("source")
        if source is None:
            # Without a source name there is nothing to link to.
            continue
        url = f"{_IPCC_REPORT_BASE_URL}{source}.pdf"
        page = metadata.get("page")
        if page is None:
            lines.append(f"        [{source}]({url})")
        else:
            # NOTE(review): the page value is used verbatim as the PDF
            # "#page=" anchor; confirm the index stores 1-based page numbers.
            lines.append(f"        [{source} page {page}]({url}#page={page})")
    return "\n".join(lines)


def generate_response(input_text, k=5):
    """Answer a question with a RetrievalQA chain over the IPCC index.

    The ``k`` most similar chunks are looked up in the module-level ``db`` to
    build the list of citation links that is baked into the prompt; the chain
    then retrieves the same number of chunks as context and asks the chat
    model for the answer. The OpenAI key is read from the module-level
    ``openai_api_key``.

    Args:
        input_text: The user's question.
        k: Number of chunks to retrieve and cite (default 5, the value that
            was previously hard-coded).

    Returns:
        The mapping returned by the RetrievalQA chain. The answer text is
        under ``"result"``; source documents are requested via
        ``return_source_documents=True``.
    """
    # Building the sources section from whatever the search returns (instead
    # of indexing docs[0]..docs[4]) keeps this working when the index yields
    # fewer than ``k`` hits.
    # NOTE: the chain's retriever below runs the same similarity query again;
    # both lookups use the same ``k`` so the citations match the context.
    docs = db.similarity_search(input_text, k=k)
    source_links = _format_source_links(docs)

    climate_TEMPLATE = """\
    You are a professor in climate change, tasked with answering any question \
    about climate change.

    {question}

    Generate a comprehensive and informative answer to the general audience of 100 words or less for the \
    given question based solely on the provided search results (hyperlink and source). You must \
    only use information from the provided search results. Use an unbiased and \
    journalistic tone. Combine search results together into a coherent answer. Do not \
    repeat text. Only use the most \
    relevant results that answer the question accurately. cite them at the end of your answer, list these citations  \
    in a section named "source". 
    
    After the "source" section, add a "highlight" section\
    And add a short highlight  of your answer in humor and make sure no more than 5 words.\
    
    Format your answer in markdown format

    If there is nothing in the context relevant to the question at hand, just say "Hmm, \
    I'm not sure." Don't try to make up an answer.

    Anything between the following `context`  html blocks is retrieved from a knowledge \
    bank, not part of the conversation with the user. 

    <context>
        {context} 
    </context>

    Anything between the following `sources`  html blocks is the source and hyperlink you should use and list them into a source section\
    <sources>
{sources}
    </sources>

    REMEMBER: If there is no relevant information within the context, just say "Hmm, I'm \
    not sure." Don't try to make up an answer. Anything between the preceding 'context' \
    html blocks is retrieved from a knowledge bank, not part of the conversation with the \
    user.\
    

 
    """
    # The citation links are fixed for this question, so they are supplied as
    # a partial variable; the chain fills in {question} and {context}.
    climate_PROMPT = PromptTemplate(
        input_variables=["question", "context"],
        partial_variables={"sources": source_links},
        template=climate_TEMPLATE,
    )

    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo-16k",
        temperature=0.1,
        max_tokens=2000,
        openai_api_key=openai_api_key,
    )

    # Retriever used by the chain to fetch the context chunks.
    retriever = db.as_retriever(search_kwargs={"k": k})

    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        chain_type="stuff",  # alternatives: "map_reduce", "refine", "map_rerank"
        return_source_documents=True,
        verbose=True,
        chain_type_kwargs={"prompt": climate_PROMPT},
    )

    return qa_chain({'query': input_text})


# --- Sidebar: API key entry and a short introduction to the app ---
with st.sidebar:
    openai_api_key = st.text_input("OpenAI API Key", key="chatbot_api_key", type="password")
    # Rendered explicitly rather than as a bare string expression, so the link
    # does not depend on Streamlit's "magic" display of stray expressions.
    st.markdown("[Get an OpenAI API key](https://platform.openai.com/account/api-keys)")
    st.markdown("## 🌍 Welcome to ClimateChat! 🌍")
    st.markdown("ClimateChat harnesses the latest [IPCC reports](https://www.ipcc.ch/report/ar6/wg3/) and the power of Large Language Models to answer your questions about climate change. When you interact with ClimateChat not only will you receive clear, concise, and accurate answers, but each response is coupled with sources and hyperlinks for further exploration and verification.\
                Our objective is to make climate change information accessible, understandable, and actionable for everyone, everywhere.")

# --- Main page header ---
st.title("💬🌍🌡️ClimateChat")
st.caption("💬 A Climate Change chatbot powered by OpenAI LLM and IPCC documents")

# Seed the conversation history on the first run of the session.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": "Any question about the climate change?"}]

# Replay the stored conversation; the script is re-executed on every interaction.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

if prompt := st.chat_input():
    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()

    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)

    # Top-level boundary: show a readable error (e.g. for a rejected API key
    # or a network failure) instead of a raw traceback, then end this run.
    try:
        result = generate_response(prompt)
    except Exception as exc:
        st.error(f"Sorry, the answer could not be generated: {exc}")
        st.stop()
    result_r = result["result"]

    # Display the assistant response in a chat message container.
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        # Simulate streaming word by word. Each chunk keeps its trailing
        # whitespace so line breaks (and therefore Markdown structure such as
        # lists and headings) survive while the text is being "typed".
        for chunk in re.findall(r"\S+\s*", result_r):
            full_response += chunk
            time.sleep(0.05)
            # Add a blinking cursor to simulate typing.
            message_placeholder.write(full_response.rstrip() + " ▌")
        message_placeholder.write(result_r)

    # Add the assistant response to the chat history.
    st.session_state.messages.append({"role": "assistant", "content": result_r})