intoxication tanaysoni committed on
Commit
852c61d
·
0 Parent(s):

Duplicate from deepset/retrieval-augmentation-svb

Browse files

Co-authored-by: Tanay Soni <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.streamlit/config.toml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [theme]
2
+ base = "light"
3
+ font="monospace"
4
+ [global]
5
+
6
+ # By default, Streamlit checks if the Python watchdog module is available and, if not, prints a warning asking you to install it. The watchdog module is not required, but highly recommended. It improves Streamlit's ability to detect changes to files in your filesystem.
7
+ # If you'd like to turn off this warning, set this to True.
8
+ # Default: false
9
+ disableWatchdogWarning = true
10
+
11
+ # If True, will show a warning when you run a Streamlit-enabled script via "python my_script.py".
12
+ # Default: true
13
+ showWarningOnDirectExecution = false
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Retrieval Augmented Generative QA
3
+ emoji: 👁
4
+ colorFrom: blue
5
+ colorTo: pink
6
+ sdk: streamlit
7
+ sdk_version: 1.19.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: deepset/retrieval-augmentation-svb
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit entry point: compares a plain GPT answer against a
retrieval-augmented answer (local FAISS store or live web search)."""
import streamlit as st
from utils.backend import (get_plain_pipeline, get_retrieval_augmented_pipeline,
                           get_web_retrieval_augmented_pipeline)
from utils.ui import left_sidebar, right_sidebar, main_column
from utils.constants import BUTTON_LOCAL_RET_AUG

st.set_page_config(
    page_title="Retrieval Augmentation with Haystack",
    layout="wide"
)
left_sidebar()

st.markdown("<center> <h2> Reduce Hallucinations 😵‍💫 with Retrieval Augmentation </h2> </center>", unsafe_allow_html=True)

st.markdown("<center>Ask a question about the collapse of the Silicon Valley Bank (SVB).</center>", unsafe_allow_html=True)

col_1, col_2 = st.columns([4, 2], gap="small")
with col_1:
    run_pressed, placeholder_plain_gpt, placeholder_retrieval_augmented = main_column()

with col_2:
    right_sidebar()

# Only run when the user pressed "Run" AND there is a non-empty query.
if st.session_state.get('query') and run_pressed:
    ip = st.session_state['query']
    with st.spinner('Loading pipelines... \n This may take a few mins and might also fail if OpenAI API server is down.'):
        p1 = get_plain_pipeline()
    with st.spinner('Fetching answers from plain GPT... '
                    '\n This may take a few mins and might also fail if OpenAI API server is down.'):
        answers = p1.run(ip)
    placeholder_plain_gpt.markdown(answers['results'][0])

    if st.session_state.get("query_type", BUTTON_LOCAL_RET_AUG) == BUTTON_LOCAL_RET_AUG:
        # Retrieval from the bundled FAISS news dataset.
        with st.spinner(
                'Loading Retrieval Augmented pipeline that can fetch relevant documents from local data store... '
                '\n This may take a few mins and might also fail if OpenAI API server is down.'):
            p2 = get_retrieval_augmented_pipeline()
        with st.spinner('Getting relevant documents from documented stores and calculating answers... '
                        '\n This may take a few mins and might also fail if OpenAI API server is down.'):
            answers_2 = p2.run(ip)
    else:
        # Retrieval from a live web search.
        # NOTE: the original source broke the '\n' across a backslash line
        # continuation inside the string literal, which rendered a stray
        # "n " in the spinner text; fixed here.
        with st.spinner(
                'Loading Retrieval Augmented pipeline that can fetch relevant documents from the web... '
                '\n This may take a few mins and might also fail if OpenAI API server is down.'):
            p3 = get_web_retrieval_augmented_pipeline()
        with st.spinner('Getting relevant documents from the Web and calculating answers... '
                        '\n This may take a few mins and might also fail if OpenAI API server is down.'):
            answers_2 = p3.run(ip)
    placeholder_retrieval_augmented.markdown(answers_2['results'][0])
    with st.expander("See source:"):
        top_doc = answers_2['invocation_context']['documents'][0]
        # Escape '$' so Streamlit's markdown does not treat it as LaTeX
        # (raw string avoids the invalid '\$' escape-sequence warning).
        src = top_doc.content.replace("$", r"\$")
        # Collapse paragraphs/lines into one string and truncate for display.
        split_marker = "\n\n" if "\n\n" in src else "\n"
        src = " ".join(src.split(split_marker))[0:2000] + "..."
        if top_doc.meta.get('link'):
            title = top_doc.meta.get('link')
            src = '"' + title + '": ' + src
        st.write(src)
data/my_faiss_index.faiss ADDED
Binary file (154 kB). View file
 
data/my_faiss_index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"faiss_index_factory_str": "Flat"}
faiss_document_store.db ADDED
Binary file (274 kB). View file
 
logo/haystack-logo-colored.png ADDED
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ farm-haystack==1.17.1
2
+ faiss-cpu==1.7.2
3
+ sqlalchemy>=1.4.2,<2
4
+ sqlalchemy_utils
5
+ psycopg2-binary
6
+ streamlit==1.19.0
7
+ altair<5
utils/__init__.py ADDED
File without changes
utils/backend.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from haystack import Pipeline
3
+ from haystack.document_stores import FAISSDocumentStore
4
+ from haystack.nodes import Shaper, PromptNode, PromptTemplate, PromptModel, EmbeddingRetriever
5
+ from haystack.nodes.retriever.web import WebRetriever
6
+
7
+
8
@st.cache_resource(show_spinner=False)
def get_plain_pipeline():
    """Build (and cache for the process lifetime) a bare GPT pipeline:
    the user query is sent straight to an OpenAI PromptNode with no
    retrieval step."""
    openai_model = PromptModel(model_name_or_path="text-davinci-003", api_key=st.secrets["OPENAI_API_KEY"])
    # Now let make one PromptNode use the default model and the other one the OpenAI model:
    template = PromptTemplate(name="plain_llm", prompt_text="Answer the following question: {query}")
    answer_node = PromptNode(openai_model, default_prompt_template=template, max_length=300)
    pipe = Pipeline()
    pipe.add_node(component=answer_node, name="prompt_node", inputs=["Query"])
    return pipe
17
+
18
+
19
@st.cache_resource(show_spinner=False)
def get_retrieval_augmented_pipeline():
    """Build (and cache) a retrieval-augmented pipeline: an embedding
    retriever over the bundled FAISS index feeds the top documents as
    context into an OpenAI PromptNode."""
    # Pre-built FAISS index shipped with the Space under data/.
    document_store = FAISSDocumentStore(faiss_index_path="data/my_faiss_index.faiss",
                                        faiss_config_path="data/my_faiss_index.json")

    dense_retriever = EmbeddingRetriever(
        document_store=document_store,
        embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
        model_format="sentence_transformers",
        top_k=2
    )

    qa_template = PromptTemplate(
        name="question-answering",
        prompt_text="Given the context please answer the question. Context: {join(documents)}; Question: "
                    "{query}; Answer:",
    )

    # PromptNode that answers from the retrieved context.
    answer_node = PromptNode("text-davinci-003", default_prompt_template=qa_template,
                             api_key=st.secrets["OPENAI_API_KEY"], max_length=500)

    # Wire retriever -> prompt node.
    pipe = Pipeline()
    pipe.add_node(component=dense_retriever, name='retriever', inputs=['Query'])
    pipe.add_node(component=answer_node, name="prompt_node", inputs=["retriever"])
    return pipe
46
+
47
+
48
@st.cache_resource(show_spinner=False)
def get_web_retrieval_augmented_pipeline():
    """Build (and cache) a web-retrieval-augmented pipeline: documents are
    fetched live via SerperDev web search and passed as context to an
    OpenAI PromptNode."""
    serper_key = st.secrets["WEBRET_API_KEY"]
    web_retriever = WebRetriever(api_key=serper_key, search_engine_provider="SerperDev")
    qa_template = PromptTemplate(
        name="question-answering",
        prompt_text="Given the context please answer the question. Context: {join(documents)}; Question: "
                    "{query}; Answer:",
    )
    # PromptNode that answers from the retrieved context.
    answer_node = PromptNode("text-davinci-003", default_prompt_template=qa_template,
                             api_key=st.secrets["OPENAI_API_KEY"], max_length=500)
    # Wire web retriever -> prompt node.
    pipe = Pipeline()
    pipe.add_node(component=web_retriever, name='retriever', inputs=['Query'])
    pipe.add_node(component=answer_node, name="prompt_node", inputs=["retriever"])
    return pipe
utils/constants.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Example questions offered as one-click buttons in the right sidebar
# (see utils.ui.right_sidebar).
QUERIES = [
    "Did SVB collapse?",
    "Why did SVB collapse?",
    "What does SVB failure mean for our economy?",
    "Who is responsible for SVB collapse?",
    "When did SVB collapse?"
]
# Headings shown above the answer boxes (also reused as widget keys).
PLAIN_GPT_ANS = "Answer with plain GPT"
GPT_LOCAL_RET_AUG_ANS = "Answer with Retrieval augmented GPT (static news dataset)"
GPT_WEB_RET_AUG_ANS = "Answer with Retrieval augmented GPT (web search)"


# Labels for the "Answer Type:" radio selector; the selected label is stored
# in session state under "query_type" and chooses the local vs. web pipeline.
BUTTON_LOCAL_RET_AUG = "Retrieval augmented (static news dataset)"
BUTTON_WEB_RET_AUG = "Retrieval augmented with web search"
utils/ui.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+
4
+ from .constants import (QUERIES, PLAIN_GPT_ANS, GPT_WEB_RET_AUG_ANS, GPT_LOCAL_RET_AUG_ANS,
5
+ BUTTON_LOCAL_RET_AUG, BUTTON_WEB_RET_AUG)
6
+
7
+
8
def set_question():
    """Copy the dropdown selection into the active query in session state."""
    st.session_state['query'] = st.session_state['q_drop_down']


def _make_query_setter(index):
    """Return a button callback that sets the active query to QUERIES[index].

    Replaces five copy-pasted set_qN functions; the index is bound at
    creation time, so each callback is independent.
    """
    def _setter():
        st.session_state['query'] = QUERIES[index]
    return _setter


# Public callback names are part of this module's interface
# (right_sidebar wires them to the example-question buttons).
set_q1 = _make_query_setter(0)
set_q2 = _make_query_setter(1)
set_q3 = _make_query_setter(2)
set_q4 = _make_query_setter(3)
set_q5 = _make_query_setter(4)
30
+
31
+
32
def main_column():
    """Render the query box, Run button, answer-type radio and two answer
    placeholders.

    Returns:
        (run_pressed, placeholder_plain_gpt, placeholder_retrieval_augmented)
    """
    placeholder = st.empty()
    with placeholder:
        query_col, run_col = st.columns([3, 1])
        with query_col:
            _ = st.text_area(f" ", max_chars=200, key='query')

        with run_col:
            # Blank writes push the button down to line up with the text area.
            st.write(" ")
            st.write(" ")
            run_pressed = st.button("Run", key="run")

    st.write(" ")
    st.radio("Answer Type:", (BUTTON_LOCAL_RET_AUG, BUTTON_WEB_RET_AUG), key="query_type")

    # Plain-GPT answer box.
    st.markdown(f"<h5>{PLAIN_GPT_ANS}</h5>", unsafe_allow_html=True)
    placeholder_plain_gpt = st.empty()
    placeholder_plain_gpt.text_area(f" ", placeholder="The answer will appear here.", disabled=True,
                                    key=PLAIN_GPT_ANS, height=1, label_visibility='collapsed')
    # Heading reflects the currently selected answer type.
    if st.session_state.get("query_type", BUTTON_LOCAL_RET_AUG) == BUTTON_LOCAL_RET_AUG:
        st.markdown(f"<h5>{GPT_LOCAL_RET_AUG_ANS}</h5>", unsafe_allow_html=True)
    else:
        st.markdown(f"<h5>{GPT_WEB_RET_AUG_ANS}</h5>", unsafe_allow_html=True)
    placeholder_retrieval_augmented = st.empty()
    placeholder_retrieval_augmented.text_area(f" ", placeholder="The answer will appear here.", disabled=True,
                                              key=GPT_LOCAL_RET_AUG_ANS, height=1, label_visibility='collapsed')

    return run_pressed, placeholder_plain_gpt, placeholder_retrieval_augmented
60
+
61
+
62
def right_sidebar():
    """Render the example-question buttons; clicking one fills the query box."""
    st.write("")
    st.write("")
    st.markdown("<h5> Example questions </h5>", unsafe_allow_html=True)
    # One button per example query, each wired to its setter callback.
    for question, callback in zip(QUERIES, (set_q1, set_q2, set_q3, set_q4, set_q5)):
        st.button(question, on_click=callback, use_container_width=True)
71
+
72
+
73
def left_sidebar():
    """Render the left sidebar: intro blurb, a "How this works" section and
    the Haystack logo.

    (Removed a large block of commented-out, disabled API-key-input code
    that was dead weight in the original.)
    """
    with st.sidebar:
        image = Image.open('logo/haystack-logo-colored.png')
        st.markdown("Thanks for coming to this :hugging_face: space. \n\n"
                    "This is an effort towards showcasing how you can use Haystack for Retrieval Augmented QA, "
                    "with local [FAISSDocumentStore](https://docs.haystack.deepset.ai/reference/document-store-api#faissdocumentstore)"
                    " or a [WebRetriever](https://docs.haystack.deepset.ai/docs/retriever#retrieval-from-the-web). \n\n"
                    "More information on how this was built and instructions along "
                    "with a repository will be published soon and updated here.")

        st.markdown("---")
        st.markdown(
            "## How this works\n"
            "This app was built with [Haystack](https://haystack.deepset.ai) using the"
            " [PromptNode](https://docs.haystack.deepset.ai/docs/prompt_node), "
            "[Retriever](https://docs.haystack.deepset.ai/docs/retriever#embedding-retrieval-recommended),"
            "and [FAISSDocumentStore](https://docs.haystack.deepset.ai/reference/document-store-api#faissdocumentstore).\n\n"
            " You can find the source code in **Files and versions** tab."
        )

        st.markdown("---")
        st.image(image, width=250)