# Hugging Face Space: daneshjoy/idr -- status: Runtime error -- file size: 6,343 bytes

import os

import streamlit as st

from lfqa import prepare, answer

# %% ------------------------------------------- Creating Doc store
# One-time build of the FAISS document store used by the app.
#
# The SQLite DB and the FAISS index are written next to this file, so the
# working directory is switched to the module directory *before* checking
# whether they already exist; otherwise the check and the build could look
# at two different directories.
module_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(module_dir)

doc_dir = "data/wiki_gameofthrones_txt12"
sql_file = 'faiss_doc_store.db'
faiss_file = 'faiss_index.faiss'

# Rebuild when either artefact is missing: both are needed together.
if not (os.path.exists(sql_file) and os.path.exists(faiss_file)):
    # Heavy imports are deferred so they are only paid when a build is needed.
    from haystack.document_stores import FAISSDocumentStore
    from haystack.nodes import DensePassageRetriever
    from haystack.utils import convert_files_to_docs, clean_wiki_text

    # %% Download/Load Docs

    # The corpus is expected to be present in `doc_dir` already. It is the
    # archive published at:
    # https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt12.zip

    print('---> Loading Documents ...')

    # Convert files to docs + cleaning
    docs = convert_files_to_docs(
        dir_path=doc_dir,
        clean_func=clean_wiki_text,
        split_paragraphs=True,
    )

    # %% Document Store

    print('---> Creating document store ...')
    document_store = FAISSDocumentStore(
        embedding_dim=128,
        faiss_index_factory_str="Flat",
        sql_url=f"sqlite:///{sql_file}",
    )

    # %% Retriever (DPR)

    print('---> Initializing retriever ...')
    retriever = DensePassageRetriever(
        document_store=document_store,
        query_embedding_model="vblagoje/dpr-question_encoder-single-lfqa-wiki",
        passage_embedding_model="vblagoje/dpr-ctx_encoder-single-lfqa-wiki",
        use_gpu=True,
    )

    # %% Write documents, create embeddings and save results

    print('---> Saving results ...')
    # The documents must be in the store before embeddings are computed:
    # update_embeddings() only embeds documents that are already stored, so
    # calling it on an empty store leaves the FAISS index without vectors.
    document_store.write_documents(docs)
    document_store.update_embeddings(retriever)
    # save faiss file
    document_store.save(faiss_file)

    print('Done!')


# %% ------------------------------------------- Main App


# Startup defaults, each overridable through an environment variable.
# Initial value of the "number of documents" slider in the sidebar.
DEFAULT_DOCS_FROM_RETRIEVER = int(
    os.environ.get("DEFAULT_DOCS_FROM_RETRIEVER", "3")
)
# Question pre-filled in the search bar when the UI is first loaded.
DEFAULT_QUESTION_AT_STARTUP = os.environ.get(
    "DEFAULT_QUESTION_AT_STARTUP",
    "Tell me something about Arya Stark?",
)


def set_state_if_absent(key, value):
    """Store ``value`` under ``key`` in the Streamlit session state, unless
    the key is already there (an existing entry is left untouched)."""
    if key in st.session_state:
        return
    st.session_state[key] = value

def reset_results(*args):
    """Clear the stored answer and results in the session state.

    Positional arguments are accepted and ignored, so the function can be
    passed directly as a widget ``on_change`` callback.
    """
    for state_key in ("answer", "results"):
        setattr(st.session_state, state_key, None)
        
def main(pipe):
    """Render the Streamlit page and answer the question typed by the user.

    Draws the title, the sidebar (retriever slider and footer), the search
    bar and the Run button. When a query is triggered, the result of
    ``answer(pipe, question, top_k_retriever)`` is stored in
    ``st.session_state.results`` and the first answer is displayed together
    with its document ids, scores and passages.

    Args:
        pipe: Pipeline object, passed through unchanged to ``answer``.
    """
    # Local import: keeps the block self-contained without touching the
    # file-level import section.
    import logging

    st.set_page_config(page_title="Haystack Demo", page_icon="https://haystack.deepset.ai/img/HaystackIcon.png")

    # Persistent state
    set_state_if_absent("question", DEFAULT_QUESTION_AT_STARTUP)
    set_state_if_absent("results", None)

    st.write("# Long-Form Question Answering")
    st.markdown("""
       This demo takes its data from a selection of Wikipedia pages on the topic of the **Game of Thrones** TV series         
    """)

    # Sidebar
    st.sidebar.header("Options")
    top_k_retriever = st.sidebar.slider(
        "Max. number of documents from retriever",
        min_value=1,
        max_value=10,
        value=DEFAULT_DOCS_FROM_RETRIEVER,
        step=1,
        on_change=reset_results,
    )

    # This is a plain string (not an f-string / str.format template), so the
    # CSS braces must be single; doubled braces would be emitted literally
    # and invalidate every rule.
    st.sidebar.markdown(
        """
    <style>
        a {
            text-decoration: none;
        }
        .haystack-footer {
            text-align: center;
        }
        .haystack-footer h4 {
            margin: 0.1rem;
            padding:0;
        }
        footer {
            opacity: 0;
        }
    </style>
    <div class="haystack-footer">
        <hr />
        <h4>Built with <a href="https://www.deepset.ai/haystack">Haystack</a></h4>
        <p>Get it on <a href="https://github.com/deepset-ai/haystack/">GitHub</a> &nbsp;&nbsp; - &nbsp;&nbsp; Read the <a href="https://haystack.deepset.ai/overview/intro">Docs</a></p>
        <small>Dataset link: <a href="https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt12.zip">Game of Thrones Wiki</a> <br />See the <a href="https://creativecommons.org/licenses/by-sa/3.0/">License</a> (CC BY-SA 3.0).</small>
    </div>
    """,
        unsafe_allow_html=True,
    )

    # Search bar
    question = st.text_input(
        value=st.session_state.question,
        max_chars=100,
        on_change=reset_results,
        label="question",
        label_visibility="hidden",
    )
    col1, col2 = st.columns(2)
    col1.markdown("<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)

    # Run button
    run_pressed = col1.button("Run")

    # Query when the button is pressed or the text differs from the last
    # question that was answered.
    run_query = run_pressed or (question != st.session_state.question)

    if run_query and question:
        reset_results()
        st.session_state.question = question

        with st.spinner(
            "🧠 &nbsp;&nbsp; Performing neural search on documents... \n "):
            try:
                st.session_state.results = answer(pipe, question, top_k_retriever)
            except Exception as e:
                # UI boundary: keep the page alive, but record the traceback
                # so the failure can be diagnosed from the logs.
                logging.getLogger(__name__).exception("Query failed")
                if "The server is busy processing requests" in str(e) or "503" in str(e):
                    st.error("🧑‍🌾 &nbsp;&nbsp; All our workers are busy! Try again later.")
                else:
                    st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
                return

    results = st.session_state.results
    if results:
        answers = results['answers']
        if not answers:
            # Nothing to index into; avoid an IndexError on answers[0].
            st.info("No answer was generated for this question.")
            return

        best = answers[0]
        st.session_state.answer = best.answer
        st.write(best.answer)
        st.write('Doc IDs:')
        st.write(best.meta['doc_ids'])
        st.write('Doc Scores:')
        st.write(best.meta['doc_scores'])
        # Show at most top_k_retriever passages; iterating the passages
        # themselves stays safe when fewer were returned than requested.
        for passage in best.meta['content'][:top_k_retriever]:
            st.write(passage)
            st.markdown('---\n')

# Script entry point: build the pipeline with lfqa.prepare() and hand it to
# the UI.
# NOTE(review): Streamlit re-executes the script on each user interaction, so
# prepare() presumably runs on every rerun unless it caches internally -- confirm.
pipe = prepare()
main(pipe)