# Spaces status: Runtime error
import logging

import pandas as pd
import streamlit as st
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import EmbeddingRetriever
from haystack.nodes.other.docs2answers import Docs2Answers
from haystack.pipelines import Pipeline
from haystack.utils import print_answers
from haystack.utils import launch_es, fetch_archive_from_http
# Logging setup: the root logger reports WARNING and above, while the
# "haystack" logger is lowered to INFO so its progress messages are shown.
_LOG_FORMAT = "%(levelname)s - %(name)s - %(message)s"
logging.basicConfig(level=logging.WARNING, format=_LOG_FORMAT)
_haystack_logger = logging.getLogger("haystack")
_haystack_logger.setLevel(logging.INFO)

# Start Elasticsearch through Haystack's helper before the document store
# below tries to connect to it.
launch_es()
# Haystack building blocks: the Elasticsearch-backed document store, the
# embedding retriever that queries it, and the node that turns retrieved
# documents into answers.

# Name of the field that holds each question's embedding vector. It is also
# listed in excluded_meta_data below.
_EMBEDDING_FIELD = "question_emb"

document_store = ElasticsearchDocumentStore(
    host="localhost",
    username="",
    password="",
    index="document",
    similarity="cosine",
    embedding_field=_EMBEDDING_FIELD,
    # NOTE(review): presumably the output size of the MiniLM model configured
    # on the retriever below -- confirm if the model is ever swapped.
    embedding_dim=384,
    excluded_meta_data=[_EMBEDDING_FIELD],
)

retriever = EmbeddingRetriever(
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
    document_store=document_store,
    scale_score=False,
    use_gpu=True,
)

doc_to_answers = Docs2Answers()
# Download the FAQ archive into a local directory and load the CSV from it.
# NOTE(review): Streamlit re-executes the script on each user interaction, so
# this download / embed / index sequence presumably repeats every time --
# consider caching it.
doc_dir = "data/basic_faq_pipeline"
s3_url = "https://core-engineering.s3.eu-central-1.amazonaws.com/public/scripts/small_faq_covid.csv1.zip"
fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
df = pd.read_csv(f"{doc_dir}/small_faq_covid.csv")

# Minimal cleaning: blank out missing cells and trim whitespace around each
# question.
df = df.fillna("")
df["question"] = df["question"].map(lambda text: text.strip())

# Embed every FAQ question with the retriever and keep the vectors next to
# the rows they belong to.
questions = df["question"].tolist()
question_vectors = retriever.embed_queries(queries=questions)
df["question_emb"] = question_vectors.tolist()

# The question text becomes the "content" column of each record.
df = df.rename(columns={"question": "content"})

# One dict per row, written to the document store.
docs_to_index = df.to_dict("records")
document_store.write_documents(docs_to_index)
# Query pipeline without a reader: Query -> Retriever -> Docs2Answers.
pipeline = Pipeline()
for _node, _node_name, _upstream in (
    (retriever, "Retriever", ["Query"]),
    (doc_to_answers, "Docs2Answers", ["Retriever"]),
):
    pipeline.add_node(component=_node, name=_node_name, inputs=_upstream)
# Streamlit front end: a title, a single text box, and the answers found for
# the entered question.
st.title("FAQ Search")
question = st.text_input("Ask a question:")

# Ignore empty or whitespace-only input so no search runs for a blank query.
query_text = question.strip() if question else ""
if query_text:
    params = {"Retriever": {"top_k": 10}}  # Modify parameters as needed
    prediction = pipeline.run(query=query_text, params=params)

    st.subheader("Answers:")

    # Render the results on the page itself; print_answers() only writes to
    # stdout, which the browser never shows.
    answers = prediction.get("answers", [])
    if not answers:
        st.write("No matching answers found.")
    for rank, answer in enumerate(answers, start=1):
        st.write(f"{rank}. {answer.answer}")
        if answer.score is not None:
            st.write(f"Score: {answer.score:.3f}")

    # Keep the original console output for the server-side logs.
    print_answers(prediction, details="medium")