# Hugging Face Spaces page residue (Space status at capture time: "Runtime error").
# Standard library
import logging

# Third-party: data handling, UI, and the Haystack (1.x) FAQ-search components.
import pandas as pd
import streamlit as st
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import EmbeddingRetriever
from haystack.nodes.other.docs2answers import Docs2Answers
from haystack.pipelines import Pipeline
from haystack.utils import print_answers
from haystack.utils import launch_es, fetch_archive_from_http
# Initialize logging: only warnings and above by default, but keep
# INFO-level messages from the "haystack" logger.
logging.basicConfig(format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING)
logging.getLogger("haystack").setLevel(logging.INFO)
# Directory the FAQ archive is unpacked into, and the archive's download URL.
doc_dir = "data/basic_faq_pipeline"
s3_url = "https://core-engineering.s3.eu-central-1.amazonaws.com/public/scripts/small_faq_covid.csv1.zip"


# Streamlit re-executes this whole script on every widget interaction.
# Caching the setup as a resource makes the expensive steps below (starting
# Elasticsearch, loading the embedding model, downloading, embedding and
# indexing the FAQ) run once per server process instead of once per question.
# NOTE(review): st.cache_resource requires Streamlit >= 1.18; on older
# versions the equivalent decorator is st.experimental_singleton.
@st.cache_resource
def build_faq_pipeline():
    """Launch Elasticsearch, index the FAQ data and assemble the pipeline.

    Returns:
        A ``(document_store, retriever, pipeline)`` tuple. ``pipeline`` is a
        reader-less pipeline: "Query" -> "Retriever" -> "Docs2Answers".
    """
    # Launch Elasticsearch
    launch_es()

    # Initialize the document store; embeddings of the FAQ questions are
    # stored in the "question_emb" field and compared by cosine similarity.
    document_store = ElasticsearchDocumentStore(
        host="localhost",
        username="",
        password="",
        index="document",
        embedding_field="question_emb",
        embedding_dim=384,
        excluded_meta_data=["question_emb"],
        similarity="cosine",
    )
    retriever = EmbeddingRetriever(
        document_store=document_store,
        embedding_model="sentence-transformers/all-MiniLM-L6-v2",
        use_gpu=True,
        scale_score=False,
    )
    doc_to_answers = Docs2Answers()

    # Download the FAQ dataset and load it.
    fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
    df = pd.read_csv(f"{doc_dir}/small_faq_covid.csv")

    # Minimal cleaning
    df = df.fillna(value="")
    df["question"] = df["question"].apply(str.strip)

    # Get embeddings for our questions from the FAQs
    questions = df["question"].tolist()
    df["question_emb"] = retriever.embed_queries(queries=questions).tolist()
    df = df.rename(columns={"question": "content"})

    # Convert Dataframe to list of dicts and index them in our DocumentStore
    docs_to_index = df.to_dict(orient="records")
    document_store.write_documents(docs_to_index)

    # Initialize a Pipeline (this time without a reader)
    pipeline = Pipeline()
    pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
    pipeline.add_node(component=doc_to_answers, name="Docs2Answers", inputs=["Retriever"])
    return document_store, retriever, pipeline


# Module-level handles, built once and reused across Streamlit reruns.
document_store, retriever, pipeline = build_faq_pipeline()
# Create the Streamlit app: a single text box whose input is run through the
# FAQ pipeline, with the retrieved answers rendered in the page.
st.title("FAQ Search")
question = st.text_input("Ask a question:").strip()
if question:  # skip empty / whitespace-only input
    params = {"Retriever": {"top_k": 10}}  # Modify parameters as needed
    prediction = pipeline.run(query=question, params=params)

    # print_answers only writes to stdout (the server console), so keep it as
    # a log aid and render the results in the page explicitly below.
    print_answers(prediction, details="medium")

    st.subheader("Answers:")
    answers = prediction.get("answers", [])
    if not answers:
        st.write("No matching FAQ entries found.")
    for rank, answer in enumerate(answers, start=1):
        # NOTE(review): assumes Haystack 1.x Answer objects (.answer, .score,
        # .meta) and that Docs2Answers stores the matched FAQ question under
        # meta["query"] — confirm against the installed Haystack version.
        matched_question = (answer.meta or {}).get("query")
        if matched_question:
            st.markdown(f"**{rank}. {matched_question}**")
        else:
            st.markdown(f"**{rank}.**")
        st.write(answer.answer)
        if answer.score is not None:
            st.write(f"Score: {answer.score:.3f}")