Spaces:
Running
Running
File size: 1,328 Bytes
01b8e8e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
from haystack.schema import Document
import uuid
def format_docs(documents):
"""Given a list of documents, format the documents and return the documents and doc ids."""
db_docs: list = []
for doc in documents:
doc_id = doc['id'] if doc['id'] is not None else str(uuid.uuid4())
db_doc = {
"content": doc['text'],
"content_type": "text",
"id": str(uuid.uuid4()),
"meta": {"id": doc_id},
}
db_docs.append(Document(**db_doc))
return db_docs, [doc.meta["id"] for doc in db_docs]
def index(documents, pipeline):
documents, doc_ids = format_docs(documents)
pipeline.run(documents=documents)
return doc_ids
def search(queries, pipeline):
results = []
matches_queries = pipeline.run_batch(queries=queries)
for matches in matches_queries["documents"]:
query_results = []
for res in matches:
metadata = res.meta
query_results.append(
{
"text": res.content,
"score": res.score,
"id": res.meta["id"],
"fragment_id": res.id
}
)
results.append(
sorted(query_results, key=lambda x: x["score"], reverse=True)
)
return results |