qasports-website / utils.py
leomaurodesenv's picture
feat(utils): Create utils, add get unique documents function
4ce2e5d
raw
history blame
653 Bytes
'''This module contains utility functions for the project'''
import mmh3
from haystack import Document
def get_unique_docs(dataset) -> "list[Document]":
    '''Build one haystack Document per distinct ``context_id`` in the dataset.

    Rows are visited in iteration order. A row is turned into a Document only
    when its ``"context"`` is not None and its ``"context_id"`` has not been
    emitted before, so the first usable row for each id wins and the output
    keeps the input order.

    Args:
        dataset: iterable of dict-like rows. Every row must provide
            ``"context"``; rows with a non-None context must also provide
            ``"context_id"`` and (when first seen) ``"context_title"``.

    Returns:
        docs: list of haystack.Document, each with ``content`` set to the
            row's context and ``meta`` holding ``title`` and ``context_id``.

    Raises:
        KeyError: if a row lacks one of the keys that is read for it.
    '''
    seen_ids = set()
    docs = []
    for row in dataset:
        context = row["context"]
        # Rows without text are skipped *before* their id is recorded, so a
        # later row carrying the same id with real text is still picked up.
        if context is None:
            continue

        context_id = row["context_id"]
        if context_id in seen_ids:
            continue
        seen_ids.add(context_id)

        docs.append(
            Document(
                content=context,
                meta={'title': row["context_title"], 'context_id': context_id},
            )
        )
    return docs