'''This module contains utility functions for the project'''
import mmh3
from haystack import Document


def get_unique_docs(dataset):
    '''Build one haystack Document per distinct context in the dataset.

    Entries whose "context" is None are skipped. For the remaining entries,
    only the first occurrence of each "context_id" produces a Document, so
    the returned list follows the order in which ids are first seen.

    Args:
        dataset: iterable of dictionaries; each entry is read through the
            keys "context", "context_id" and "context_title". The
            "context_id" values are stored in a set, so they must be
            hashable.

    Returns:
        docs: list of haystack.Document, each carrying the context text as
            its content and the title and context id in its meta.
    '''
    seen_ids = set()
    collected = []
    for entry in dataset:
        # Entries without any context text cannot become a Document.
        if entry["context"] is None:
            continue
        # Keep only the first entry encountered for a given context id.
        if entry["context_id"] in seen_ids:
            continue
        seen_ids.add(entry["context_id"])
        metadata = {
            'title': entry["context_title"],
            'context_id': entry["context_id"],
        }
        collected.append(Document(content=entry["context"], meta=metadata))
    return collected