Spaces:
Runtime error
Runtime error
from .sourcer import search_web | |
import pandas as pd | |
import os | |
# Directory (relative to the working directory) that holds the dataset files.
root_dir = 'data/datasets'

# The simplified Pira CSV is read once, at import time, so that gen_corpus()
# can reuse the in-memory frame instead of re-reading the file on every call.
_pira_csv_path = os.path.join(root_dir, 'pira_simplified.csv')
pira_df = pd.read_csv(_pira_csv_path)
def gen_corpus(query: str, pira: bool = True, ONU: bool = True, web: bool = True) -> list:
    """Assemble a corpus for ``query`` from the enabled sources.

    Args:
        query: Search string forwarded to ``search_web`` when ``web`` is set.
        pira: When true, include every entry of the ``text`` column of the
            module-level ``pira_df`` frame.
        ONU: Reserved for a PDF-based source. It is not implemented yet, so
            enabling it currently contributes nothing to the corpus.
        web: When true, include whatever ``search_web(query)`` returns.

    Returns:
        A new list holding the Pira entries first (if enabled), followed by
        the web search results (if enabled).

    Raises:
        ValueError: If ``pira``, ``ONU`` and ``web`` are all false, since no
            source would be left to build a corpus from.
    """
    if not (pira or ONU or web):
        # Previously a silent no-op that returned an empty corpus; fail loudly
        # so that a misconfigured call is noticed at the call site.
        raise ValueError(
            "gen_corpus needs at least one source enabled (pira, ONU or web)"
        )

    corpus = []
    if pira:
        corpus.extend(pira_df.text.to_list())
    if ONU:
        # TODO: implement PDFs
        pass
    if web:
        corpus.extend(search_web(query))
    return corpus