"""Persistence layer for the job-search database stored on the Hugging Face Hub.

The database is a single JSON file (``db.json``) in a HF *dataset* repo.
``Database`` keeps an in-memory cache (``self.DB``) and pushes updates back
to the Hub on every ``add_to_db`` call.
"""

import json
import os
from pathlib import Path
from typing import List

from huggingface_hub import HfApi, hf_hub_download, login

from JobDescription import AIInformation, JobDescription

# Hugging Face dataset repo that holds the serialized database file.
REPO_ID = "PaulMartrenchar/jobsearch_database"
FILE_NAME = "db.json"

# Local working directory for the dataset file (created eagerly on import).
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
JSON_DATASET_PATH = JSON_DATASET_DIR / FILE_NAME


class CustomEncoder(json.JSONEncoder):
    """JSON encoder that serializes project domain objects via ``to_dict()``."""

    def default(self, obj):
        if isinstance(obj, (JobDescription, AIInformation)):
            return obj.to_dict()
        return super().default(obj)


class Database:
    """In-memory job-description store backed by a Hugging Face dataset file."""

    def __init__(self):
        # Cached copy of the database; starts empty (NOT loaded from the Hub).
        self.DB: List[JobDescription] = []
        # Authenticate once; token comes from the HF environment variable.
        login(token=os.getenv("HF"), add_to_git_credential=True)

    def get_current_db(self) -> List[JobDescription]:
        """Download and parse the remote database file.

        NOTE(review): ``json.load`` returns plain dicts, not
        ``JobDescription`` instances, so the annotated return type is not
        what is actually produced — callers relying on attribute access
        (e.g. ``merge_dbs``) would need a deserialization step; confirm
        intended usage.
        """
        filepath = hf_hub_download(
            repo_id=REPO_ID, filename=FILE_NAME, repo_type="dataset"
        )
        with open(filepath, "r", encoding="utf-8") as file:
            return json.load(file)

    def save_db(self, new_db) -> None:
        """Serialize ``new_db`` to a local file, then upload it to the repo."""
        api = HfApi()
        with open(FILE_NAME, "w", encoding="utf-8") as file:
            json.dump(new_db, file, cls=CustomEncoder, indent=4)
        api.upload_file(
            path_or_fileobj=FILE_NAME,
            path_in_repo=FILE_NAME,
            repo_id=REPO_ID,
            repo_type="dataset",
        )

    def merge_dbs(
        self, list1: List[JobDescription], list2: List[JobDescription]
    ) -> List[JobDescription]:
        """Concatenate both lists, keeping only the first job seen per ``url``.

        Jobs from ``list1`` win ties, since they are scanned first.
        """
        seen_urls = set()
        merged: List[JobDescription] = []
        for job in list1 + list2:
            if job.url not in seen_urls:
                seen_urls.add(job.url)
                merged.append(job)
        return merged

    def add_to_db(self, new_jobs: List[JobDescription]) -> None:
        """Merge ``new_jobs`` into the cached DB, persist it, and re-cache.

        Mutates each element of ``new_jobs`` in place (clears its
        ``job_description``).
        """
        # Drop the full description text to keep the stored file small.
        for job in new_jobs:
            job.job_description = ""
        # Merge against the in-memory cache — NOT the remote file. The cache
        # starts empty, so the first call on a fresh instance overwrites the
        # remote DB with only `new_jobs`; confirm this is intended.
        new_db = self.merge_dbs(self.DB, new_jobs)
        # Persist remotely first, then update the cache.
        self.save_db(new_db)
        self.DB = new_db