# Persistence helper for the "PaulMartrenchar/jobsearch_database" Hugging Face dataset.
from typing import List
from JobDescription import JobDescription, AIInformation
from huggingface_hub import hf_hub_download, HfApi, login
from pathlib import Path
import json
import os
# Hugging Face dataset repository that stores the persisted job database.
REPO_ID = "PaulMartrenchar/jobsearch_database"
# Name of the JSON database file, both locally and inside the dataset repo.
FILE_NAME = "db.json"
# Local working directory for the JSON dump; created eagerly at import time.
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
# Full local path of the JSON database file.
JSON_DATASET_PATH = JSON_DATASET_DIR / FILE_NAME
class Database:
    """Job-description store mirrored to the Hugging Face dataset repo REPO_ID.

    Keeps an in-memory list of jobs in ``self.DB`` and uploads a JSON dump
    (``db.json``) to the dataset repository on every save.
    """

    def __init__(self):
        # In-memory cache of the database. It starts EMPTY — it is not
        # pre-populated from the remote dataset.
        self.DB: List[JobDescription] = []
        # Authenticate against the Hugging Face Hub with the token from the
        # "HF" environment variable.
        login(token=os.getenv("HF"), add_to_git_credential=True)

    def get_current_db(self) -> List[JobDescription]:
        """Download the remote db.json and return its parsed content.

        NOTE(review): ``json.load`` yields plain dicts, not JobDescription
        instances, despite the annotation — confirm callers expect that.
        """
        filepath = hf_hub_download(repo_id=REPO_ID, filename=FILE_NAME, repo_type="dataset")
        with open(filepath, 'r', encoding='utf-8') as file:
            return json.load(file)

    def save_db(self, new_db: List[JobDescription]) -> None:
        """Serialize ``new_db`` to a local JSON file and upload it to the repo.

        Fix: the dump is written to JSON_DATASET_PATH — the directory the
        module creates for exactly this purpose — instead of cluttering the
        current working directory with a stray db.json.
        """
        with open(JSON_DATASET_PATH, 'w', encoding='utf-8') as file:
            # CustomEncoder handles JobDescription / AIInformation objects.
            json.dump(new_db, file, cls=CustomEncoder, indent=4)
        api = HfApi()
        api.upload_file(
            path_or_fileobj=str(JSON_DATASET_PATH),
            path_in_repo=FILE_NAME,
            repo_id=REPO_ID,
            repo_type="dataset",
        )

    def merge_dbs(self, list1: List[JobDescription], list2: List[JobDescription]) -> List[JobDescription]:
        """Concatenate two job lists, dropping duplicates by ``url``.

        Order is preserved; the first occurrence of each URL wins
        (so entries in ``list1`` shadow same-URL entries in ``list2``).
        """
        seen_urls = set()
        merged_list: List[JobDescription] = []
        for job in list1 + list2:
            if job.url not in seen_urls:
                seen_urls.add(job.url)
                merged_list.append(job)
        return merged_list

    def add_to_db(self, new_jobs: List[JobDescription]) -> None:
        """Merge ``new_jobs`` into the current DB and persist the result.

        Side effect: each element of ``new_jobs`` is mutated — its
        ``job_description`` is blanked so full descriptions are not stored.
        """
        for job in new_jobs:
            job.job_description = ""
        # NOTE(review): this merges against the in-memory self.DB, not
        # get_current_db(), so a fresh process starts from an empty base and
        # the upload can overwrite remote entries — confirm this is intended.
        current_db = self.DB
        new_db = self.merge_dbs(current_db, new_jobs)
        self.save_db(new_db)
        self.DB = new_db
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that serializes JobDescription and AIInformation objects
    through their ``to_dict()`` method; everything else uses the default rules.
    """

    def default(self, obj):
        if isinstance(obj, (JobDescription, AIInformation)):
            return obj.to_dict()
        # Fall through to the base implementation, which raises TypeError
        # for objects JSON cannot represent.
        return super().default(obj)