# jobsearch / db.py
# Page-header residue captured with this file: uploader "PaulMartrenchar",
# latest commit message "fix", commit 41c3c39.
from typing import List
from JobDescription import JobDescription, AIInformation
from huggingface_hub import hf_hub_download, HfApi, login
from pathlib import Path
import json
import os
# Hugging Face dataset repository that stores the job database, and the name
# of the JSON file inside it.
REPO_ID = "PaulMartrenchar/jobsearch_database"
FILE_NAME = "db.json"

# Local directory (relative to the current working directory) and the file
# path inside it. The directory is created at import time if it is missing.
# NOTE(review): JSON_DATASET_PATH is not referenced by the code visible in
# this file — confirm whether other modules rely on it.
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_PATH = JSON_DATASET_DIR / FILE_NAME
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
class Database:
    """In-memory list of jobs mirrored to a JSON file in a Hugging Face dataset.

    ``DB`` holds the jobs known to this instance. ``add_to_db`` merges new
    jobs into it and uploads the whole list to ``REPO_ID`` as ``FILE_NAME``.
    """

    def __init__(self):
        """Start with an empty job list and log in to the Hugging Face Hub."""
        # NOTE(review): the list is not seeded from the hub here; it only
        # contains what add_to_db() receives on this instance.
        self.DB: List[JobDescription] = []
        # The access token is read from the "HF" environment variable.
        login(token=os.getenv("HF"), add_to_git_credential=True)

    def get_current_db(self) -> List[JobDescription]:
        """Download ``FILE_NAME`` from the dataset repo and return its parsed JSON.

        NOTE(review): the value comes straight from ``json.load``, so the
        entries are plain JSON data rather than ``JobDescription`` instances,
        despite the annotation — confirm how callers use the result.
        """
        filepath = hf_hub_download(repo_id=REPO_ID, filename=FILE_NAME, repo_type="dataset")
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(filepath, "r", encoding="utf-8") as file:
            return json.load(file)

    def save_db(self, new_db):
        """Write *new_db* to a local JSON file and upload it to the dataset repo.

        Args:
            new_db: The jobs to persist; serialized with ``CustomEncoder``.
        """
        api = HfApi()
        with open(FILE_NAME, "w", encoding="utf-8") as file:
            json.dump(new_db, file, cls=CustomEncoder, indent=4)
        # Upload only after the ``with`` block has closed the file, so the
        # complete content is on disk when it is read for the upload.
        api.upload_file(
            path_or_fileobj=FILE_NAME,
            path_in_repo=FILE_NAME,
            repo_id=REPO_ID,
            repo_type="dataset",
        )

    def merge_dbs(self, list1: List[JobDescription], list2: List[JobDescription]):
        """Return the jobs of *list1* followed by those of *list2*, unique by ``url``.

        When several jobs share a URL, the first one encountered is kept.
        Neither input is modified.
        """
        seen_urls = set()
        merged = []
        # Walk both inputs in turn instead of concatenating them, which avoids
        # a temporary copy and works for any pair of iterables.
        for jobs in (list1, list2):
            for job in jobs:
                if job.url in seen_urls:
                    continue
                seen_urls.add(job.url)
                merged.append(job)
        return merged

    def add_to_db(self, new_jobs: List[JobDescription]):
        """Merge *new_jobs* into the in-memory list and persist the result.

        The ``job_description`` of every job in *new_jobs* is cleared in place
        before merging, so the caller's objects are modified. ``self.DB`` is
        replaced only after ``save_db`` has returned.
        """
        # Remove descriptions from the JobDescription objects.
        for job in new_jobs:
            job.job_description = ""
        # Jobs already held in memory win over new jobs with the same URL.
        new_db = self.merge_dbs(self.DB, new_jobs)
        self.save_db(new_db)
        self.DB = new_db
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``JobDescription`` and ``AIInformation`` objects."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        The encoder calls this for objects it cannot serialize natively.
        Instances of the two project types are converted with their
        ``to_dict()`` method; anything else is passed to the base class,
        which raises ``TypeError``.
        """
        if isinstance(obj, (JobDescription, AIInformation)):
            return obj.to_dict()
        return super().default(obj)