# NOTE(review): the lines below are Hugging Face web-page scrape residue
# (Space status header, file size, commit hashes, line-number gutter),
# not Python source. Commented out so the module parses; safe to delete.
# Spaces: / Sleeping / Sleeping / File size: 2,043 Bytes
# 5562352 41c3c39 09c7fec c870750 5562352 09c7fec 5562352 fe40eee 5562352 c870750 6b24d80 27911c6 6b24d80 41c3c39 6b24d80 41c3c39
# 1 2 3 ... 66 (line-number gutter)
from typing import List
from JobDescription import JobDescription, AIInformation
from huggingface_hub import hf_hub_download, HfApi, login
from pathlib import Path
import json
import os
# Hugging Face dataset repository that stores the job database.
REPO_ID = "PaulMartrenchar/jobsearch_database"
# Name of the JSON database file inside the dataset repo.
FILE_NAME = "db.json"
# Local scratch directory for serialized JSON; created eagerly at import time.
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
# Full local path for the serialized database file.
JSON_DATASET_PATH = JSON_DATASET_DIR / FILE_NAME
class Database:
    """Wrapper around a JSON job database stored in a Hugging Face dataset repo.

    Keeps an in-memory copy (``self.DB``) and pushes merged updates back to
    the remote dataset via the Hub API.
    """

    def __init__(self):
        # In-memory copy of the database. Starts empty and is only populated
        # by add_to_db — it is NOT loaded from the remote repo here.
        self.DB: List[JobDescription] = []
        # Authenticate against the Hugging Face Hub with the "HF" env token.
        login(token=os.getenv("HF"), add_to_git_credential=True)

    def get_current_db(self) -> List[JobDescription]:
        """Download and parse the remote database file.

        NOTE(review): this returns the raw ``json.load`` result (a list of
        dicts), not ``JobDescription`` instances as the annotation suggests —
        callers relying on attribute access must convert first; confirm.
        """
        filepath = hf_hub_download(repo_id=REPO_ID, filename=FILE_NAME, repo_type="dataset")
        with open(filepath, 'r', encoding='utf-8') as file:
            return json.load(file)

    def save_db(self, new_db):
        """Serialize ``new_db`` to the local scratch path and upload it.

        Fix: write into JSON_DATASET_PATH (the directory created at import
        time for exactly this purpose) instead of FILE_NAME in the current
        working directory, which left JSON_DATASET_DIR/PATH as dead code.
        """
        api = HfApi()
        with open(JSON_DATASET_PATH, 'w', encoding='utf-8') as file:
            # CustomEncoder turns JobDescription/AIInformation into dicts.
            json.dump(new_db, file, cls=CustomEncoder, indent=4)
        api.upload_file(
            path_or_fileobj=str(JSON_DATASET_PATH),
            path_in_repo=FILE_NAME,
            repo_id=REPO_ID,
            repo_type="dataset",
        )

    def merge_dbs(self, list1: List[JobDescription], list2: List[JobDescription]) -> List[JobDescription]:
        """Concatenate both lists, keeping only the first job seen per URL.

        Order is preserved, so entries from ``list1`` win over duplicates
        appearing later in ``list2``.
        """
        unique_urls = set()
        merged_list = []
        for job in list1 + list2:
            if job.url not in unique_urls:
                unique_urls.add(job.url)
                merged_list.append(job)
        return merged_list

    def add_to_db(self, new_jobs: List[JobDescription]):
        """Merge ``new_jobs`` into the in-memory DB and push the result upstream."""
        # Drop the (potentially large) free-text description before storage.
        for job in new_jobs:
            job.job_description = ""
        # Merge against the in-memory copy. NOTE(review): a fresh process
        # starts from an empty self.DB, not the remote DB — confirm intended.
        new_db = self.merge_dbs(self.DB, new_jobs)
        # Persist remotely first, then update the cached copy.
        self.save_db(new_db)
        self.DB = new_db
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that serializes JobDescription / AIInformation via to_dict()."""

    def default(self, obj):
        # Project types know how to represent themselves as plain dicts.
        if isinstance(obj, (JobDescription, AIInformation)):
            return obj.to_dict()
        # Defer to the base class, which raises TypeError for unknown types.
        return super().default(obj)