File size: 2,043 Bytes
5562352
41c3c39
09c7fec
c870750
5562352
09c7fec
5562352
fe40eee
5562352
c870750
 
 
 
6b24d80
 
 
27911c6
6b24d80
 
 
 
 
 
 
 
 
 
41c3c39
6b24d80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41c3c39
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from typing import List
from JobDescription import JobDescription, AIInformation
from huggingface_hub import hf_hub_download, HfApi, login
from pathlib import Path
import json
import os

# Hugging Face dataset repository that stores the job database.
REPO_ID = "PaulMartrenchar/jobsearch_database"
# File name of the database, used for the file in the repository and on disk.
FILE_NAME = "db.json"

# Local directory for the JSON dataset; created at import time if missing.
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
JSON_DATASET_PATH = JSON_DATASET_DIR.joinpath(FILE_NAME)

class Database:
    """Job-posting store kept in memory and persisted as JSON on the Hugging Face Hub.

    ``self.DB`` holds the working list. ``add_to_db`` merges new postings into
    it (de-duplicated by ``url``) and uploads the result to the dataset
    repository ``REPO_ID`` under the name ``FILE_NAME``.
    """

    def __init__(self):
        # Starts empty: nothing in this class loads the remote file into
        # ``self.DB``. NOTE(review): callers presumably seed it from
        # ``get_current_db()`` themselves -- confirm, otherwise the first
        # ``add_to_db`` call uploads only the newly added jobs.
        self.DB: "List[JobDescription]" = []
        # The token is read from the ``HF`` environment variable;
        # ``os.getenv`` yields None when the variable is not set.
        login(token=os.getenv("HF"), add_to_git_credential=True)

    def get_current_db(self) -> "List[JobDescription]":
        """Download ``FILE_NAME`` from the dataset repo and return its parsed JSON.

        NOTE(review): the value returned is whatever ``json.load`` produces
        (plain JSON types), not ``JobDescription`` instances as the annotation
        suggests. ``merge_dbs`` reads ``job.url`` as an attribute, so this
        result cannot be merged as-is -- confirm how callers convert it.
        """
        filepath = hf_hub_download(repo_id=REPO_ID, filename=FILE_NAME, repo_type="dataset")
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(filepath, 'r', encoding="utf-8") as file:
            return json.load(file)

    def save_db(self, new_db):
        """Write ``new_db`` to the local file ``FILE_NAME`` and upload it to the repo.

        Args:
            new_db: Data to persist. ``JobDescription`` and ``AIInformation``
                objects are serialised through ``CustomEncoder``.
        """
        api = HfApi()
        # Serialise completely before opening the file, so that an encoding
        # error cannot leave a truncated local file behind.
        payload = json.dumps(new_db, cls=CustomEncoder, indent=4)
        with open(FILE_NAME, 'w', encoding="utf-8") as file:
            file.write(payload)
        api.upload_file(
            path_or_fileobj=FILE_NAME,
            path_in_repo=FILE_NAME,
            repo_id=REPO_ID,
            repo_type="dataset",
        )

    def merge_dbs(self, list1: "List[JobDescription]", list2: "List[JobDescription]"):
        """Return the jobs of ``list1`` followed by those of ``list2``, unique by ``url``.

        The first job seen for a given ``url`` is kept and later ones are
        dropped; relative order is preserved. Any iterables are accepted, not
        only lists, and neither input is modified.

        Args:
            list1: Jobs that take precedence when a ``url`` appears in both.
            list2: Jobs appended after ``list1``.

        Returns:
            A new list of the de-duplicated jobs.
        """
        seen_urls = set()
        merged_list = []

        # Walk the inputs one after the other instead of concatenating them,
        # which avoids a temporary copy and works for non-list iterables.
        for jobs in (list1, list2):
            for job in jobs:
                if job.url not in seen_urls:
                    seen_urls.add(job.url)
                    merged_list.append(job)

        return merged_list

    def add_to_db(self, new_jobs: "List[JobDescription]"):
        """Merge ``new_jobs`` into the database and upload the result.

        Each job in ``new_jobs`` has its ``job_description`` set to ``""``
        in place before it is stored, so the caller's objects are modified.

        Args:
            new_jobs: Jobs to add. Jobs whose ``url`` is already present in
                ``self.DB`` are ignored.
        """
        # Materialise once so a one-shot iterable survives both passes below.
        new_jobs = list(new_jobs)

        # Remove descriptions from the JobDescription
        for job in new_jobs:
            job.job_description = ""

        # Existing entries come first, so they win over new duplicates.
        new_db = self.merge_dbs(self.DB, new_jobs)

        # Upload before replacing the in-memory list: if the save raises,
        # ``self.DB`` keeps its previous contents.
        self.save_db(new_db)
        self.DB = new_db


class CustomEncoder(json.JSONEncoder):
    """JSON encoder that serialises JobDescription and AIInformation via ``to_dict()``."""

    def default(self, obj):
        """Return ``obj.to_dict()`` for the two project types, else defer to the base class."""
        if not isinstance(obj, (JobDescription, AIInformation)):
            # Anything else goes to the stock encoder's fallback.
            return super().default(obj)
        return obj.to_dict()