Spaces:
Sleeping
Sleeping
Commit
·
5562352
1
Parent(s):
dae902c
Try to save to DB
Browse files- app.py +3 -0
- db.py +42 -0
- requirements.txt +2 -1
app.py
CHANGED
@@ -7,6 +7,7 @@ from jobspy_indeed import indeed_get_jobs
|
|
7 |
from WelcomeToTheJungle import wtoj_get_jobs
|
8 |
from jobspy_linkedin import linkedin_get_jobs
|
9 |
from ai_manager import get_extra_information
|
|
|
10 |
|
11 |
def html_format_page(jobs : List[JobDescription]):
|
12 |
result = ["<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"]
|
@@ -55,6 +56,8 @@ def search_jobs(search_term, platform):
|
|
55 |
for job in selected_jobs:
|
56 |
job.ai_result = get_extra_information(job.company, job.job_description)
|
57 |
|
|
|
|
|
58 |
return html_format_page(selected_jobs)
|
59 |
|
60 |
|
|
|
7 |
from WelcomeToTheJungle import wtoj_get_jobs
|
8 |
from jobspy_linkedin import linkedin_get_jobs
|
9 |
from ai_manager import get_extra_information
|
10 |
+
from db import add_to_db
|
11 |
|
12 |
def html_format_page(jobs : List[JobDescription]):
|
13 |
result = ["<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"]
|
|
|
56 |
for job in selected_jobs:
|
57 |
job.ai_result = get_extra_information(job.company, job.job_description)
|
58 |
|
59 |
+
add_to_db(selected_jobs)
|
60 |
+
|
61 |
return html_format_page(selected_jobs)
|
62 |
|
63 |
|
db.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List
|
2 |
+
from JobDescription import JobDescription
|
3 |
+
from huggingface_hub import hf_hub_download, HfApi, HfFolder, Repository, push_to_hub
|
4 |
+
import json
|
5 |
+
|
6 |
+
REPO_ID = "PaulMartrenchar/jobsearch_database"
|
7 |
+
FILE_NAME = "db.json"
|
8 |
+
|
9 |
+
def get_current_db() -> List[JobDescription]:
    """Fetch the job database file from the Hub dataset repo and parse it.

    Downloads FILE_NAME from the REPO_ID dataset via the hub cache, then
    returns the parsed JSON payload.
    NOTE(review): json.load yields plain dicts, not JobDescription
    instances, despite the annotation — confirm against callers.
    """
    local_copy = hf_hub_download(repo_id=REPO_ID, filename=FILE_NAME, repo_type="dataset")
    with open(local_copy, 'r') as db_file:
        return json.load(db_file)
|
13 |
+
|
14 |
+
def merge_dbs(list1 : List[JobDescription], list2 : List[JobDescription]):
    """Combine two job lists, keeping only the first job seen per URL.

    Order is preserved: jobs from *list1* come first, followed by any job
    from *list2* whose URL was not already present.
    """
    by_url = {}
    for candidate in list1 + list2:
        # setdefault keeps the first job stored under each URL, so later
        # duplicates are dropped exactly as the set-based original did.
        by_url.setdefault(candidate.url, candidate)
    return list(by_url.values())
|
24 |
+
|
25 |
+
def add_to_db(new_jobs : List[JobDescription]):
    """Persist *new_jobs* into the Hub-hosted JSON job database.

    Strips the (large) job descriptions, merges the jobs with the current
    DB contents, writes the merged list to a local JSON file, and uploads
    that file to the REPO_ID dataset repo.
    """
    #Remove descriptions from the JobDescription (keeps the stored DB small)
    for job in new_jobs:
        job.job_description = ""

    #get current DB
    # NOTE(review): left empty in the original, so each upload OVERWRITES the
    # remote DB instead of extending it. get_current_db() returns plain dicts
    # while merge_dbs() reads a .url attribute, so that mismatch must be
    # resolved before switching this to: current_db = get_current_db()
    current_db = []
    #merge, dropping jobs whose URL is already present
    new_db = merge_dbs(current_db, new_jobs)

    #update: write the merged DB locally, then push the file to the Hub.
    local_path = "./" + FILE_NAME
    with open(local_path, 'w') as file:
        # default=vars: JobDescription instances are not JSON-serializable on
        # their own; serialize them through their __dict__.
        json.dump(new_db, file, indent=4, default=vars)
    # huggingface_hub exposes no top-level push_to_hub(); HfApi.upload_file
    # (HfApi is already imported at the top of this file) is the supported
    # way to push a single file to a repo.
    HfApi().upload_file(
        path_or_fileobj=local_path,
        path_in_repo=FILE_NAME,
        repo_id=REPO_ID,
        repo_type="dataset",
    )
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
mistralai
|
2 |
python-jobspy
|
3 |
markdownify
|
4 |
-
beautifulsoup4
|
|
|
|
1 |
mistralai
|
2 |
python-jobspy
|
3 |
markdownify
|
4 |
+
beautifulsoup4
|
5 |
+
huggingface_hub
|