PaulMartrenchar committed on
Commit
5562352
·
1 Parent(s): dae902c

Try to save to DB

Browse files
Files changed (3) hide show
  1. app.py +3 -0
  2. db.py +42 -0
  3. requirements.txt +2 -1
app.py CHANGED
@@ -7,6 +7,7 @@ from jobspy_indeed import indeed_get_jobs
7
  from WelcomeToTheJungle import wtoj_get_jobs
8
  from jobspy_linkedin import linkedin_get_jobs
9
  from ai_manager import get_extra_information
 
10
 
11
  def html_format_page(jobs : List[JobDescription]):
12
  result = ["<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"]
@@ -55,6 +56,8 @@ def search_jobs(search_term, platform):
55
  for job in selected_jobs:
56
  job.ai_result = get_extra_information(job.company, job.job_description)
57
 
 
 
58
  return html_format_page(selected_jobs)
59
 
60
 
 
7
  from WelcomeToTheJungle import wtoj_get_jobs
8
  from jobspy_linkedin import linkedin_get_jobs
9
  from ai_manager import get_extra_information
10
+ from db import add_to_db
11
 
12
  def html_format_page(jobs : List[JobDescription]):
13
  result = ["<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"]
 
56
  for job in selected_jobs:
57
  job.ai_result = get_extra_information(job.company, job.job_description)
58
 
59
+ add_to_db(selected_jobs)
60
+
61
  return html_format_page(selected_jobs)
62
 
63
 
db.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from JobDescription import JobDescription
3
+ from huggingface_hub import hf_hub_download, HfApi, HfFolder, Repository, push_to_hub
4
+ import json
5
+
6
+ REPO_ID = "PaulMartrenchar/jobsearch_database"
7
+ FILE_NAME = "db.json"
8
+
9
def get_current_db() -> List[JobDescription]:
    """Download the stored job database from the Hub and return its parsed content.

    The file ``FILE_NAME`` is fetched from the dataset repository ``REPO_ID``
    and decoded as JSON.

    NOTE(review): the JSON decoder yields plain Python values (lists, dicts,
    strings, ...), not ``JobDescription`` instances, so the declared return
    type looks optimistic -- confirm against callers.
    """
    # hf_hub_download returns the local path of the downloaded file.
    local_copy = hf_hub_download(
        repo_type="dataset",
        repo_id=REPO_ID,
        filename=FILE_NAME,
    )
    with open(local_copy, 'r') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
13
+
14
def merge_dbs(list1: List["JobDescription"], list2: List["JobDescription"]) -> List["JobDescription"]:
    """Merge two job collections, keeping the first entry seen for each URL.

    Entries are visited in order, ``list1`` first and then ``list2``; an entry
    whose URL was already seen is dropped, so ``list1`` wins on conflicts and
    the order of first appearance is preserved.

    Each entry may be an object exposing a ``url`` attribute or a mapping with
    a ``"url"`` key (the shape produced when the database is read back with
    ``json``).

    Args:
        list1: Jobs that take precedence (typically the existing database).
        list2: Jobs to add when their URL is not already present.

    Returns:
        A new list of the unique entries; the inputs are not modified.

    Raises:
        AttributeError: If a non-mapping entry has no ``url`` attribute.
        KeyError: If a mapping entry has no ``"url"`` key.
    """
    from collections.abc import Mapping
    from itertools import chain

    def _url_of(job):
        # JSON-decoded entries are dicts; in-memory entries are objects.
        return job["url"] if isinstance(job, Mapping) else job.url

    seen_urls = set()
    merged_list = []

    # chain() walks both inputs without building a temporary concatenated
    # list and also accepts non-list iterables.
    for job in chain(list1, list2):
        url = _url_of(job)
        if url not in seen_urls:
            seen_urls.add(url)
            merged_list.append(job)

    return merged_list
24
+
25
def add_to_db(new_jobs: List["JobDescription"]):
    """Store ``new_jobs`` (without their descriptions) in the Hub dataset.

    The jobs are de-duplicated by URL, written to ``./<FILE_NAME>`` as JSON
    and the resulting file is uploaded to the dataset repository ``REPO_ID``.

    Args:
        new_jobs: Jobs to persist. They are not modified; the description is
            blanked on shallow copies only.

    NOTE(review): the existing database is not fetched here (``current_db``
    starts empty), so each upload replaces the stored file with only the new
    jobs -- confirm whether that is intended.
    NOTE(review): uploading presumably needs Hub write credentials to be
    configured for ``huggingface_hub`` -- confirm for the deployment.
    NOTE(review): ``push_to_hub`` does not appear to be a top-level
    ``huggingface_hub`` function; the module-level import of that name should
    be checked separately.
    """
    import copy

    # Strip descriptions on copies: the caller may keep using the same job
    # objects after this call (e.g. to render them), so they must stay intact.
    stripped_jobs = []
    for job in new_jobs:
        stripped = copy.copy(job)
        stripped.job_description = ""
        stripped_jobs.append(stripped)

    # get current DB
    current_db = []
    # merge
    new_db = merge_dbs(current_db, stripped_jobs)

    # update
    local_path = "./" + FILE_NAME
    with open(local_path, 'w') as file:
        # json cannot encode arbitrary class instances; fall back to the
        # instance's attribute dict for anything it does not know.
        json.dump(new_db, file, indent=4, default=vars)

    # Upload only after the file is closed so its content is fully flushed.
    HfApi().upload_file(
        path_or_fileobj=local_path,
        path_in_repo=FILE_NAME,
        repo_id=REPO_ID,
        repo_type="dataset",
    )
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  mistralai
2
  python-jobspy
3
  markdownify
4
- beautifulsoup4
 
 
1
  mistralai
2
  python-jobspy
3
  markdownify
4
+ beautifulsoup4
5
+ huggingface_hub