Spaces:
Sleeping
Sleeping
Commit
·
5562352
1
Parent(s):
dae902c
Try to save to DB
Browse files- app.py +3 -0
- db.py +42 -0
- requirements.txt +2 -1
app.py
CHANGED
@@ -7,6 +7,7 @@ from jobspy_indeed import indeed_get_jobs
|
|
7 |
from WelcomeToTheJungle import wtoj_get_jobs
|
8 |
from jobspy_linkedin import linkedin_get_jobs
|
9 |
from ai_manager import get_extra_information
|
|
|
10 |
|
11 |
def html_format_page(jobs : List[JobDescription]):
|
12 |
result = ["<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"]
|
@@ -55,6 +56,8 @@ def search_jobs(search_term, platform):
|
|
55 |
for job in selected_jobs:
|
56 |
job.ai_result = get_extra_information(job.company, job.job_description)
|
57 |
|
|
|
|
|
58 |
return html_format_page(selected_jobs)
|
59 |
|
60 |
|
|
|
7 |
from WelcomeToTheJungle import wtoj_get_jobs
|
8 |
from jobspy_linkedin import linkedin_get_jobs
|
9 |
from ai_manager import get_extra_information
|
10 |
+
from db import add_to_db
|
11 |
|
12 |
def html_format_page(jobs : List[JobDescription]):
|
13 |
result = ["<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"]
|
|
|
56 |
for job in selected_jobs:
|
57 |
job.ai_result = get_extra_information(job.company, job.job_description)
|
58 |
|
59 |
+
add_to_db(selected_jobs)
|
60 |
+
|
61 |
return html_format_page(selected_jobs)
|
62 |
|
63 |
|
db.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List
|
2 |
+
from JobDescription import JobDescription
|
3 |
+
from huggingface_hub import hf_hub_download, HfApi, HfFolder, Repository, push_to_hub
|
4 |
+
import json
|
5 |
+
|
6 |
+
REPO_ID = "PaulMartrenchar/jobsearch_database"
|
7 |
+
FILE_NAME = "db.json"
|
8 |
+
|
9 |
+
def get_current_db() -> List[JobDescription]:
    """Fetch the job database file from the Hub dataset repo and parse it.

    Downloads FILE_NAME from the REPO_ID dataset via the hub cache, then
    returns the parsed JSON payload.
    NOTE(review): json.load yields plain dicts, not JobDescription
    instances, despite the annotation — confirm against callers.
    """
    local_copy = hf_hub_download(repo_id=REPO_ID, filename=FILE_NAME, repo_type="dataset")
    with open(local_copy, 'r') as db_file:
        return json.load(db_file)
|
13 |
+
|
14 |
+
def merge_dbs(list1 : List[JobDescription], list2 : List[JobDescription]):
    """Combine two job lists, keeping only the first job seen per URL.

    Order is preserved: jobs from *list1* come first, followed by any job
    from *list2* whose URL was not already present.
    """
    by_url = {}
    for candidate in list1 + list2:
        # setdefault keeps the first job stored under each URL, so later
        # duplicates are dropped exactly as the set-based original did.
        by_url.setdefault(candidate.url, candidate)
    return list(by_url.values())
|
24 |
+
|
25 |
+
def add_to_db(new_jobs : List[JobDescription]):
    """Persist *new_jobs* into the Hub-hosted JSON job database.

    Strips the (large) job descriptions, merges the jobs with the current
    DB contents, writes the merged list to a local JSON file, and uploads
    that file to the REPO_ID dataset repo.
    """
    #Remove descriptions from the JobDescription (keeps the stored DB small)
    for job in new_jobs:
        job.job_description = ""

    #get current DB
    # NOTE(review): left empty in the original, so each upload OVERWRITES the
    # remote DB instead of extending it. get_current_db() returns plain dicts
    # while merge_dbs() reads a .url attribute, so that mismatch must be
    # resolved before switching this to: current_db = get_current_db()
    current_db = []
    #merge, dropping jobs whose URL is already present
    new_db = merge_dbs(current_db, new_jobs)

    #update: write the merged DB locally, then push the file to the Hub.
    local_path = "./" + FILE_NAME
    with open(local_path, 'w') as file:
        # default=vars: JobDescription instances are not JSON-serializable on
        # their own; serialize them through their __dict__.
        json.dump(new_db, file, indent=4, default=vars)
    # huggingface_hub exposes no top-level push_to_hub(); HfApi.upload_file
    # (HfApi is already imported at the top of this file) is the supported
    # way to push a single file to a repo.
    HfApi().upload_file(
        path_or_fileobj=local_path,
        path_in_repo=FILE_NAME,
        repo_id=REPO_ID,
        repo_type="dataset",
    )
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
mistralai
|
2 |
python-jobspy
|
3 |
markdownify
|
4 |
-
beautifulsoup4
|
|
|
|
1 |
mistralai
|
2 |
python-jobspy
|
3 |
markdownify
|
4 |
+
beautifulsoup4
|
5 |
+
huggingface_hub
|