Spaces:
Sleeping
Sleeping
Commit
·
99273f7
1
Parent(s):
91a0869
Convert LinkedIn to the new JobDescription class
Browse files- JobDescription.py +10 -1
- jobspy_linkedin.py +26 -46
JobDescription.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
class JobDescription:
|
2 |
def __init__(self, title, company, url, company_url, job_description):
|
3 |
self.title = title
|
@@ -26,6 +28,13 @@ class JobDescription:
|
|
26 |
if isinstance(input, list):
|
27 |
return "<ul>" + "".join(f"<li>{item}</li>" for item in input) + "</ul>"
|
28 |
return input
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
def to_html(self):
|
31 |
#open box
|
@@ -35,7 +44,7 @@ class JobDescription:
|
|
35 |
#text part
|
36 |
result.append("<div style='flex: 5; padding: 10px;'>")
|
37 |
result.append("<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(self.url, self.format_should_apply(self.ai_result["should_apply"]), self.title))
|
38 |
-
result.append("<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(self.company_url, self.company, self.ai_result["company_description"], self.published_at
|
39 |
result.append("<p><h4>Position: {}</h4>{}</p>".format(self.get_salary(), self.format_str_or_list(self.ai_result["position_summary"])))
|
40 |
result.append("<p><h4>Language:</h4>{}</p>".format(self.format_str_or_list(self.ai_result["language_requirements"])))
|
41 |
result.append("<p><h4>Experience:</h4>{}</p>".format(self.format_str_or_list(self.ai_result["experience_requirements"])))
|
|
|
1 |
+
from datetime import datetime
|
2 |
+
|
3 |
class JobDescription:
|
4 |
def __init__(self, title, company, url, company_url, job_description):
|
5 |
self.title = title
|
|
|
28 |
if isinstance(input, list):
|
29 |
return "<ul>" + "".join(f"<li>{item}</li>" for item in input) + "</ul>"
|
30 |
return input
|
31 |
+
|
32 |
+
def format_posted_date(self, date):
    """Format a job's publication date as ``dd/mm/YYYY`` for display.

    Args:
        date: One of
            * a missing-value marker whose text form is ``nan``, ``NaT`` or
              ``None`` (e.g. a float NaN coming from a DataFrame cell),
            * a string holding a Unix timestamp in seconds,
            * any object exposing ``strftime`` (``date`` / ``datetime``).

    Returns:
        The formatted date, or ``"?"`` when the date is unknown.

    Raises:
        ValueError: If ``date`` is a string that is not an integer timestamp.
    """
    # Missing values are recognised by their text form so that float NaN,
    # NaT-like objects and None are all handled without importing anything.
    if "{}".format(date) in ("nan", "NaT", "None"):
        return "?"
    if isinstance(date, str):
        # This module does ``from datetime import datetime``, so ``datetime``
        # is already the class; ``datetime.datetime`` raised AttributeError.
        return datetime.fromtimestamp(int(date)).strftime("%d/%m/%Y")
    return date.strftime("%d/%m/%Y")
|
38 |
|
39 |
def to_html(self):
|
40 |
#open box
|
|
|
44 |
#text part
|
45 |
result.append("<div style='flex: 5; padding: 10px;'>")
|
46 |
result.append("<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(self.url, self.format_should_apply(self.ai_result["should_apply"]), self.title))
|
47 |
+
result.append("<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(self.company_url, self.company, self.ai_result["company_description"], self.format_posted_date(self.published_at)))
|
48 |
result.append("<p><h4>Position: {}</h4>{}</p>".format(self.get_salary(), self.format_str_or_list(self.ai_result["position_summary"])))
|
49 |
result.append("<p><h4>Language:</h4>{}</p>".format(self.format_str_or_list(self.ai_result["language_requirements"])))
|
50 |
result.append("<p><h4>Experience:</h4>{}</p>".format(self.format_str_or_list(self.ai_result["experience_requirements"])))
|
jobspy_linkedin.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import warnings
|
2 |
import datetime
|
|
|
|
|
3 |
|
4 |
from jobspy import scrape_jobs
|
5 |
|
@@ -18,16 +20,9 @@ def get_company_url(job):
|
|
18 |
|
19 |
def get_salary(job):
|
20 |
if "{}".format(job["min_amount"]) == "nan" or "{}".format(job["min_amount"])== "None":
|
21 |
-
if job["ai_result"]["salary_range"].lower() not in ["", "unknown"]:
|
22 |
-
return job["ai_result"]["salary_range"]
|
23 |
return ""
|
24 |
return "{}-{}{}".format(job["min_amount"], job["max_amount"], job["currency"])
|
25 |
|
26 |
-
def format_should_apply(should_apply):
|
27 |
-
if should_apply:
|
28 |
-
return "⭐ "
|
29 |
-
return ""
|
30 |
-
|
31 |
def get_logo(job):
|
32 |
try:
|
33 |
if "{}".format(job["logo_photo_url"]) == "nan":
|
@@ -36,38 +31,6 @@ def get_logo(job):
|
|
36 |
except:
|
37 |
return "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
|
38 |
|
39 |
-
def format_str_or_list(sum):
|
40 |
-
if isinstance(sum, str):
|
41 |
-
return sum.replace("\n", "<br />")
|
42 |
-
if isinstance(sum, list):
|
43 |
-
return "<ul>" + "".join(f"<li>{item}</li>" for item in sum) + "</ul>"
|
44 |
-
return sum
|
45 |
-
|
46 |
-
def format_posted_date(date):
|
47 |
-
if "{}".format(date) == "nan":
|
48 |
-
return "?"
|
49 |
-
if isinstance(date, str):
|
50 |
-
return datetime.datetime.fromtimestamp(int(date)).strftime("%d/%m/%Y")
|
51 |
-
return date.strftime("%d/%m/%Y")
|
52 |
-
|
53 |
-
def html_format_job(job):
|
54 |
-
#open box
|
55 |
-
result = ["<div class='job'>"]
|
56 |
-
#logo
|
57 |
-
result.append("<div class='logobox'><img src='{}' alt='No logo' class='logo'></div>".format(get_logo(job)))
|
58 |
-
#text part
|
59 |
-
result.append("<div style='flex: 5; padding: 10px;'>")
|
60 |
-
result.append("<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(get_job_url(job), format_should_apply(job["ai_result"]["should_apply"]), job["title"]))
|
61 |
-
result.append("<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(get_company_url(job), job["company"], job["ai_result"]["company_description"], format_posted_date(job["date_posted"])))
|
62 |
-
result.append("<p><h4>Position: {}</h4>{}</p>".format(get_salary(job), format_str_or_list(job["ai_result"]["position_summary"])))
|
63 |
-
result.append("<p><h4>Language:</h4>{}</p>".format(format_str_or_list(job["ai_result"]["language_requirements"])))
|
64 |
-
result.append("<p><h4>Experience:</h4>{}</p>".format(format_str_or_list(job["ai_result"]["experience_requirements"])))
|
65 |
-
#close text part
|
66 |
-
result.append("</div>")
|
67 |
-
#close box
|
68 |
-
result.append("</div>")
|
69 |
-
return " ".join(result)
|
70 |
-
|
71 |
def filterout_jobs(jobs, job_filter, job_filter_negative):
|
72 |
selected_jobs = []
|
73 |
for index, job in jobs.iterrows():
|
@@ -78,12 +41,11 @@ def filterout_jobs(jobs, job_filter, job_filter_negative):
|
|
78 |
|
79 |
return selected_jobs
|
80 |
|
81 |
-
def html_format_page(jobs
|
82 |
-
selected_jobs = filterout_jobs(jobs, job_filter, job_filter_negative)
|
83 |
result = ["<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"]
|
84 |
-
if len(
|
85 |
-
for job in
|
86 |
-
result.append(
|
87 |
else:
|
88 |
result.append("No job found")
|
89 |
result.append("</body></html>")
|
@@ -102,12 +64,30 @@ def get_jobs(search_term, results_wanted):
|
|
102 |
enforce_annual_salary=True,
|
103 |
)
|
104 |
|
105 |
-
def
|
106 |
jobs = get_jobs(search_term, 50)
|
107 |
|
108 |
#filter on the job description
|
109 |
job_filter = ["marketing", "communication", "community", "business development", "experience", "social media", "brand", "ppc", "seo", "sea", "ads", "user acquisition", "adops", "consultant"]
|
110 |
job_filter_negative = ["stage", "stagiaire", "alternant", "alternance", "intern", "internship", "apprenti"]
|
111 |
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
|
|
1 |
import warnings
|
2 |
import datetime
|
3 |
+
from typing import List
|
4 |
+
from JobDescription import JobDescription
|
5 |
|
6 |
from jobspy import scrape_jobs
|
7 |
|
|
|
20 |
|
21 |
def get_salary(job):
    """Build the salary label for a scraped job row.

    Args:
        job: Mapping-like row providing ``min_amount``, ``max_amount`` and
            ``currency`` entries.

    Returns:
        ``"<min>-<max><currency>"``, or an empty string when the minimum
        amount renders as ``nan`` or ``None`` (i.e. no salary was scraped).
    """
    # The minimum amount is compared by its formatted text, which covers both
    # a float NaN and a plain None without type checks.
    lower_text = f"{job['min_amount']}"
    if lower_text in ("nan", "None"):
        return ""
    return f"{job['min_amount']}-{job['max_amount']}{job['currency']}"
|
25 |
|
|
|
|
|
|
|
|
|
|
|
26 |
def get_logo(job):
|
27 |
try:
|
28 |
if "{}".format(job["logo_photo_url"]) == "nan":
|
|
|
31 |
except:
|
32 |
return "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
def filterout_jobs(jobs, job_filter, job_filter_negative):
|
35 |
selected_jobs = []
|
36 |
for index, job in jobs.iterrows():
|
|
|
41 |
|
42 |
return selected_jobs
|
43 |
|
44 |
+
def html_format_page(jobs : List[JobDescription]):
|
|
|
45 |
result = ["<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"]
|
46 |
+
if len(jobs) > 0:
|
47 |
+
for job in jobs:
|
48 |
+
result.append(job.to_html())
|
49 |
else:
|
50 |
result.append("No job found")
|
51 |
result.append("</body></html>")
|
|
|
64 |
enforce_annual_salary=True,
|
65 |
)
|
66 |
|
67 |
+
def get_filtered_jobs(search_term)-> List[JobDescription]:
    """Scrape jobs for ``search_term`` and wrap the retained ones.

    Requests 50 results through ``get_jobs``, filters them with
    ``filterout_jobs`` using the keyword lists below, and converts each
    remaining row into a ``JobDescription``.

    Args:
        search_term: Query string forwarded to ``get_jobs``.

    Returns:
        One ``JobDescription`` per selected job, in the order returned by
        ``filterout_jobs``, with ``published_at``, ``organization_logo_url``
        and ``salary_range`` filled in from the row.
    """
    # Keyword lists handed to filterout_jobs (filter on the job description).
    wanted_keywords = ["marketing", "communication", "community", "business development", "experience", "social media", "brand", "ppc", "seo", "sea", "ads", "user acquisition", "adops", "consultant"]
    unwanted_keywords = ["stage", "stagiaire", "alternant", "alternance", "intern", "internship", "apprenti"]

    def _to_description(row):
        # Build the description object first, then attach the extra fields
        # that are not constructor arguments.
        description = JobDescription(
            title=row["title"],
            company=row["company"],
            url=get_job_url(row),
            company_url=get_company_url(row),
            job_description=row["description"],
        )
        description.published_at = row["date_posted"]
        description.organization_logo_url = get_logo(row)
        description.salary_range = get_salary(row)
        return description

    scraped = get_jobs(search_term, 50)
    kept_rows = filterout_jobs(scraped, wanted_keywords, unwanted_keywords)
    return [_to_description(row) for row in kept_rows]
|
86 |
+
|
87 |
+
def linkedin_get_html(search_term):
    """Render the filtered job offers for ``search_term`` as an HTML page.

    Each job returned by ``get_filtered_jobs`` gets its ``ai_result``
    attribute set from ``get_offer_information`` (called with the job's
    company and description) before the list is passed to
    ``html_format_page``.

    Args:
        search_term: Query string forwarded to ``get_filtered_jobs``.

    Returns:
        Whatever ``html_format_page`` returns for the enriched job list.
    """
    offers = get_filtered_jobs(search_term)
    for offer in offers:
        # ai_result is read later by JobDescription.to_html.
        analysis = get_offer_information(offer.company, offer.job_description)
        offer.ai_result = analysis
    page = html_format_page(offers)
    return page
|
93 |
|