PaulMartrenchar committed on
Commit
99273f7
·
1 Parent(s): 91a0869

Convert LinkedIn to the new JobDescription class

Browse files
Files changed (2) hide show
  1. JobDescription.py +10 -1
  2. jobspy_linkedin.py +26 -46
JobDescription.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  class JobDescription:
2
  def __init__(self, title, company, url, company_url, job_description):
3
  self.title = title
@@ -26,6 +28,13 @@ class JobDescription:
26
  if isinstance(input, list):
27
  return "<ul>" + "".join(f"<li>{item}</li>" for item in input) + "</ul>"
28
  return input
 
 
 
 
 
 
 
29
 
30
  def to_html(self):
31
  #open box
@@ -35,7 +44,7 @@ class JobDescription:
35
  #text part
36
  result.append("<div style='flex: 5; padding: 10px;'>")
37
  result.append("<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(self.url, self.format_should_apply(self.ai_result["should_apply"]), self.title))
38
- result.append("<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(self.company_url, self.company, self.ai_result["company_description"], self.published_at.strftime("%d/%m/%Y")))
39
  result.append("<p><h4>Position: {}</h4>{}</p>".format(self.get_salary(), self.format_str_or_list(self.ai_result["position_summary"])))
40
  result.append("<p><h4>Language:</h4>{}</p>".format(self.format_str_or_list(self.ai_result["language_requirements"])))
41
  result.append("<p><h4>Experience:</h4>{}</p>".format(self.format_str_or_list(self.ai_result["experience_requirements"])))
 
1
+ from datetime import datetime
2
+
3
  class JobDescription:
4
  def __init__(self, title, company, url, company_url, job_description):
5
  self.title = title
 
28
  if isinstance(input, list):
29
  return "<ul>" + "".join(f"<li>{item}</li>" for item in input) + "</ul>"
30
  return input
31
+
32
+ def format_posted_date(self, date):
33
+ if "{}".format(date) == "nan":
34
+ return "?"
35
+ if isinstance(date, str):
36
+ return datetime.datetime.fromtimestamp(int(date)).strftime("%d/%m/%Y")
37
+ return date.strftime("%d/%m/%Y")
38
 
39
  def to_html(self):
40
  #open box
 
44
  #text part
45
  result.append("<div style='flex: 5; padding: 10px;'>")
46
  result.append("<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(self.url, self.format_should_apply(self.ai_result["should_apply"]), self.title))
47
+ result.append("<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(self.company_url, self.company, self.ai_result["company_description"], self.format_posted_date(self.published_at)))
48
  result.append("<p><h4>Position: {}</h4>{}</p>".format(self.get_salary(), self.format_str_or_list(self.ai_result["position_summary"])))
49
  result.append("<p><h4>Language:</h4>{}</p>".format(self.format_str_or_list(self.ai_result["language_requirements"])))
50
  result.append("<p><h4>Experience:</h4>{}</p>".format(self.format_str_or_list(self.ai_result["experience_requirements"])))
jobspy_linkedin.py CHANGED
@@ -1,5 +1,7 @@
1
  import warnings
2
  import datetime
 
 
3
 
4
  from jobspy import scrape_jobs
5
 
@@ -18,16 +20,9 @@ def get_company_url(job):
18
 
19
  def get_salary(job):
20
  if "{}".format(job["min_amount"]) == "nan" or "{}".format(job["min_amount"])== "None":
21
- if job["ai_result"]["salary_range"].lower() not in ["", "unknown"]:
22
- return job["ai_result"]["salary_range"]
23
  return ""
24
  return "{}-{}{}".format(job["min_amount"], job["max_amount"], job["currency"])
25
 
26
- def format_should_apply(should_apply):
27
- if should_apply:
28
- return "&#x2B50; "
29
- return ""
30
-
31
  def get_logo(job):
32
  try:
33
  if "{}".format(job["logo_photo_url"]) == "nan":
@@ -36,38 +31,6 @@ def get_logo(job):
36
  except:
37
  return "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
38
 
39
- def format_str_or_list(sum):
40
- if isinstance(sum, str):
41
- return sum.replace("\n", "<br />")
42
- if isinstance(sum, list):
43
- return "<ul>" + "".join(f"<li>{item}</li>" for item in sum) + "</ul>"
44
- return sum
45
-
46
- def format_posted_date(date):
47
- if "{}".format(date) == "nan":
48
- return "?"
49
- if isinstance(date, str):
50
- return datetime.datetime.fromtimestamp(int(date)).strftime("%d/%m/%Y")
51
- return date.strftime("%d/%m/%Y")
52
-
53
- def html_format_job(job):
54
- #open box
55
- result = ["<div class='job'>"]
56
- #logo
57
- result.append("<div class='logobox'><img src='{}' alt='No logo' class='logo'></div>".format(get_logo(job)))
58
- #text part
59
- result.append("<div style='flex: 5; padding: 10px;'>")
60
- result.append("<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(get_job_url(job), format_should_apply(job["ai_result"]["should_apply"]), job["title"]))
61
- result.append("<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(get_company_url(job), job["company"], job["ai_result"]["company_description"], format_posted_date(job["date_posted"])))
62
- result.append("<p><h4>Position: {}</h4>{}</p>".format(get_salary(job), format_str_or_list(job["ai_result"]["position_summary"])))
63
- result.append("<p><h4>Language:</h4>{}</p>".format(format_str_or_list(job["ai_result"]["language_requirements"])))
64
- result.append("<p><h4>Experience:</h4>{}</p>".format(format_str_or_list(job["ai_result"]["experience_requirements"])))
65
- #close text part
66
- result.append("</div>")
67
- #close box
68
- result.append("</div>")
69
- return " ".join(result)
70
-
71
  def filterout_jobs(jobs, job_filter, job_filter_negative):
72
  selected_jobs = []
73
  for index, job in jobs.iterrows():
@@ -78,12 +41,11 @@ def filterout_jobs(jobs, job_filter, job_filter_negative):
78
 
79
  return selected_jobs
80
 
81
- def html_format_page(jobs, job_filter, job_filter_negative):
82
- selected_jobs = filterout_jobs(jobs, job_filter, job_filter_negative)
83
  result = ["<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"]
84
- if len(selected_jobs) > 0:
85
- for job in selected_jobs:
86
- result.append(html_format_job(job))
87
  else:
88
  result.append("No job found")
89
  result.append("</body></html>")
@@ -102,12 +64,30 @@ def get_jobs(search_term, results_wanted):
102
  enforce_annual_salary=True,
103
  )
104
 
105
- def linkedin_get_html(search_term):
106
  jobs = get_jobs(search_term, 50)
107
 
108
  #filter on the job description
109
  job_filter = ["marketing", "communication", "community", "business development", "experience", "social media", "brand", "ppc", "seo", "sea", "ads", "user acquisition", "adops", "consultant"]
110
  job_filter_negative = ["stage", "stagiaire", "alternant", "alternance", "intern", "internship", "apprenti"]
111
 
112
- return html_format_page(jobs, job_filter, job_filter_negative)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
 
1
  import warnings
2
  import datetime
3
+ from typing import List
4
+ from JobDescription import JobDescription
5
 
6
  from jobspy import scrape_jobs
7
 
 
20
 
21
  def get_salary(job):
22
  if "{}".format(job["min_amount"]) == "nan" or "{}".format(job["min_amount"])== "None":
 
 
23
  return ""
24
  return "{}-{}{}".format(job["min_amount"], job["max_amount"], job["currency"])
25
 
 
 
 
 
 
26
  def get_logo(job):
27
  try:
28
  if "{}".format(job["logo_photo_url"]) == "nan":
 
31
  except:
32
  return "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  def filterout_jobs(jobs, job_filter, job_filter_negative):
35
  selected_jobs = []
36
  for index, job in jobs.iterrows():
 
41
 
42
  return selected_jobs
43
 
44
+ def html_format_page(jobs : List[JobDescription]):
 
45
  result = ["<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"]
46
+ if len(jobs) > 0:
47
+ for job in jobs:
48
+ result.append(job.to_html())
49
  else:
50
  result.append("No job found")
51
  result.append("</body></html>")
 
64
  enforce_annual_salary=True,
65
  )
66
 
67
+ def get_filtered_jobs(search_term)-> List[JobDescription]:
68
  jobs = get_jobs(search_term, 50)
69
 
70
  #filter on the job description
71
  job_filter = ["marketing", "communication", "community", "business development", "experience", "social media", "brand", "ppc", "seo", "sea", "ads", "user acquisition", "adops", "consultant"]
72
  job_filter_negative = ["stage", "stagiaire", "alternant", "alternance", "intern", "internship", "apprenti"]
73
 
74
+ selected_jobs = filterout_jobs(jobs, job_filter, job_filter_negative)
75
+
76
+ result = []
77
+ for job in selected_jobs:
78
+ job_desc = JobDescription(title=job["title"], company=job["company"], url=get_job_url(job), company_url=get_company_url(job),
79
+ job_description=job["description"])
80
+ job_desc.published_at=job["date_posted"]
81
+ job_desc.organization_logo_url = get_logo(job)
82
+ job_desc.salary_range = get_salary(job)
83
+ result.append(job_desc)
84
+
85
+ return result
86
+
87
+ def linkedin_get_html(search_term):
88
+ jobs = get_filtered_jobs(search_term)
89
+ for job in jobs:
90
+ job.ai_result = get_offer_information(job.company, job.job_description)
91
+
92
+ return html_format_page(jobs)
93