Spaces:

PaulMartrenchar
/

jobsearch

Sleeping

App Files Files Community

PaulMartrenchar committed on Jan 2

Commit

99273f7

1 Parent(s): 91a0869

Convert LinkedIn to the new JobDescription class

Browse files

Files changed (2) hide show

JobDescription.py +10 -1
jobspy_linkedin.py +26 -46

JobDescription.py CHANGED Viewed

@@ -1,3 +1,5 @@
 class JobDescription:
     def __init__(self, title, company, url, company_url, job_description):
         self.title = title
@@ -26,6 +28,13 @@ class JobDescription:
         if isinstance(input, list):
             return "<ul>" + "".join(f"<li>{item}</li>" for item in input) + "</ul>"
         return input
     def to_html(self):
         #open box
@@ -35,7 +44,7 @@ class JobDescription:
         #text part
         result.append("<div style='flex: 5; padding: 10px;'>")
         result.append("<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(self.url, self.format_should_apply(self.ai_result["should_apply"]), self.title))
-        result.append("<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(self.company_url, self.company, self.ai_result["company_description"], self.published_at.strftime("%d/%m/%Y")))
         result.append("<p><h4>Position: {}</h4>{}</p>".format(self.get_salary(), self.format_str_or_list(self.ai_result["position_summary"])))
         result.append("<p><h4>Language:</h4>{}</p>".format(self.format_str_or_list(self.ai_result["language_requirements"])))
         result.append("<p><h4>Experience:</h4>{}</p>".format(self.format_str_or_list(self.ai_result["experience_requirements"])))

+from datetime import datetime
 class JobDescription:
     def __init__(self, title, company, url, company_url, job_description):
         self.title = title
         if isinstance(input, list):
             return "<ul>" + "".join(f"<li>{item}</li>" for item in input) + "</ul>"
         return input
+    def format_posted_date(self, date):
+        if "{}".format(date) == "nan":
+            return "?"
+        if isinstance(date, str):
+            return datetime.datetime.fromtimestamp(int(date)).strftime("%d/%m/%Y")
+        return date.strftime("%d/%m/%Y")
     def to_html(self):
         #open box
         #text part
         result.append("<div style='flex: 5; padding: 10px;'>")
         result.append("<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(self.url, self.format_should_apply(self.ai_result["should_apply"]), self.title))
+        result.append("<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(self.company_url, self.company, self.ai_result["company_description"], self.format_posted_date(self.published_at)))
         result.append("<p><h4>Position: {}</h4>{}</p>".format(self.get_salary(), self.format_str_or_list(self.ai_result["position_summary"])))
         result.append("<p><h4>Language:</h4>{}</p>".format(self.format_str_or_list(self.ai_result["language_requirements"])))
         result.append("<p><h4>Experience:</h4>{}</p>".format(self.format_str_or_list(self.ai_result["experience_requirements"])))

jobspy_linkedin.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import warnings
 import datetime
 from jobspy import scrape_jobs
@@ -18,16 +20,9 @@ def get_company_url(job):
 def get_salary(job):
     if "{}".format(job["min_amount"]) == "nan" or "{}".format(job["min_amount"])== "None":
-        if job["ai_result"]["salary_range"].lower() not in ["", "unknown"]:
-            return job["ai_result"]["salary_range"]
         return ""
     return "{}-{}{}".format(job["min_amount"], job["max_amount"], job["currency"])
-def format_should_apply(should_apply):
-    if should_apply:
-        return "&#x2B50; "
-    return ""
 def get_logo(job):
     try:
         if "{}".format(job["logo_photo_url"]) == "nan":
@@ -36,38 +31,6 @@ def get_logo(job):
     except:
         return "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
-def format_str_or_list(sum):
-    if isinstance(sum, str):
-        return sum.replace("\n", "<br />")
-    if isinstance(sum, list):
-        return "<ul>" + "".join(f"<li>{item}</li>" for item in sum) + "</ul>"
-    return sum
-def format_posted_date(date):
-    if "{}".format(date) == "nan":
-        return "?"
-    if isinstance(date, str):
-        return datetime.datetime.fromtimestamp(int(date)).strftime("%d/%m/%Y")
-    return date.strftime("%d/%m/%Y")
-def html_format_job(job):
-    #open box
-    result = ["<div class='job'>"]
-    #logo
-    result.append("<div class='logobox'><img src='{}' alt='No logo' class='logo'></div>".format(get_logo(job)))
-    #text part
-    result.append("<div style='flex: 5; padding: 10px;'>")
-    result.append("<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(get_job_url(job), format_should_apply(job["ai_result"]["should_apply"]), job["title"]))
-    result.append("<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(get_company_url(job), job["company"], job["ai_result"]["company_description"], format_posted_date(job["date_posted"])))
-    result.append("<p><h4>Position: {}</h4>{}</p>".format(get_salary(job), format_str_or_list(job["ai_result"]["position_summary"])))
-    result.append("<p><h4>Language:</h4>{}</p>".format(format_str_or_list(job["ai_result"]["language_requirements"])))
-    result.append("<p><h4>Experience:</h4>{}</p>".format(format_str_or_list(job["ai_result"]["experience_requirements"])))
-    #close text part
-    result.append("</div>")
-    #close box
-    result.append("</div>")
-    return " ".join(result)
 def filterout_jobs(jobs, job_filter, job_filter_negative):
     selected_jobs = []
     for index, job in jobs.iterrows():
@@ -78,12 +41,11 @@ def filterout_jobs(jobs, job_filter, job_filter_negative):
     return selected_jobs
-def html_format_page(jobs, job_filter, job_filter_negative):
-    selected_jobs = filterout_jobs(jobs, job_filter, job_filter_negative)
     result = ["<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"]
-    if len(selected_jobs) > 0:
-        for job in selected_jobs:
-            result.append(html_format_job(job))
     else:
         result.append("No job found")
     result.append("</body></html>")
@@ -102,12 +64,30 @@ def get_jobs(search_term, results_wanted):
         enforce_annual_salary=True,
     )
-def linkedin_get_html(search_term):
     jobs = get_jobs(search_term, 50)
     #filter on the job description
     job_filter = ["marketing", "communication", "community", "business development", "experience", "social media", "brand", "ppc", "seo", "sea", "ads", "user acquisition", "adops", "consultant"]
     job_filter_negative = ["stage", "stagiaire", "alternant", "alternance", "intern", "internship", "apprenti"]
-    return html_format_page(jobs, job_filter, job_filter_negative)

 import warnings
 import datetime
+from typing import List
+from JobDescription import JobDescription
 from jobspy import scrape_jobs
 def get_salary(job):
     """Build the "min-maxCURRENCY" salary label, or "" when no minimum amount is set."""
     lower_bound = job["min_amount"]
     # A missing amount formats to the text "nan" or "None".
     if f"{lower_bound}" in ("nan", "None"):
         return ""
     return f"{lower_bound}-{job['max_amount']}{job['currency']}"
 def get_logo(job):
     try:
         if "{}".format(job["logo_photo_url"]) == "nan":
     except:
         return "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
 def filterout_jobs(jobs, job_filter, job_filter_negative):
     selected_jobs = []
     for index, job in jobs.iterrows():
     return selected_jobs
+def html_format_page(jobs : List[JobDescription]):
     result = ["<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"]
+    if len(jobs) > 0:
+        for job in jobs:
+            result.append(job.to_html())
     else:
         result.append("No job found")
     result.append("</body></html>")
         enforce_annual_salary=True,
     )
+def get_filtered_jobs(search_term)-> List[JobDescription]:
     jobs = get_jobs(search_term, 50)
     #filter on the job description
     job_filter = ["marketing", "communication", "community", "business development", "experience", "social media", "brand", "ppc", "seo", "sea", "ads", "user acquisition", "adops", "consultant"]
     job_filter_negative = ["stage", "stagiaire", "alternant", "alternance", "intern", "internship", "apprenti"]
+    selected_jobs = filterout_jobs(jobs, job_filter, job_filter_negative)
+    result = []
+    for job in selected_jobs:
+        job_desc = JobDescription(title=job["title"], company=job["company"], url=get_job_url(job), company_url=get_company_url(job),
+                                  job_description=job["description"])
+        job_desc.published_at=job["date_posted"]
+        job_desc.organization_logo_url = get_logo(job)
+        job_desc.salary_range = get_salary(job)
+        result.append(job_desc)
+    return result
+def linkedin_get_html(search_term):
+    jobs = get_filtered_jobs(search_term)
+    for job in jobs:
+        job.ai_result = get_offer_information(job.company, job.job_description)
+    return html_format_page(jobs)