jobsearch / jobspy_linkedin.py
PaulMartrenchar's picture
move ai part in new ai_manager.py
fd5aa4d
raw
history blame
5.03 kB
import warnings
import datetime
from jobspy import scrape_jobs
warnings.filterwarnings("ignore")
from ai_manager import get_offer_information
def get_job_url(job):
    """Return the best URL for a job posting.

    The direct URL (``job_url_direct``) is preferred. When its string form is
    ``"null"``, ``"nan"`` or ``"None"`` it is considered missing and the
    generic ``job_url`` is returned instead.
    """
    direct_url = job["job_url_direct"]
    direct_is_missing = f"{direct_url}" in ("null", "nan", "None")
    return job["job_url"] if direct_is_missing else direct_url
def get_company_url(job):
    """Return the best URL for the company behind a job posting.

    The direct URL (``company_url_direct``) is preferred. When its string form
    is ``"null"``, ``"nan"`` or ``"None"`` it is considered missing and
    ``company_url`` is returned instead.
    """
    direct_url = job["company_url_direct"]
    direct_is_missing = f"{direct_url}" in ("null", "nan", "None")
    return job["company_url"] if direct_is_missing else direct_url
def get_salary(job):
    """Return a display string for the salary of *job*.

    When the scraped ``min_amount`` is missing, the AI-extracted
    ``job["ai_result"]["salary_range"]`` is returned, unless it is not a
    string, is empty, or is ``"unknown"`` (case-insensitive), in which case
    an empty string is returned.

    Otherwise the result is ``"<min>-<max><currency>"``. A missing
    ``max_amount`` or ``currency`` is left out instead of being printed as
    ``nan``/``None``.
    """
    # String forms used throughout this module to recognise absent values.
    missing = ("null", "nan", "None")

    min_amount = job["min_amount"]
    if "{}".format(min_amount) in missing:
        ai_salary = job["ai_result"]["salary_range"]
        # Guard against a non-string value (e.g. None), which has no .lower().
        if isinstance(ai_salary, str) and ai_salary.strip().lower() not in ("", "unknown"):
            return ai_salary
        return ""

    max_amount = job["max_amount"]
    currency = job["currency"]
    currency_text = "" if "{}".format(currency) in missing else currency
    if "{}".format(max_amount) in missing:
        # Only the lower bound is known: do not render "-nan".
        return "{}{}".format(min_amount, currency_text)
    return "{}-{}{}".format(min_amount, max_amount, currency_text)
def format_should_apply(should_apply):
    """Return a star prefix for recommended offers, or an empty string otherwise."""
    return "⭐ " if should_apply else ""
def get_logo(job):
    """Return the company logo URL of *job*, or a placeholder image URL.

    The placeholder is returned when ``logo_photo_url`` cannot be read from
    *job* (missing key, or *job* is not subscriptable) or when its string
    form is empty, ``"null"``, ``"nan"`` or ``"None"``.
    """
    placeholder = "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
    try:
        logo = job["logo_photo_url"]
    except (KeyError, IndexError, TypeError):
        # Best-effort lookup: any failure to read the field means "no logo".
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit still propagate.
        return placeholder
    if "{}".format(logo) in ("", "null", "nan", "None"):
        return placeholder
    return logo
def format_str_or_list(sum):
    """Render a string or a list of items as an HTML fragment.

    * ``str``: HTML-escaped, with newlines turned into ``<br />``.
    * ``list`` / ``tuple``: an ``<ul>`` with one HTML-escaped ``<li>`` per item.
    * anything else: returned unchanged.

    The text is escaped because it is interpolated into the page as markup
    by the caller; special characters such as ``<`` or ``&`` must not be
    interpreted as HTML.

    The parameter keeps its original name (``sum``) for backward
    compatibility, even though it shadows the builtin inside this function.
    """
    from html import escape

    if isinstance(sum, str):
        # Escape first so the <br /> tags added afterwards stay real markup.
        return escape(sum).replace("\n", "<br />")
    if isinstance(sum, (list, tuple)):
        return "<ul>" + "".join(f"<li>{escape(str(item))}</li>" for item in sum) + "</ul>"
    return sum
def format_posted_date(date):
    """Format a posting date as ``DD/MM/YYYY``, or ``"?"`` when it is unknown.

    Accepted inputs:

    * a missing value (``None``, or anything whose string form is empty,
      ``"nan"``, ``"None"``, ``"null"`` or ``"NaT"``) -> ``"?"``;
    * a string holding an integer Unix timestamp (converted with
      ``datetime.fromtimestamp``, i.e. in local time) or an ISO-8601
      date/datetime;
    * any object exposing ``strftime`` (e.g. ``datetime.date``).

    Raises:
        ValueError: if *date* is a string that is neither an integer
            timestamp nor an ISO-8601 date.
    """
    if date is None or "{}".format(date) in ("", "nan", "None", "null", "NaT"):
        return "?"
    if isinstance(date, str):
        try:
            parsed = datetime.datetime.fromtimestamp(int(date))
        except ValueError:
            # Not a numeric timestamp: fall back to an ISO-8601 string.
            parsed = datetime.datetime.fromisoformat(date)
        return parsed.strftime("%d/%m/%Y")
    return date.strftime("%d/%m/%Y")
def html_format_job(job):
    """Render one job offer as an HTML ``<div class='job'>`` card.

    *job* must provide ``title``, ``company``, ``date_posted`` and an
    ``ai_result`` mapping with ``should_apply``, ``company_description``,
    ``position_summary``, ``language_requirements`` and
    ``experience_requirements``, plus the fields read by ``get_logo``,
    ``get_job_url``, ``get_company_url`` and ``get_salary``.

    Every scraped or AI-generated scalar value is HTML-escaped before being
    interpolated, so characters such as ``<``, ``&`` or quotes cannot break
    out of the surrounding markup or the single-quoted attributes.

    Returns the card as a single string (fragments joined with spaces).
    """
    from html import escape

    def _esc(value):
        # Escape any value (converted to text) for use in HTML text or attributes.
        return escape("{}".format(value), quote=True)

    # The outputs of format_str_or_list are HTML fragments (<br />, <ul>) and
    # are therefore inserted as-is, not escaped again here.
    result = [
        # open box
        "<div class='job'>",
        # logo
        "<div class='logobox'><img src='{}' alt='No logo' class='logo'></div>".format(
            _esc(get_logo(job))
        ),
        # text part
        "<div style='flex: 5; padding: 10px;'>",
        "<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(
            _esc(get_job_url(job)),
            format_should_apply(job["ai_result"]["should_apply"]),
            _esc(job["title"]),
        ),
        "<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(
            _esc(get_company_url(job)),
            _esc(job["company"]),
            _esc(job["ai_result"]["company_description"]),
            _esc(format_posted_date(job["date_posted"])),
        ),
        "<p><h4>Position: {}</h4>{}</p>".format(
            _esc(get_salary(job)),
            format_str_or_list(job["ai_result"]["position_summary"]),
        ),
        "<p><h4>Language:</h4>{}</p>".format(
            format_str_or_list(job["ai_result"]["language_requirements"])
        ),
        "<p><h4>Experience:</h4>{}</p>".format(
            format_str_or_list(job["ai_result"]["experience_requirements"])
        ),
        # close text part
        "</div>",
        # close box
        "</div>",
    ]
    return " ".join(result)
def filterout_jobs(jobs, job_filter, job_filter_negative):
    """Select the job rows worth displaying and attach their AI analysis.

    A row of *jobs* (iterated with ``jobs.iterrows()``) is kept when:

    * its title is a string containing none of the *job_filter_negative*
      substrings and at least one of the *job_filter* substrings
      (the title is lower-cased before matching; the filter items are used as given);
    * its description is present (string form not ``"null"``, ``"nan"``
      or ``"None"``);
    * ``get_offer_information(company, description)`` reports
      ``is_an_internship`` equal to ``False``.

    The AI result is stored on the kept row under ``"ai_result"``.

    Returns a list of the selected rows.
    """
    selected_jobs = []
    for _, job in jobs.iterrows():
        title = job["title"]
        # A missing title comes through as a non-string (e.g. NaN) and cannot match any keyword.
        if not isinstance(title, str):
            continue
        title = title.lower()  # lower-case once instead of once per keyword

        if any(item in title for item in job_filter_negative):
            continue
        if not any(item in title for item in job_filter):
            continue
        if "{}".format(job["description"]) in ("null", "nan", "None"):
            continue

        # Only rows that passed the cheap filters reach the AI call.
        job["ai_result"] = get_offer_information(job["company"], job["description"])
        # Deliberately "== False": an undetermined value (e.g. None) is not accepted.
        if job["ai_result"]["is_an_internship"] == False:  # noqa: E712
            selected_jobs.append(job)
    return selected_jobs
def html_format_page(jobs, job_filter, job_filter_negative):
    """Build the complete HTML page listing the jobs that pass the filters.

    The jobs are narrowed down with ``filterout_jobs`` and each remaining one
    is rendered with ``html_format_job``. When nothing remains, the page body
    is the text ``No job found``.
    """
    kept_jobs = filterout_jobs(jobs, job_filter, job_filter_negative)

    # Page header with the inline stylesheet used by the job cards.
    page = ["<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"]

    if kept_jobs:
        page.extend(html_format_job(kept) for kept in kept_jobs)
    else:
        page.append("No job found")

    page.append("</body></html>")
    return " ".join(page)
def get_jobs(search_term, results_wanted, *, location="Paris, France", job_type="fulltime"):
    """Scrape LinkedIn job offers with JobSpy's ``scrape_jobs``.

    Args:
        search_term: Query passed to the job board.
        results_wanted: Number of results requested.
        location: Search location; defaults to the previously hard-coded
            ``"Paris, France"``.
        job_type: Job type filter; defaults to the previously hard-coded
            ``"fulltime"``.

    Returns:
        Whatever ``scrape_jobs`` returns. The rest of this module iterates it
        with ``.iterrows()``, so presumably a pandas DataFrame.
    """
    return scrape_jobs(
        site_name=["linkedin"],
        search_term=search_term,
        location=location,
        job_type=job_type,
        results_wanted=results_wanted,
        # NOTE: hours_old is intentionally not passed (it was previously tried with 240);
        # only Linkedin/Indeed is hour specific, others round up to days old.
        linkedin_fetch_description=True,
        enforce_annual_salary=True,
    )
def linkedin_get_html(search_term):
    """Scrape up to 50 LinkedIn offers for *search_term* and return them as an HTML page.

    The offers are filtered by title keywords: one list of keywords to
    require and one list of keywords to reject.
    """
    # Keywords that reject an offer when found in its title.
    rejected_keywords = ["stage", "stagiaire", "alternant", "alternance", "intern", "internship", "apprenti"]
    # Keywords of which at least one must be found in the title.
    wanted_keywords = ["marketing", "communication", "community", "business development", "experience", "social media", "brand", "ppc", "seo", "sea", "ads", "user acquisition", "adops", "consultant"]

    scraped_jobs = get_jobs(search_term, 50)
    return html_format_page(scraped_jobs, wanted_keywords, rejected_keywords)