# NOTE: "Spaces: / Running / Running" was page-status residue captured with this file; it is not program code.
import warnings | |
import datetime | |
from jobspy import scrape_jobs | |
warnings.filterwarnings("ignore") | |
from ai_manager import get_offer_information | |
def get_job_url(job):
    """Return the best link for a job posting.

    Prefers ``job["job_url_direct"]`` and falls back to ``job["job_url"]``
    when the direct link is absent or blank.

    Args:
        job: Record supporting ``.get`` and item access by column name
            (a dict or a pandas Series both qualify).

    Returns:
        The direct URL when it is usable, otherwise ``job["job_url"]``.

    Raises:
        KeyError: If the fallback ``job_url`` entry does not exist.
    """
    direct = job.get("job_url_direct")
    # Compare the string rendering so that None, NaN and their textual forms
    # are all recognised; an empty/whitespace-only link counts as missing too.
    if str(direct).strip() in ("", "null", "nan", "None"):
        return job["job_url"]
    return direct
def get_company_url(job):
    """Return the best link for the company behind a job posting.

    Prefers ``job["company_url_direct"]`` and falls back to
    ``job["company_url"]`` when the direct link is absent or blank.

    Args:
        job: Record supporting ``.get`` and item access by column name
            (a dict or a pandas Series both qualify).

    Returns:
        The direct company URL when it is usable, otherwise
        ``job["company_url"]``.

    Raises:
        KeyError: If the fallback ``company_url`` entry does not exist.
    """
    direct = job.get("company_url_direct")
    # Compare the string rendering so that None, NaN and their textual forms
    # are all recognised; an empty/whitespace-only link counts as missing too.
    if str(direct).strip() in ("", "null", "nan", "None"):
        return job["company_url"]
    return direct
def get_salary(job):
    """Build the salary text shown next to a job posting.

    When the posting carries a ``min_amount`` the result is
    ``"<min>-<max><currency>"``; a missing ``max_amount`` or ``currency`` is
    simply left out instead of being rendered as ``nan``/``None``.
    When there is no ``min_amount`` the ``salary_range`` found in
    ``job["ai_result"]`` is used, unless it is empty or ``"unknown"``
    (case-insensitive) or not a string at all.

    Args:
        job: Record supporting ``.get`` and item access by column name
            (a dict or a pandas Series both qualify).

    Returns:
        The salary description, or ``""`` when nothing usable is known.
    """
    missing = ("", "null", "nan", "None")

    minimum = job.get("min_amount")
    if str(minimum) in ("nan", "None"):
        # ai_result is only consulted when the posting has no amount.
        estimate = job["ai_result"]["salary_range"]
        # Guard against non-string values so .lower() cannot raise.
        if isinstance(estimate, str) and estimate.lower() not in ("", "unknown"):
            return estimate
        return ""

    maximum = job.get("max_amount")
    currency = job.get("currency")

    amount = "{}".format(minimum)
    if str(maximum) not in missing:
        amount = "{}-{}".format(minimum, maximum)
    if str(currency) not in missing:
        amount = "{}{}".format(amount, currency)
    return amount
def format_should_apply(should_apply):
    """Return the star prefix for recommended offers, or an empty string.

    Args:
        should_apply: Any value; its truthiness decides the result.

    Returns:
        ``"⭐ "`` when ``should_apply`` is truthy, ``""`` otherwise.
    """
    return "⭐ " if should_apply else ""
def get_logo(job):
    """Return the logo URL of a job posting, or a placeholder image URL.

    The placeholder is used when ``job`` has no ``logo_photo_url`` entry or
    when that entry is blank / NaN / None (or one of their textual forms).

    Args:
        job: Record indexable by column name (dict or pandas Series).

    Returns:
        The value of ``job["logo_photo_url"]`` when usable, otherwise the
        placeholder URL.
    """
    placeholder = "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
    try:
        logo = job["logo_photo_url"]
    except (KeyError, IndexError, TypeError):
        # Missing column or a record that cannot be indexed: stay best-effort
        # and show the placeholder rather than failing the whole page.
        return placeholder
    if str(logo).strip() in ("", "null", "nan", "None"):
        return placeholder
    return logo
def format_str_or_list(sum):
    """Render a text or a list of items as an HTML fragment.

    The content is HTML-escaped before being wrapped in markup, because it is
    interpolated into a page and may contain ``<``, ``>`` or ``&``.

    Args:
        sum: A string, a list of items, ``None``, or any other value.
            (The name shadows the builtin but is kept for compatibility.)

    Returns:
        * for a string: the escaped text with newlines turned into ``<br />``;
        * for a list: a ``<ul>`` with one escaped ``<li>`` per item;
        * for ``None``: ``""`` so that no literal "None" is displayed;
        * anything else is returned unchanged.
    """
    import html

    if sum is None:
        return ""
    if isinstance(sum, str):
        # Escape first so the <br /> tags inserted afterwards stay intact.
        return html.escape(sum).replace("\n", "<br />")
    if isinstance(sum, list):
        items = "".join(
            "<li>{}</li>".format(html.escape("{}".format(item))) for item in sum
        )
        return "<ul>" + items + "</ul>"
    return sum
def format_posted_date(date):
    """Format a posting date as ``DD/MM/YYYY``, or ``"?"`` when unknown.

    Args:
        date: One of
            * a missing marker (``None``, NaN, NaT, blank string) -> ``"?"``;
            * a string holding epoch seconds (interpreted in local time, as
              ``datetime.fromtimestamp`` does) or an ISO ``YYYY-MM-DD...``
              date;
            * any object exposing ``strftime`` (date, datetime, ...).

    Returns:
        The formatted date, or ``"?"`` when the value is missing or cannot be
        interpreted.
    """
    if str(date).strip() in ("", "null", "nan", "None", "NaT"):
        return "?"

    if isinstance(date, str):
        text = date.strip()
        try:
            return datetime.datetime.fromtimestamp(int(text)).strftime("%d/%m/%Y")
        except (ValueError, OverflowError, OSError):
            # Not a usable epoch value; try an ISO date next.
            pass
        try:
            # Only the leading YYYY-MM-DD part matters for the display.
            return datetime.date.fromisoformat(text[:10]).strftime("%d/%m/%Y")
        except ValueError:
            return "?"

    try:
        return date.strftime("%d/%m/%Y")
    except (AttributeError, ValueError):
        # Object without strftime, or one that refuses to format.
        return "?"
def html_format_job(job):
    """Render one job posting as an HTML ``<div class='job'>`` card.

    Values that come straight from the posting or from ``job["ai_result"]``
    (title, company, company description, salary, URLs) are HTML-escaped
    before being interpolated, so markup characters in scraped text cannot
    break out of the element or attribute they are placed in.

    Args:
        job: Record indexable by column name. Reads ``title``, ``company``,
            ``date_posted`` and ``ai_result`` (with the keys
            ``should_apply``, ``company_description``, ``position_summary``,
            ``language_requirements`` and ``experience_requirements``), plus
            whatever the URL/logo/salary helpers read.

    Returns:
        The card markup as a single string, fragments joined by a space.
    """
    import html

    def safe(value):
        # Escape quotes as well: several values land inside '...' attributes.
        return html.escape("{}".format(value), quote=True)

    ai_result = job["ai_result"]

    # open box
    result = ["<div class='job'>"]
    # logo
    result.append(
        "<div class='logobox'><img src='{}' alt='No logo' class='logo'></div>".format(
            safe(get_logo(job))
        )
    )
    # text part
    result.append("<div style='flex: 5; padding: 10px;'>")
    result.append(
        "<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(
            safe(get_job_url(job)),
            format_should_apply(ai_result["should_apply"]),
            safe(job["title"]),
        )
    )
    result.append(
        "<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(
            safe(get_company_url(job)),
            safe(job["company"]),
            safe(ai_result["company_description"]),
            safe(format_posted_date(job["date_posted"])),
        )
    )
    # format_str_or_list returns markup (<br />, <ul>), so its output is
    # inserted as-is rather than escaped again here.
    result.append(
        "<p><h4>Position: {}</h4>{}</p>".format(
            safe(get_salary(job)),
            format_str_or_list(ai_result["position_summary"]),
        )
    )
    result.append(
        "<p><h4>Language:</h4>{}</p>".format(
            format_str_or_list(ai_result["language_requirements"])
        )
    )
    result.append(
        "<p><h4>Experience:</h4>{}</p>".format(
            format_str_or_list(ai_result["experience_requirements"])
        )
    )
    # close text part
    result.append("</div>")
    # close box
    result.append("</div>")
    return " ".join(result)
def filterout_jobs(jobs, job_filter, job_filter_negative):
    """Select the postings worth showing and attach their AI analysis.

    A row is kept when, in this order:
      1. its ``title`` is a string (rows with a missing/NaN title are
         skipped instead of raising);
      2. the lower-cased title contains none of ``job_filter_negative``;
      3. the lower-cased title contains at least one of ``job_filter``;
      4. its ``description`` is not missing;
      5. ``get_offer_information`` reports ``is_an_internship == False``.

    Matching is plain substring containment, so a short keyword also matches
    inside longer words.

    Args:
        jobs: Object exposing ``iterrows()`` (a pandas DataFrame).
        job_filter: Iterable of lower-case keywords; one must match.
        job_filter_negative: Iterable of lower-case keywords; none may match.

    Returns:
        List of the selected rows, each with an added ``"ai_result"`` entry
        holding the value returned by ``get_offer_information``.
    """
    selected_jobs = []
    for _, job in jobs.iterrows():
        title = job["title"]
        if not isinstance(title, str):
            continue
        title = title.lower()

        if any(term in title for term in job_filter_negative):
            continue
        if not any(term in title for term in job_filter):
            continue
        if "{}".format(job["description"]) in ("null", "nan", "None"):
            continue

        # Only rows that passed the cheap checks reach the analysis call.
        job["ai_result"] = get_offer_information(job["company"], job["description"])
        # Deliberately an equality test: only an explicit false value selects
        # the job; None or other unexpected values keep it out.
        if job["ai_result"]["is_an_internship"] == False:  # noqa: E712
            selected_jobs.append(job)
    return selected_jobs
def html_format_page(jobs, job_filter, job_filter_negative):
    """Build the complete HTML page listing the selected job postings.

    Args:
        jobs: Raw postings, forwarded to ``filterout_jobs``.
        job_filter: Keywords forwarded to ``filterout_jobs``.
        job_filter_negative: Excluding keywords forwarded to
            ``filterout_jobs``.

    Returns:
        The page as one string: the opening markup with the inline style
        sheet, then one card per selected job (or the text ``No job found``
        when nothing was selected), then the closing tags, all joined by
        single spaces.
    """
    page_open = "<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"
    page_close = "</body></html>"

    kept = filterout_jobs(jobs, job_filter, job_filter_negative)
    # An empty selection yields an empty list, which falls back to the notice.
    cards = [html_format_job(entry) for entry in kept] or ["No job found"]
    return " ".join([page_open, *cards, page_close])
def get_jobs(search_term, results_wanted, location="Paris, France", job_type="fulltime", site_name=None):
    """Fetch job postings through ``jobspy.scrape_jobs``.

    Args:
        search_term: Search query forwarded to ``scrape_jobs``.
        results_wanted: Number of results requested from ``scrape_jobs``.
        location: Search location; defaults to the previously hard-coded
            ``"Paris, France"``.
        job_type: Job type filter; defaults to the previously hard-coded
            ``"fulltime"``.
        site_name: List of site identifiers to query; ``None`` means
            ``["linkedin"]``, the previously hard-coded value.

    Returns:
        Whatever ``scrape_jobs`` returns (callers in this file iterate it
        with ``iterrows()``).
    """
    sites = ["linkedin"] if site_name is None else list(site_name)
    # NOTE(review): an ``hours_old`` limit was tried here and disabled in the
    # original code; it is still not passed.
    return scrape_jobs(
        site_name=sites,
        search_term=search_term,
        location=location,
        job_type=job_type,
        results_wanted=results_wanted,
        # presumably needed so that each posting carries a description —
        # TODO confirm against the JobSpy documentation
        linkedin_fetch_description=True,
        enforce_annual_salary=True,
    )
def linkedin_get_html(search_term):
    """Return the HTML results page for ``search_term``.

    Requests 50 postings through ``get_jobs`` and hands them, together with
    the wanted and excluded keyword lists below, to ``html_format_page``.

    Args:
        search_term: Search query forwarded to ``get_jobs``.

    Returns:
        The HTML page produced by ``html_format_page``.
    """
    postings = get_jobs(search_term, 50)

    # Keywords of which at least one is expected in a relevant posting.
    wanted_keywords = [
        "marketing",
        "communication",
        "community",
        "business development",
        "experience",
        "social media",
        "brand",
        "ppc",
        "seo",
        "sea",
        "ads",
        "user acquisition",
        "adops",
        "consultant",
    ]
    # Keywords that rule a posting out (internships / apprenticeships).
    excluded_keywords = [
        "stage",
        "stagiaire",
        "alternant",
        "alternance",
        "intern",
        "internship",
        "apprenti",
    ]
    return html_format_page(postings, wanted_keywords, excluded_keywords)