Spaces:

PaulMartrenchar
/

jobsearch

Running

App Files Files Community

jobsearch / jobspy_linkedin.py

PaulMartrenchar

move ai part in new ai_manager.py

fd5aa4d 3 months ago

raw

history blame

5.03 kB

	import warnings
	import datetime

	from jobspy import scrape_jobs

	# Silence every Python warning from this point on. The filter is installed
	# before ai_manager is imported, presumably so that warnings emitted while
	# importing it are hidden as well -- TODO confirm this ordering is intended.
	warnings.filterwarnings("ignore")
	from ai_manager import get_offer_information

	def get_job_url(job):
	    """Return the link to use for a job posting.

	    The value stored under ``job_url_direct`` is preferred. When it renders
	    as one of the missing-value markers ``null``, ``nan`` or ``None``, the
	    value stored under ``job_url`` is returned instead.
	    """
	    direct_url = job["job_url_direct"]
	    is_missing = f"{direct_url}" in ("null", "nan", "None")
	    return job["job_url"] if is_missing else direct_url

	def get_company_url(job):
	    """Return the link to use for the hiring company.

	    The value stored under ``company_url_direct`` is preferred. When it
	    renders as one of the missing-value markers ``null``, ``nan`` or
	    ``None``, the value stored under ``company_url`` is returned instead.
	    """
	    direct_url = job["company_url_direct"]
	    is_missing = f"{direct_url}" in ("null", "nan", "None")
	    return job["company_url"] if is_missing else direct_url

	def get_salary(job):
	    """Return a display string for the salary of ``job``.

	    When ``min_amount`` is present the result is ``"<min>-<max><currency>"``;
	    a missing ``max_amount`` or ``currency`` is left out instead of being
	    rendered as ``nan``/``None``.

	    When ``min_amount`` is missing, the ``salary_range`` entry of
	    ``job["ai_result"]`` is returned, unless it is empty, ``"unknown"``
	    (case-insensitive) or not a string, in which case ``""`` is returned.
	    """
	    missing_markers = ("null", "nan", "None")

	    def is_missing(value):
	        # Missing values are detected through their string rendering.
	        return "{}".format(value) in missing_markers

	    if is_missing(job["min_amount"]):
	        ai_salary = job["ai_result"]["salary_range"]
	        # The AI answer may be None or a non-text value: never call
	        # .lower() on it blindly.
	        if isinstance(ai_salary, str) and ai_salary.lower() not in ("", "unknown"):
	            return ai_salary
	        return ""

	    salary = "{}".format(job["min_amount"])
	    if not is_missing(job["max_amount"]):
	        salary += "-{}".format(job["max_amount"])
	    if not is_missing(job["currency"]):
	        salary += "{}".format(job["currency"])
	    return salary

	def format_should_apply(should_apply):
	    """Return the star prefix for a truthy ``should_apply``, else ``""``."""
	    return "⭐ " if should_apply else ""

	def get_logo(job):
	    """Return the logo URL of ``job``, or a placeholder image URL.

	    The placeholder is used when ``job`` has no ``logo_photo_url`` entry or
	    when that entry renders as an empty/missing value (``""``, ``null``,
	    ``nan``, ``None``), so the page never gets ``src='None'``.
	    """
	    placeholder = "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
	    try:
	        logo_url = job["logo_photo_url"]
	    except (LookupError, TypeError):
	        # No such entry, or ``job`` is not subscriptable: stay best-effort.
	        return placeholder
	    if "{}".format(logo_url) in ("", "null", "nan", "None"):
	        return placeholder
	    return logo_url

	def format_str_or_list(sum):
	    """Render a text or a list of items as an HTML fragment.

	    * ``str``: HTML-escaped, with newlines turned into ``<br />``.
	    * ``list``/``tuple``: a ``<ul>`` with one HTML-escaped ``<li>`` per item.
	    * anything else: returned unchanged.

	    The text is escaped so that characters such as ``<`` or ``&`` in
	    generated content cannot break or inject markup. The parameter name
	    ``sum`` shadows the builtin; it is kept so keyword callers keep working.
	    """
	    from html import escape  # local import keeps this block self-contained

	    if isinstance(sum, str):
	        # Escape first so the <br /> tags added afterwards stay real tags.
	        return escape(sum, quote=False).replace("\n", "<br />")
	    if isinstance(sum, (list, tuple)):
	        items = "".join(
	            "<li>{}</li>".format(escape("{}".format(item), quote=False))
	            for item in sum
	        )
	        return "<ul>" + items + "</ul>"
	    return sum

	def format_posted_date(date):
	    """Format a posting date as ``DD/MM/YYYY``.

	    * Missing values (rendering as ``""``, ``null``, ``nan``, ``None`` or
	      ``NaT``) give ``"?"``.
	    * A numeric string is read as a Unix timestamp in seconds and converted
	      with ``datetime.fromtimestamp`` (local time zone).
	    * Any other string is parsed as an ISO 8601 date; if that fails the
	      string is returned unchanged rather than raising.
	    * Any other value must provide ``strftime`` (date/datetime objects).
	    """
	    if "{}".format(date) in ("", "null", "nan", "None", "NaT"):
	        return "?"
	    if isinstance(date, str):
	        text = date.strip()
	        try:
	            parsed = datetime.datetime.fromtimestamp(int(text))
	        except (ValueError, OverflowError, OSError):
	            # Not a usable timestamp: fall back to an ISO formatted date.
	            try:
	                parsed = datetime.datetime.fromisoformat(text)
	            except ValueError:
	                return date
	        return parsed.strftime("%d/%m/%Y")
	    return date.strftime("%d/%m/%Y")

	def html_format_job(job):
	    """Build the HTML card for one job offer.

	    ``job`` must provide ``title``, ``company``, ``date_posted`` and an
	    ``ai_result`` mapping with ``should_apply``, ``company_description``,
	    ``position_summary``, ``language_requirements`` and
	    ``experience_requirements``, plus whatever the URL/logo/salary helpers
	    read from it.

	    Every scraped or generated value inserted as plain text or inside a
	    single-quoted attribute is HTML-escaped, so a title such as
	    ``R&D <lead>`` or a URL containing ``'`` cannot break the markup.
	    ``format_str_or_list`` returns HTML markup, so its output is inserted
	    as-is.

	    Returns the card as a single string.
	    """
	    from html import escape  # local import keeps this block self-contained

	    def esc(value):
	        # Values can be NaN/None, hence the explicit string conversion.
	        # Quotes are escaped too because some values land in '...' attributes.
	        return escape("{}".format(value), quote=True)

	    ai_result = job["ai_result"]
	    parts = [
	        # open box
	        "<div class='job'>",
	        # logo
	        "<div class='logobox'><img src='{}' alt='No logo' class='logo'></div>".format(
	            esc(get_logo(job))
	        ),
	        # text part
	        "<div style='flex: 5; padding: 10px;'>",
	        "<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(
	            esc(get_job_url(job)),
	            format_should_apply(ai_result["should_apply"]),
	            esc(job["title"]),
	        ),
	        "<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(
	            esc(get_company_url(job)),
	            esc(job["company"]),
	            esc(ai_result["company_description"]),
	            esc(format_posted_date(job["date_posted"])),
	        ),
	        "<p><h4>Position: {}</h4>{}</p>".format(
	            esc(get_salary(job)),
	            format_str_or_list(ai_result["position_summary"]),
	        ),
	        "<p><h4>Language:</h4>{}</p>".format(
	            format_str_or_list(ai_result["language_requirements"])
	        ),
	        "<p><h4>Experience:</h4>{}</p>".format(
	            format_str_or_list(ai_result["experience_requirements"])
	        ),
	        # close text part
	        "</div>",
	        # close box
	        "</div>",
	    ]
	    return " ".join(parts)

	def filterout_jobs(jobs, job_filter, job_filter_negative):
	    """Select the offers worth displaying and attach their AI analysis.

	    An offer is kept when all of the following hold:

	    * its title is a string containing none of the ``job_filter_negative``
	      keywords and at least one of the ``job_filter`` keywords (substring
	      match on the lower-cased title, so keywords should be lower case);
	    * its description is not missing (``null``/``nan``/``None``);
	    * ``get_offer_information`` reports ``is_an_internship`` equal to False.

	    Args:
	        jobs: object exposing ``iterrows()`` yielding ``(index, job)`` pairs.
	        job_filter: keywords of which at least one must be in the title.
	        job_filter_negative: keywords that must not be in the title.

	    Returns:
	        list of the selected jobs, each with an added ``ai_result`` entry.
	    """
	    missing_markers = ("null", "nan", "None")
	    selected_jobs = []
	    for _, job in jobs.iterrows():
	        title = job["title"]
	        if not isinstance(title, str):
	            # A missing title (NaN/None) cannot match any keyword; skip it
	            # instead of crashing on .lower().
	            continue
	        title = title.lower()  # lower-cased once, reused for every keyword
	        if any(item in title for item in job_filter_negative):
	            continue
	        if not any(item in title for item in job_filter):
	            continue
	        if "{}".format(job["description"]) in missing_markers:
	            continue
	        # The AI helper is only called for offers that passed the filters above.
	        job["ai_result"] = get_offer_information(job["company"], job["description"])
	        # Deliberately "== False" rather than "not ...": an undetermined
	        # answer such as None must not be treated as "not an internship".
	        if job["ai_result"]["is_an_internship"] == False:  # noqa: E712
	            selected_jobs.append(job)

	    return selected_jobs

	def html_format_page(jobs, job_filter, job_filter_negative):
	    """Return a complete HTML page listing the jobs that pass the filters.

	    The jobs are filtered with ``filterout_jobs`` and each remaining one is
	    rendered with ``html_format_job``. When nothing remains, the page body
	    only contains the text ``No job found``.
	    """
	    page_head = "<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"
	    page_tail = "</body></html>"

	    kept_jobs = filterout_jobs(jobs, job_filter, job_filter_negative)
	    # An empty selection falls back to the single "No job found" entry.
	    body_parts = [html_format_job(kept) for kept in kept_jobs] or ["No job found"]
	    return " ".join([page_head, *body_parts, page_tail])


	def get_jobs(search_term, results_wanted, location="Paris, France", job_type="fulltime"):
	    """Scrape LinkedIn job offers with jobspy's ``scrape_jobs``.

	    Args:
	        search_term: text searched on the job board.
	        results_wanted: number of offers requested from the scraper.
	        location: where to search; defaults to the previously hard-coded
	            ``"Paris, France"``.
	        job_type: contract type passed to the scraper; defaults to the
	            previously hard-coded ``"fulltime"``.

	    Returns:
	        Whatever ``scrape_jobs`` returns (the rest of this file iterates it
	        with ``iterrows()``).
	    """
	    return scrape_jobs(
	        site_name=["linkedin"],
	        search_term=search_term,
	        location=location,
	        job_type=job_type,
	        results_wanted=results_wanted,
	        # NOTE: no ``hours_old`` limit is applied; offers of any age are returned.
	        linkedin_fetch_description=True,
	        enforce_annual_salary=True,
	    )

	def linkedin_get_html(search_term):
	    """Return the HTML results page for ``search_term``.

	    Requests 50 offers through ``get_jobs`` and hands them to
	    ``html_format_page`` together with the keyword lists below.
	    """
	    # Keywords of which at least one is required; filterout_jobs matches
	    # them against the lower-cased job title.
	    wanted_keywords = [
	        "marketing",
	        "communication",
	        "community",
	        "business development",
	        "experience",
	        "social media",
	        "brand",
	        "ppc",
	        "seo",
	        "sea",
	        "ads",
	        "user acquisition",
	        "adops",
	        "consultant",
	    ]
	    # Keywords that exclude an offer (internships / apprenticeships).
	    excluded_keywords = [
	        "stage",
	        "stagiaire",
	        "alternant",
	        "alternance",
	        "intern",
	        "internship",
	        "apprenti",
	    ]

	    return html_format_page(get_jobs(search_term, 50), wanted_keywords, excluded_keywords)