Spaces:

PaulMartrenchar
/

jobsearch

Running

App Files Files Community

PaulMartrenchar committed on Dec 18, 2024

Commit

cd58373

1 Parent(s): 1e0326f

First version

Browse files

Files changed (5) hide show

WelcomeToTheJungle.py +265 -0
app.py +34 -0
jobspy_indeed.py +206 -0
jobspy_linkedin.py +213 -0
requirements.txt +4 -0

WelcomeToTheJungle.py ADDED Viewed

	@@ -0,0 +1,265 @@

+import requests
+import json
+from datetime import datetime
+import warnings
+from mistralai import Mistral, SDKError
+from time import sleep
+from bs4 import BeautifulSoup
+from markdownify import markdownify
+warnings.filterwarnings("ignore")
+import os
# Model names passed to the Mistral client; one is drawn at random per request.
models = ["mistral-small-2409", "open-mistral-nemo"]
import random


def get_model():
    """Return one entry of ``models``, picked at random."""
    position = random.randrange(len(models))
    return models[position]
def call_ai(prompt, json_mode):
    """Run ``_call_ai`` and retry exactly once when the SDK reports an error.

    An ``SDKError`` on the first attempt triggers an 11-second pause followed
    by a second attempt. A failure of that second attempt, or any other
    exception type, propagates to the caller unchanged.
    """
    try:
        answer = _call_ai(prompt, json_mode)
    except SDKError:
        # Pause before the single retry.
        sleep(11)
        answer = _call_ai(prompt, json_mode)
    return answer
def _call_ai(prompt, json_mode):
    """Send ``prompt`` as a single user message and return the reply text.

    The API key is read from the ``MISTRAL_KEY`` environment variable. When
    ``json_mode`` is truthy the request also sets ``response_format`` to
    ``{"type": "json_object"}``.
    """
    # Fixed pause in front of every request.
    sleep(1.1)
    client = Mistral(api_key=os.environ['MISTRAL_KEY'])
    request_args = {
        "model": get_model(),
        "messages": [{"role": "user", "content": prompt}],
    }
    if json_mode:
        request_args["response_format"] = {"type": "json_object"}
    completion = client.chat.complete(**request_args)
    return completion.choices[0].message.content
def get_offer_information(company, offer):
    """Run ``_get_offer_information``, retrying once if the reply is not valid JSON.

    Only ``json.decoder.JSONDecodeError`` triggers the retry; a second failure
    or any other exception propagates to the caller.
    """
    try:
        details = _get_offer_information(company, offer)
    except json.decoder.JSONDecodeError:
        # One more attempt with a fresh model call.
        details = _get_offer_information(company, offer)
    return details
def _get_offer_information(company, offer):
    """Ask the model to summarise a job offer and return the parsed JSON object.

    The prompt is sent through ``call_ai`` in JSON mode. The parsed reply is
    printed, then every expected field that the reply lacks is filled with a
    default before the object is returned.
    """
    template = """This is a job offer from the company '{}', make a JSON with this information:
- company_description (string): a description of the company in less than 15 words.
- position_summary (string): a summary of the role in 3 bullet points
- language_requirements (string): the language requirements in French and English
- experience_requirements (string): the experience requirements
- is_an_internship (Boolean): true if it's an internship, false otherwise
- salary_range (string): the salary range in yearly salary if stated, write 'unknown' otherwise
- should_apply (Boolean): True if the offer requires up to 2 years of work experience and does not ask for other languages than English, French, Hindi or Nepali
Be concise in each answer. Answer in English.
Example:
{{
'company_description': 'Galileo Global Education: A leading international network of higher education institutions.',
'position_summary': 'Project Manager Marketing and Communication: Develop brand experience, manage marketing/communication plan, ensure brand image, monitor e-reputation, create content, and collaborate with digital team.',
'language_requirements': 'French Fluent and English Native',
'experience_requirements': 'Previous experience in a similar role, preferably in an agency.',
'is_an_internship': false,
'salary_range': '€38,000-€42,000',
'should_apply': true,
}}
Offer:
{}"""
    raw_reply = call_ai(template.format(company, offer), True)
    obj = json.loads(raw_reply)
    print(obj)
    # Defaults for fields the model left out, in the order they are checked.
    fallbacks = (
        ("company_description", ""),
        ("position_summary", ""),
        ("language_requirements", ""),
        ("experience_requirements", ""),
        ("is_an_internship", False),
        ("salary_range", ""),
        ("should_apply", True),
    )
    for field, fallback in fallbacks:
        if field not in obj:
            obj[field] = fallback
    return obj
def get_offer(url):
    """Download a job page and return the text of its position section.

    Returns an empty string when the response status is not 200 or when the
    page has no ``<div id="the-position-section">`` element. Previously a
    missing element raised ``AttributeError`` on ``None``.
    """
    # NOTE(review): verify=False turns off TLS certificate verification;
    # kept as in the original, confirm it is still required.
    response = requests.get(url, verify=False)
    if response.status_code != 200:
        return ""
    soup = BeautifulSoup(response.text, 'html.parser')
    section = soup.find('div', {'id': 'the-position-section'})
    if section is None:
        # Page layout differs or the offer was removed: nothing to summarise.
        return ""
    return markdownify(section.text.strip())
def get_extra_information_from_ai(company, url):
    """Fetch the offer text at ``url`` and return the AI-extracted fields for it."""
    return get_offer_information(company, get_offer(url))
def get_salary(job):
    """Return the AI-extracted salary range, or "" when none was stated.

    ``job["ai_result"]["salary_range"]`` comes from model-generated JSON, so
    it is not guaranteed to be a string: ``None`` yields "", other non-string
    values are converted with ``str``. Blank text and "unknown" (any case,
    surrounding whitespace ignored) also yield "".
    """
    salary = job["ai_result"]["salary_range"]
    if salary is None:
        return ""
    if not isinstance(salary, str):
        salary = str(salary)
    if salary.strip().lower() in ("", "unknown"):
        return ""
    return salary
def format_should_apply(should_apply):
    """Return a star HTML entity plus a space when ``should_apply`` is truthy, else ""."""
    return "&#x2B50; " if should_apply else ""
def get_logo(job):
    """Return the job's logo URL, or a placeholder image when it is missing.

    A value counts as missing when it is ``None`` or when its text form is
    empty, "nan", "None" or "null"; the original only recognised "nan".
    """
    logo = job["logo_photo_url"]
    if logo is None or f"{logo}".strip() in ("", "nan", "None", "null"):
        return "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
    return logo
def format_str_or_list(sum):
    """Render a string or list as an HTML fragment.

    A list becomes a ``<ul>`` with one ``<li>`` per item, a string has its
    newlines replaced by ``<br />``, and any other value is returned as is.
    """
    if isinstance(sum, list):
        bullets = [f"<li>{item}</li>" for item in sum]
        return "<ul>" + "".join(bullets) + "</ul>"
    if isinstance(sum, str):
        return "<br />".join(sum.split("\n"))
    return sum
def html_format_job(job):
    """Build the HTML card for one job: logo box, linked title and AI summary."""
    pieces = [
        # Outer box.
        "<div class='job'>",
        # Logo column.
        "<div class='logobox'><img src='{}' alt='Logo' class='logo'></div>".format(job["organization_logo_url"]),
        # Text column.
        "<div style='flex: 5; padding: 10px;'>",
        "<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(job["URL"], format_should_apply(job["ai_result"]["should_apply"]), job["name"]),
        "<p>{} ({}) - published at {}</p>".format(job["organization_name"], job["ai_result"]["company_description"], job["published_at"]),
        "<p><h4>Position: {}</h4>{}</p>".format(get_salary(job), format_str_or_list(job["ai_result"]["position_summary"])),
        "<p><h4>Language:</h4>{}</p>".format(format_str_or_list(job["ai_result"]["language_requirements"])),
        "<p><h4>Experience:</h4>{}</p>".format(format_str_or_list(job["ai_result"]["experience_requirements"])),
        # Close the text column, then the outer box.
        "</div>",
        "</div>",
    ]
    return " ".join(pieces)
def filterout_jobs(jobs, job_filter, job_filter_negative):
    """Keep jobs whose title passes the keyword filters and that are not internships.

    A job is considered only when its lower-cased name contains no entry of
    ``job_filter_negative`` and at least one entry of ``job_filter``. Each
    such job gets an ``ai_result`` entry and is kept when that result's
    ``is_an_internship`` equals ``False``.
    """
    selected_jobs = []
    for job in jobs:
        if any(word in job["name"].lower() for word in job_filter_negative):
            continue
        if not any(word in job["name"].lower() for word in job_filter):
            continue
        job["ai_result"] = get_extra_information_from_ai(job["organization_name"], job["URL"])
        if job["ai_result"]["is_an_internship"] == False:
            selected_jobs.append(job)
    return selected_jobs
def html_format_page(jobs, job_filter, job_filter_negative):
    """Filter ``jobs`` and return a full HTML page with one card per kept job."""
    opening = "<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"
    kept = filterout_jobs(jobs, job_filter, job_filter_negative)
    cards = [html_format_job(job) for job in kept]
    return " ".join([opening, *cards, "</body></html>"])
def _job_from_hit(hit):
    """Reduce one search hit to the flat dict used by the rest of this module."""
    organization = hit["organization"]
    if hit["published_at"] is not None:
        published = datetime.strptime(hit["published_at"], '%Y-%m-%dT%H:%M:%S.%f%z')
        published_at = published.strftime("%d/%m/%Y %H:%M:%S")
    else:
        published_at = "None"
    size = organization.get("size")
    job = {
        "name": hit["name"],
        "slug": hit["slug"],
        "published_at": published_at,
        "organization_name": organization["name"],
        "organization_size": size["en"] if size is not None else "",
        "organization_logo_url": organization["logo"]["url"],
        "organization_slug": organization["website_organization"]["slug"],
        "objectID": hit["objectID"],
    }
    job["URL"] = "https://www.welcometothejungle.com/en/companies/{}/jobs/{}?o={}".format(job["organization_slug"], job["slug"], job["objectID"])
    return job


def get_jobs(search_term):
    """Query the Welcome to the Jungle Algolia index and return a list of job dicts.

    ``search_term`` is lower-cased and percent-encoded before it is placed in
    the ``query=`` field of the request payload. Each returned dict has the
    keys ``name``, ``slug``, ``published_at`` ("dd/mm/YYYY HH:MM:SS" or the
    string "None"), ``organization_name``, ``organization_size``,
    ``organization_logo_url``, ``organization_slug``, ``objectID`` and ``URL``.

    Raises ``requests.HTTPError`` when the search endpoint answers with an
    error status.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0",
        "Accept": "*/*",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "Origin": "https://www.welcometothejungle.com",
        "Connection": "keep-alive",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "no-cors",
        "Sec-Fetch-Site": "cross-site",
        "content-type": "application/x-www-form-urlencoded",
        "Referer": "https://www.welcometothejungle.com/",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
        "x-algolia-agent": "Algolia for JavaScript (4.14.3); Browser (lite); JS Helper (3.11.2); react (17.0.2); react-instantsearch (6.38.3)",
        "x-algolia-api-key": "02f0d440abc99cae37e126886438b266",
        "x-algolia-application-id": "CSEKHVMS53",
    }
    # The "params" value must stay on one physical line: a raw newline inside
    # a JSON string is invalid. "#####" marks where the query is substituted.
    payload_template = """{
        "requests":[{
            "indexName":"wk_cms_jobs_production_published_at_desc",
            "params":"analyticsTags=%5B%22page%3Ajobs_index%22%2C%22language%3Aen%22%5D&aroundLatLng=48.85718%2C2.34141&aroundPrecision=20000&aroundRadius=20000&attributesToHighlight=%5B%22name%22%5D&attributesToRetrieve=%5B%22_geoloc%22%2C%22contract_type%22%2C%22experience_level_minimum%22%2C%22name%22%2C%22objectID%22%2C%22office%22%2C%22offices%22%2C%22organization.logo.url%22%2C%22organization.name%22%2C%22organization.reference%22%2C%22organization.slug%22%2C%22organization.website_organization%22%2C%22organization.descriptions%22%2C%22organization.has_default_job%22%2C%22promoted%22%2C%22published_at%22%2C%22reference%22%2C%22remote%22%2C%22slug%22%2C%22website%22%2C%22contract_type_names.en%22%2C%22organization.cover_image.en.small.url%22%2C%22organization.size.en%22%2C%22profession.category.en%22%2C%22profession.name.en%22%2C%22sectors_name.en%22%5D&clickAnalytics=true&facetFilters=%5B%5B%22contract_type_names.en%3AFull-Time%22%5D%2C%5B%22language%3Aen%22%5D%5D&facets=%5B%22offices.country_code%22%2C%22offices.state%22%2C%22offices.district%22%2C%22offices.location%22%2C%22online%22%2C%22organization.name%22%2C%22remote%22%2C%22contract_type_names.en%22%2C%22sectors_name.en.Advertising%20%2F%20Marketing%20%2F%20Agency%22%2C%22sectors_name.en.Architecture%22%2C%22sectors_name.en.Banking%20%2F%20Insurance%20%2F%20Finance%22%2C%22sectors_name.en.Consulting%20%2F%20Audit%22%2C%22sectors_name.en.Corporate%20Services%22%2C%22sectors_name.en.Culture%20%2F%20Media%20%2F%20Entertainment%22%2C%22sectors_name.en.Distribution%22%2C%22sectors_name.en.Education%20%2F%20Training%20%2F%20Recruitment%22%2C%22sectors_name.en.Engineering%22%2C%22sectors_name.en.Fashion%20%2F%20Luxury%20%2F%20Beauty%20%2F%20Lifestyle%22%2C%22sectors_name.en.Food%20and%20Beverage%22%2C%22sectors_name.en.Health%20%2F%20Social%20%2F%20Environment%22%2C%22sectors_name.en.Hotel%20%2F%20Tourism%20%2F%20Leisure%22%2C%22sectors_name.en.Industry%22%2C%22sectors_name.en.Legal%20%2F%20Law%22%2C%22sectors_name.en.Mobility%20%2F%20Transport%22%2C%22sectors_name.en.Nonprofit%20%2F%20Association%22%2C%22sectors_name.en.Public%20Administration%22%2C%22sectors_name.en.Real%20Estate%22%2C%22sectors_name.en.Tech%22%2C%22sectors.parent.en%22%2C%22profession_name.en.Audit%20%2F%20Finance%20%2F%20Insurance%22%2C%22profession_name.en.Business%22%2C%22profession_name.en.Consulting%22%2C%22profession_name.en.Customer%20Service%22%2C%22profession_name.en.Design%22%2C%22profession_name.en.Fashion%22%2C%22profession_name.en.Health%20%2F%20Medical%20%2F%20Social%22%2C%22profession_name.en.Hospitality%20%2F%20Restaurant%20services%22%2C%22profession_name.en.Industry%22%2C%22profession_name.en.Marketing%20%2F%20Communications%22%2C%22profession_name.en.Media%22%2C%22profession_name.en.Real%20Estate%22%2C%22profession_name.en.Retail%22%2C%22profession_name.en.Support%20Roles%22%2C%22profession_name.en.Tech%22%2C%22profession_name.en.Tourism%22%2C%22profession.category.en%22%2C%22experience_level_minimum%22%2C%22organization.size.en%22%2C%22language%22%5D&filters=website.reference%3Awttj_fr&getRankingInfo=true&highlightPostTag=%3C%2Fais-highlight-0000000000%3E&highlightPreTag=%3Cais-highlight-0000000000%3E&hitsPerPage=80&maxValuesPerFacet=999&numericFilters=%5B%22experience_level_minimum%3E%3D0%22%2C%22experience_level_minimum%3C%3D2%22%5D&page=0&query=#####&tagFilters=&userToken=00c5e1a5-e384-4def-bae4-1d466974cc2d"
            }]
        }"""
    # quote() encodes spaces as %20 like the old manual replace, and also
    # escapes characters such as '&' or '"' that would corrupt the payload.
    data = payload_template.replace("#####", quote(search_term.lower(), safe=""))
    # The original URL carried "^%^" sequences (cmd.exe escaping left over from
    # a copied curl command); they are restored to plain percent-escapes.
    url = "https://csekhvms53-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=Algolia%20for%20JavaScript%20(4.14.3)%3B%20Browser%20(lite)%3B%20JS%20Helper%20(3.11.2)%3B%20react%20(17.0.2)%3B%20react-instantsearch%20(6.38.3)&x-algolia-api-key=02f0d440abc99cae37e126886438b266&x-algolia-application-id=CSEKHVMS53&search_origin=jobs_search_client"
    # NOTE(review): verify=False turns off TLS certificate verification;
    # kept as in the original, confirm it is still required.
    response = requests.post(url, headers=headers, data=data, verify=False)
    # Fail with a clear HTTP error instead of a KeyError on "results".
    response.raise_for_status()
    hits = response.json()["results"][0]["hits"]
    return [_job_from_hit(hit) for hit in hits]
def wtoj_get_html():
    """Run the predefined searches and return the result page as HTML.

    Results of all searches are merged, de-duplicated by ``URL`` (first
    occurrence wins), sorted newest first, then passed to
    ``html_format_page`` with the title keyword filters.
    """
    search_terms = ['content writer', 'Marketing', "Communication", 'Business development', "SEO"]
    merged_list = []
    for term in search_terms:
        merged_list.extend(get_jobs(term))

    seen_urls = set()
    unique_objects = []
    for obj in merged_list:
        if obj["URL"] not in seen_urls:
            seen_urls.add(obj["URL"])
            unique_objects.append(obj)

    def published_key(job):
        # "published_at" is a "dd/mm/YYYY HH:MM:SS" string (or "None").
        # Sorting the raw text orders by day of month, so parse it back into
        # a datetime; unparseable values sort as the oldest.
        try:
            return datetime.strptime(job["published_at"], "%d/%m/%Y %H:%M:%S")
        except (TypeError, ValueError):
            return datetime.min

    jobs = sorted(unique_objects, key=published_key, reverse=True)
    # Keyword filters applied to the job title.
    job_filter = ["marketing", "communication", "community", "business development", "experience", "social media", "brand", "ppc", "seo", "sea", "ads", "user acquisition", "adops", "consultant"]
    job_filter_negative = ["stage", "stagiaire", "alternant", "alternance", "intern", "internship", "apprenti"]
    return html_format_page(jobs, job_filter, job_filter_negative)

app.py ADDED Viewed

	@@ -0,0 +1,34 @@

+import gradio as gr
+import os
+from jobspy_indeed import indeed_get_html
+from WelcomeToTheJungle import wtoj_get_html
+from jobspy_linkedin import linkedin_get_html
def search_jobs(api_key, platform):
    """Store the API key and return the HTML produced for the chosen platform.

    Raises ``gr.Error`` when ``api_key`` is the empty string or when
    ``platform`` matches none of the known platform labels. The key is
    written to the ``MISTRAL_KEY`` environment variable before the platform
    is examined.
    """
    if api_key == "":
        raise gr.Error("API key is required")
    os.environ['MISTRAL_KEY'] = api_key
    routes = (
        ("Indeed", indeed_get_html),
        ("Welcome to the jungle", wtoj_get_html),
        ("LinkedIn", linkedin_get_html),
    )
    for label, build_html in routes:
        if platform == label:
            return build_html()
    raise gr.Error("No platform selected")
# Input widgets: the API key field and the platform selector.
api_key = gr.Textbox(label="API key")
platform = gr.Radio(
    label="Platform",
    choices=["Welcome to the jungle", "Indeed", "LinkedIn"],
)
# Output area, pre-filled with blank lines.
output_html = gr.HTML(value="<html><br/><br/><br/><br/></html>", label="Result")
# Wire search_jobs to the widgets and start the app.
demo = gr.Interface(
    title="Job search",
    fn=search_jobs,
    inputs=[api_key, platform],
    outputs=[output_html],
    clear_btn=None,
    show_progress="full",
    flagging_mode="never",
)
demo.launch()

jobspy_indeed.py ADDED Viewed

	@@ -0,0 +1,206 @@

+import json
+import warnings
+from mistralai import Mistral, SDKError
+from time import sleep
+from jobspy import scrape_jobs
+warnings.filterwarnings("ignore")
+import os
# Model names passed to the Mistral client; one is drawn at random per request.
models = ["mistral-small-2409", "open-mistral-nemo"]
import random


def get_model():
    """Return one entry of ``models``, picked at random."""
    position = random.randrange(len(models))
    return models[position]
def call_ai(prompt, json_mode):
    """Run ``_call_ai`` and retry exactly once when the SDK reports an error.

    An ``SDKError`` on the first attempt triggers an 11-second pause followed
    by a second attempt. A failure of that second attempt, or any other
    exception type, propagates to the caller unchanged.
    """
    try:
        answer = _call_ai(prompt, json_mode)
    except SDKError:
        # Pause before the single retry.
        sleep(11)
        answer = _call_ai(prompt, json_mode)
    return answer
def _call_ai(prompt, json_mode):
    """Send ``prompt`` as a single user message and return the reply text.

    The API key is read from the ``MISTRAL_KEY`` environment variable. When
    ``json_mode`` is truthy the request also sets ``response_format`` to
    ``{"type": "json_object"}``.
    """
    # Fixed pause in front of every request.
    sleep(1.1)
    client = Mistral(api_key=os.environ['MISTRAL_KEY'])
    request_args = {
        "model": get_model(),
        "messages": [{"role": "user", "content": prompt}],
    }
    if json_mode:
        request_args["response_format"] = {"type": "json_object"}
    completion = client.chat.complete(**request_args)
    return completion.choices[0].message.content
def get_offer_information(company, offer):
    """Run ``_get_offer_information``, retrying once if the reply is not valid JSON.

    Only ``json.decoder.JSONDecodeError`` triggers the retry; a second failure
    or any other exception propagates to the caller.
    """
    try:
        details = _get_offer_information(company, offer)
    except json.decoder.JSONDecodeError:
        # One more attempt with a fresh model call.
        details = _get_offer_information(company, offer)
    return details
def _get_offer_information(company, offer):
    """Ask the model to summarise a job offer and return the parsed JSON object.

    The prompt is sent through ``call_ai`` in JSON mode. The parsed reply is
    printed, then every expected field that the reply lacks is filled with a
    default before the object is returned.
    """
    template = """This is a job offer from the company '{}', make a JSON with this information:
- company_description (string): a description of the company in less than 15 words.
- position_summary (string): a summary of the role in 3 bullet points
- language_requirements (string): the language requirements in French and English
- experience_requirements (string): the experience requirements
- is_an_internship (Boolean): true if it's an internship, false otherwise
- salary_range (string): the salary range in yearly salary if stated, write 'unknown' otherwise
- should_apply (Boolean): True if the offer requires up to 2 years of work experience and does not ask for other languages than English, French, Hindi or Nepali
Be concise in each answer. Answer in English.
Example:
{{
'company_description': 'Galileo Global Education: A leading international network of higher education institutions.',
'position_summary': 'Project Manager Marketing and Communication: Develop brand experience, manage marketing/communication plan, ensure brand image, monitor e-reputation, create content, and collaborate with digital team.',
'language_requirements': 'French Fluent and English Native',
'experience_requirements': 'Previous experience in a similar role, preferably in an agency.',
'is_an_internship': false,
'salary_range': '€38,000-€42,000',
'should_apply': true,
}}
Offer:
{}"""
    raw_reply = call_ai(template.format(company, offer), True)
    obj = json.loads(raw_reply)
    print(obj)
    # Defaults for fields the model left out, in the order they are checked.
    fallbacks = (
        ("company_description", ""),
        ("position_summary", ""),
        ("language_requirements", ""),
        ("experience_requirements", ""),
        ("is_an_internship", False),
        ("salary_range", ""),
        ("should_apply", True),
    )
    for field, fallback in fallbacks:
        if field not in obj:
            obj[field] = fallback
    return obj
def get_job_url(job):
    """Return the direct job URL, falling back to ``job_url`` when it is missing.

    The direct URL counts as missing when it is ``None`` or when its text
    form is empty, "null", "nan" or "None". The original compared against ""
    only, so a NaN/None cell was returned and ended up in the link.
    """
    direct = job["job_url_direct"]
    if direct is None or f"{direct}".strip() in ("", "null", "nan", "None"):
        return job["job_url"]
    return direct
def get_company_url(job):
    """Return the direct company URL, falling back to ``company_url`` when missing.

    The direct URL counts as missing when it is ``None`` or when its text
    form is empty, "null", "nan" or "None". The original compared against ""
    only, so a NaN/None cell was returned and ended up in the link.
    """
    direct = job["company_url_direct"]
    if direct is None or f"{direct}".strip() in ("", "null", "nan", "None"):
        return job["company_url"]
    return direct
def get_salary(job):
    """Return the salary text to display for ``job``.

    When ``min_amount`` is present the result is "<min>-<max><currency>".
    Otherwise the AI-extracted ``salary_range`` is used; it comes from
    model-generated JSON, so ``None`` yields "", other non-string values are
    converted with ``str``, and blank text or "unknown" yields "".
    """
    minimum = job["min_amount"]
    if f"{minimum}" in ("nan", "None"):
        ai_salary = job["ai_result"]["salary_range"]
        if ai_salary is None:
            return ""
        if not isinstance(ai_salary, str):
            ai_salary = str(ai_salary)
        if ai_salary.strip().lower() in ("", "unknown"):
            return ""
        return ai_salary
    return "{}-{}{}".format(minimum, job["max_amount"], job["currency"])
def format_should_apply(should_apply):
    """Return a star HTML entity plus a space when ``should_apply`` is truthy, else ""."""
    return "&#x2B50; " if should_apply else ""
def get_logo(job):
    """Return the job's logo URL, or a placeholder image when it is missing.

    A value counts as missing when it is ``None`` or when its text form is
    empty, "nan", "None" or "null"; the original only recognised "nan".
    """
    logo = job["logo_photo_url"]
    if logo is None or f"{logo}".strip() in ("", "nan", "None", "null"):
        return "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
    return logo
def format_str_or_list(sum):
    """Render a string or list as an HTML fragment.

    A list becomes a ``<ul>`` with one ``<li>`` per item, a string has its
    newlines replaced by ``<br />``, and any other value is returned as is.
    """
    if isinstance(sum, list):
        bullets = [f"<li>{item}</li>" for item in sum]
        return "<ul>" + "".join(bullets) + "</ul>"
    if isinstance(sum, str):
        return "<br />".join(sum.split("\n"))
    return sum
def _format_date_posted(value):
    """Format a posting date as dd/mm/YYYY, or "?" when the date is missing."""
    if value is None or f"{value}" in ("", "nan", "NaT", "None"):
        return "?"
    return value.strftime("%d/%m/%Y")


def html_format_job(job):
    """Build the HTML card for one job: logo box, linked title and AI summary.

    A missing ``date_posted`` (None, NaN or NaT) is shown as "?" instead of
    raising when ``strftime`` is called on it.
    """
    pieces = [
        # Outer box.
        "<div class='job'>",
        # Logo column.
        "<div class='logobox'><img src='{}' alt='No logo' class='logo'></div>".format(get_logo(job)),
        # Text column.
        "<div style='flex: 5; padding: 10px;'>",
        "<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(get_job_url(job), format_should_apply(job["ai_result"]["should_apply"]), job["title"]),
        "<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(get_company_url(job), job["company"], job["ai_result"]["company_description"], _format_date_posted(job["date_posted"])),
        "<p><h4>Position: {}</h4>{}</p>".format(get_salary(job), format_str_or_list(job["ai_result"]["position_summary"])),
        "<p><h4>Language:</h4>{}</p>".format(format_str_or_list(job["ai_result"]["language_requirements"])),
        "<p><h4>Experience:</h4>{}</p>".format(format_str_or_list(job["ai_result"]["experience_requirements"])),
        # Close the text column, then the outer box.
        "</div>",
        "</div>",
    ]
    return " ".join(pieces)
def filterout_jobs(jobs, job_filter, job_filter_negative):
    """Keep rows whose title passes the keyword filters and that are not internships.

    ``jobs`` is iterated with ``iterrows()``. A row is considered only when
    its lower-cased title contains no entry of ``job_filter_negative`` and at
    least one entry of ``job_filter``. Each such row gets an ``ai_result``
    entry and is kept when that result's ``is_an_internship`` equals ``False``.
    """
    selected_jobs = []
    for _, job in jobs.iterrows():
        if any(word in job["title"].lower() for word in job_filter_negative):
            continue
        if not any(word in job["title"].lower() for word in job_filter):
            continue
        job["ai_result"] = get_offer_information(job["company"], job["description"])
        if job["ai_result"]["is_an_internship"] == False:
            selected_jobs.append(job)
    return selected_jobs
def html_format_page(jobs, job_filter, job_filter_negative):
    """Filter ``jobs`` and return a full HTML page with one card per kept job."""
    opening = "<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"
    kept = filterout_jobs(jobs, job_filter, job_filter_negative)
    cards = [html_format_job(job) for job in kept]
    return " ".join([opening, *cards, "</body></html>"])
def get_jobs(search_term, results_wanted):
    """Scrape Indeed for full-time jobs around Paris matching ``search_term``.

    Returns whatever ``scrape_jobs`` returns for up to ``results_wanted``
    results.
    """
    options = dict(
        site_name=["indeed"],
        search_term=search_term,
        location="Paris, France",
        job_type="fulltime",
        results_wanted=results_wanted,
        country_indeed='France',  # only needed for indeed / glassdoor
        enforce_annual_salary=True,
        linkedin_fetch_description=False,  # only relevant to LinkedIn scraping
    )
    return scrape_jobs(**options)
def indeed_get_html():
    """Run the predefined Indeed searches and return the result page as HTML.

    The five result frames are concatenated, de-duplicated on ``id``, sorted
    by ``date_posted`` (newest first) and passed to ``html_format_page`` with
    the title keyword filters.
    """
    queries = ['"content writer"', '"Digital Marketing"', "Communication", '"Business development"', "SEO"]
    frames = [get_jobs(query, 50) for query in queries]
    import pandas as pd
    combined = pd.concat(frames, ignore_index=True)
    deduplicated = combined.drop_duplicates(subset='id')
    jobs = deduplicated.sort_values(by='date_posted', ascending=False)
    # Keyword filters applied to the job title.
    job_filter = ["marketing", "communication", "community", "business development", "experience", "social media", "brand", "ppc", "seo", "sea", "ads", "user acquisition", "adops", "consultant"]
    job_filter_negative = ["stage", "stagiaire", "alternant", "alternance", "intern", "internship", "apprenti"]
    return html_format_page(jobs, job_filter, job_filter_negative)

jobspy_linkedin.py ADDED Viewed

	@@ -0,0 +1,213 @@

+import json
+import warnings
+import datetime
+from mistralai import Mistral, SDKError
+from time import sleep
+from jobspy import scrape_jobs
+warnings.filterwarnings("ignore")
+import os
# Model names passed to the Mistral client; one is drawn at random per request.
models = ["mistral-small-2409", "open-mistral-nemo"]
import random


def get_model():
    """Return one entry of ``models``, picked at random."""
    position = random.randrange(len(models))
    return models[position]
def call_ai(prompt, json_mode):
    """Run ``_call_ai`` and retry exactly once when the SDK reports an error.

    An ``SDKError`` on the first attempt triggers an 11-second pause followed
    by a second attempt. A failure of that second attempt, or any other
    exception type, propagates to the caller unchanged.
    """
    try:
        answer = _call_ai(prompt, json_mode)
    except SDKError:
        # Pause before the single retry.
        sleep(11)
        answer = _call_ai(prompt, json_mode)
    return answer
def _call_ai(prompt, json_mode):
    """Send ``prompt`` as a single user message and return the reply text.

    The API key is read from the ``MISTRAL_KEY`` environment variable. When
    ``json_mode`` is truthy the request also sets ``response_format`` to
    ``{"type": "json_object"}``.
    """
    # Fixed pause in front of every request.
    sleep(1.1)
    client = Mistral(api_key=os.environ['MISTRAL_KEY'])
    request_args = {
        "model": get_model(),
        "messages": [{"role": "user", "content": prompt}],
    }
    if json_mode:
        request_args["response_format"] = {"type": "json_object"}
    completion = client.chat.complete(**request_args)
    return completion.choices[0].message.content
def get_offer_information(company, offer):
    """Run ``_get_offer_information``, retrying once if the reply is not valid JSON.

    Only ``json.decoder.JSONDecodeError`` triggers the retry; a second failure
    or any other exception propagates to the caller.
    """
    try:
        details = _get_offer_information(company, offer)
    except json.decoder.JSONDecodeError:
        # One more attempt with a fresh model call.
        details = _get_offer_information(company, offer)
    return details
def _get_offer_information(company, offer):
    """Ask the model to summarise a job offer and return the parsed JSON object.

    The prompt is sent through ``call_ai`` in JSON mode. The parsed reply is
    printed, then every expected field that the reply lacks is filled with a
    default before the object is returned.
    """
    template = """This is a job offer from the company '{}', make a JSON with this information:
- company_description (string): a description of the company in less than 15 words.
- position_summary (string): a summary of the role in 3 bullet points
- language_requirements (string): the language requirements in French and English
- experience_requirements (string): the experience requirements
- is_an_internship (Boolean): true if it's an internship, false otherwise
- salary_range (string): the salary range in yearly salary if stated, write 'unknown' otherwise
- should_apply (Boolean): True if the offer requires up to 2 years of work experience and does not ask for other languages than English, French, Hindi or Nepali
Be concise in each answer. Answer in English.
Example:
{{
'company_description': 'Galileo Global Education: A leading international network of higher education institutions.',
'position_summary': 'Project Manager Marketing and Communication: Develop brand experience, manage marketing/communication plan, ensure brand image, monitor e-reputation, create content, and collaborate with digital team.',
'language_requirements': 'French Fluent and English Native',
'experience_requirements': 'Previous experience in a similar role, preferably in an agency.',
'is_an_internship': false,
'salary_range': '€38,000-€42,000',
'should_apply': true,
}}
Offer:
{}"""
    raw_reply = call_ai(template.format(company, offer), True)
    obj = json.loads(raw_reply)
    print(obj)
    # Defaults for fields the model left out, in the order they are checked.
    fallbacks = (
        ("company_description", ""),
        ("position_summary", ""),
        ("language_requirements", ""),
        ("experience_requirements", ""),
        ("is_an_internship", False),
        ("salary_range", ""),
        ("should_apply", True),
    )
    for field, fallback in fallbacks:
        if field not in obj:
            obj[field] = fallback
    return obj
def get_job_url(job):
    """Return the direct job URL, falling back to ``job_url`` when it is missing.

    The direct URL counts as missing when it is ``None`` or when its text
    form is empty, "null", "nan" or "None". The empty-string case was not
    handled before and produced a link with an empty ``href``.
    """
    direct = job["job_url_direct"]
    if direct is None or f"{direct}".strip() in ("", "null", "nan", "None"):
        return job["job_url"]
    return direct
def get_company_url(job):
    """Return the direct company URL, falling back to ``company_url`` when missing.

    The direct URL counts as missing when it is ``None`` or when its text
    form is empty, "null", "nan" or "None". The empty-string case was not
    handled before and produced a link with an empty ``href``.
    """
    direct = job["company_url_direct"]
    if direct is None or f"{direct}".strip() in ("", "null", "nan", "None"):
        return job["company_url"]
    return direct
def get_salary(job):
    """Return the salary text to display for ``job``.

    When ``min_amount`` is present the result is "<min>-<max><currency>".
    Otherwise the AI-extracted ``salary_range`` is used; it comes from
    model-generated JSON, so ``None`` yields "", other non-string values are
    converted with ``str``, and blank text or "unknown" yields "".
    """
    minimum = job["min_amount"]
    if f"{minimum}" in ("nan", "None"):
        ai_salary = job["ai_result"]["salary_range"]
        if ai_salary is None:
            return ""
        if not isinstance(ai_salary, str):
            ai_salary = str(ai_salary)
        if ai_salary.strip().lower() in ("", "unknown"):
            return ""
        return ai_salary
    return "{}-{}{}".format(minimum, job["max_amount"], job["currency"])
def format_should_apply(should_apply):
    """Return a star HTML entity plus a space when ``should_apply`` is truthy, else ""."""
    return "&#x2B50; " if should_apply else ""
def get_logo(job):
    """Return the job's logo URL, or a placeholder image when it is missing.

    A value counts as missing when it is ``None`` or when its text form is
    empty, "nan", "None" or "null"; the original only recognised "nan".
    """
    logo = job["logo_photo_url"]
    if logo is None or f"{logo}".strip() in ("", "nan", "None", "null"):
        return "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
    return logo
def format_str_or_list(sum):
    """Render a string or list as an HTML fragment.

    A list becomes a ``<ul>`` with one ``<li>`` per item, a string has its
    newlines replaced by ``<br />``, and any other value is returned as is.
    """
    if isinstance(sum, list):
        bullets = [f"<li>{item}</li>" for item in sum]
        return "<ul>" + "".join(bullets) + "</ul>"
    if isinstance(sum, str):
        return "<br />".join(sum.split("\n"))
    return sum
def format_posted_date(date):
    """Format a posting date as dd/mm/YYYY, or "?" when it is missing.

    ``None`` and values whose text form is empty, "nan", "NaT", "None" or
    "null" are treated as missing; the original recognised only "nan" and
    raised on the others. A string is interpreted as a Unix timestamp in
    seconds; any other value must provide ``strftime``.
    """
    if date is None or f"{date}".strip() in ("", "nan", "NaT", "None", "null"):
        return "?"
    if isinstance(date, str):
        return datetime.datetime.fromtimestamp(int(date)).strftime("%d/%m/%Y")
    return date.strftime("%d/%m/%Y")
def html_format_job(job):
    """Build the HTML card for one job: logo box, linked title and AI summary."""
    pieces = [
        # Outer box.
        "<div class='job'>",
        # Logo column.
        "<div class='logobox'><img src='{}' alt='No logo' class='logo'></div>".format(get_logo(job)),
        # Text column.
        "<div style='flex: 5; padding: 10px;'>",
        "<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(get_job_url(job), format_should_apply(job["ai_result"]["should_apply"]), job["title"]),
        "<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(get_company_url(job), job["company"], job["ai_result"]["company_description"], format_posted_date(job["date_posted"])),
        "<p><h4>Position: {}</h4>{}</p>".format(get_salary(job), format_str_or_list(job["ai_result"]["position_summary"])),
        "<p><h4>Language:</h4>{}</p>".format(format_str_or_list(job["ai_result"]["language_requirements"])),
        "<p><h4>Experience:</h4>{}</p>".format(format_str_or_list(job["ai_result"]["experience_requirements"])),
        # Close the text column, then the outer box.
        "</div>",
        "</div>",
    ]
    return " ".join(pieces)
def filterout_jobs(jobs, job_filter, job_filter_negative):
    """Keep rows whose title passes the keyword filters and that are not internships.

    ``jobs`` is iterated with ``iterrows()``. A row is considered only when
    its lower-cased title contains no entry of ``job_filter_negative``, at
    least one entry of ``job_filter``, and its description does not render as
    "null", "nan" or "None". Each such row gets an ``ai_result`` entry and is
    kept when that result's ``is_an_internship`` equals ``False``.
    """
    selected_jobs = []
    for _, job in jobs.iterrows():
        if any(word in job["title"].lower() for word in job_filter_negative):
            continue
        if not any(word in job["title"].lower() for word in job_filter):
            continue
        # Rows without a description cannot be summarised by the model.
        if "{}".format(job["description"]) in ["null", "nan", "None"]:
            continue
        job["ai_result"] = get_offer_information(job["company"], job["description"])
        if job["ai_result"]["is_an_internship"] == False:
            selected_jobs.append(job)
    return selected_jobs
def html_format_page(jobs, job_filter, job_filter_negative):
    """Filter ``jobs`` and return a full HTML page with one card per kept job."""
    opening = "<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"
    kept = filterout_jobs(jobs, job_filter, job_filter_negative)
    cards = [html_format_job(job) for job in kept]
    return " ".join([opening, *cards, "</body></html>"])
def get_jobs(search_term, results_wanted):
    """Scrape LinkedIn for full-time jobs around Paris matching ``search_term``.

    Descriptions are fetched as part of the scrape. Returns whatever
    ``scrape_jobs`` returns for up to ``results_wanted`` results.
    """
    options = dict(
        site_name=["linkedin"],
        search_term=search_term,
        location="Paris, France",
        job_type="fulltime",
        results_wanted=results_wanted,
        linkedin_fetch_description=True,
        enforce_annual_salary=True,
    )
    return scrape_jobs(**options)
def linkedin_get_html():
    """Run the predefined LinkedIn searches and return the result page as HTML.

    The five result frames are concatenated, de-duplicated on ``id``, sorted
    by ``date_posted`` (newest first) and passed to ``html_format_page`` with
    the title keyword filters.
    """
    queries = ['"content writer"', '"Digital Marketing"', "Communication", '"Business development"', "SEO"]
    frames = [get_jobs(query, 50) for query in queries]
    import pandas as pd
    combined = pd.concat(frames, ignore_index=True)
    deduplicated = combined.drop_duplicates(subset='id')
    jobs = deduplicated.sort_values(by='date_posted', ascending=False)
    # Keyword filters applied to the job title.
    job_filter = ["marketing", "communication", "community", "business development", "experience", "social media", "brand", "ppc", "seo", "sea", "ads", "user acquisition", "adops", "consultant"]
    job_filter_negative = ["stage", "stagiaire", "alternant", "alternance", "intern", "internship", "apprenti"]
    return html_format_page(jobs, job_filter, job_filter_negative)

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+mistralai
+jobspy
+markdownify
+beautifulsoup4