Spaces:
Running
Running
Commit
·
cd58373
1
Parent(s):
1e0326f
First version
Browse files- WelcomeToTheJungle.py +265 -0
- app.py +34 -0
- jobspy_indeed.py +206 -0
- jobspy_linkedin.py +213 -0
- requirements.txt +4 -0
WelcomeToTheJungle.py
ADDED
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import json
|
3 |
+
from datetime import datetime
|
4 |
+
import warnings
|
5 |
+
from mistralai import Mistral, SDKError
|
6 |
+
from time import sleep
|
7 |
+
from bs4 import BeautifulSoup
|
8 |
+
from markdownify import markdownify
|
9 |
+
|
10 |
+
# Suppress every Python warning for the whole process from import time on.
warnings.filterwarnings("ignore")
|
11 |
+
import os
|
12 |
+
|
13 |
+
# Mistral model names; get_model() picks one of them at random.
models = ["mistral-small-2409", "open-mistral-nemo"]
|
14 |
+
|
15 |
+
import random
|
16 |
+
def get_model():
    """Return one model name picked at random from the module-level ``models`` list."""
    chosen_model = random.choice(models)
    return chosen_model
|
18 |
+
|
19 |
+
def call_ai(prompt, json_mode):
    """Send ``prompt`` to the model, retrying once after an ``SDKError``.

    Any other exception, and any failure of the second attempt, propagates
    to the caller unchanged.
    """
    try:
        answer = _call_ai(prompt, json_mode)
    except SDKError:
        # Wait, then try again once.
        sleep(11)
        answer = _call_ai(prompt, json_mode)
    return answer
|
29 |
+
|
30 |
+
def _call_ai(prompt, json_mode):
    """Make one chat-completion request and return the text of the first choice.

    Sleeps 1.1 s before the request, reads the API key from the
    ``MISTRAL_KEY`` environment variable (``KeyError`` if it is unset) and,
    when ``json_mode`` is truthy, asks for a JSON-object response format.
    """
    sleep(1.1)
    client = Mistral(api_key=os.environ['MISTRAL_KEY'])

    # Only pass response_format when JSON output was requested.
    request_options = {"response_format": {"type": "json_object"}} if json_mode else {}
    user_message = {"role": "user", "content": prompt}

    chat_response = client.chat.complete(
        model=get_model(),
        messages=[user_message],
        **request_options,
    )
    return chat_response.choices[0].message.content
|
50 |
+
|
51 |
+
def get_offer_information(company, offer):
    """Return the structured offer data, retrying once on an unparsable reply.

    A ``json.decoder.JSONDecodeError`` from the first attempt triggers exactly
    one more attempt; every other exception propagates unchanged.
    """
    try:
        details = _get_offer_information(company, offer)
    except json.decoder.JSONDecodeError:
        # Try again once; a second failure propagates to the caller.
        details = _get_offer_information(company, offer)
    return details
|
60 |
+
|
61 |
+
def _get_offer_information(company, offer):
    """Ask the model to summarise ``offer`` and return the parsed JSON object.

    The reply is parsed with ``json.loads``; any of the seven expected keys
    that is absent is filled with a default ("" for the text fields,
    ``False`` for ``is_an_internship``, ``True`` for ``should_apply``).

    Raises:
        json.JSONDecodeError: when the reply is not valid JSON, or is valid
            JSON but not an object (e.g. a list), so the retry in
            ``get_offer_information`` applies to both cases.
    """
    prompt = """This is a job offer from the company '{}', make a JSON with this information:
- company_description (string): a description of the company in less than 15 words.
- position_summary (string): a summary of the role in 3 bullet points
- language_requirements (string): the language requirements in French and English
- experience_requirements (string): the experience requirements
- is_an_internship (Boolean): true if it's an internship, false otherwise
- salary_range (string): the salary range in yearly salary if stated, write 'unknown' otherwise
- should_apply (Boolean): True if the offer requires up to 2 years of work experience and does not ask for other languages than English, French, Hindi or Nepali

Be concise in each answer. Answer in English.

Example:
{{
'company_description': 'Galileo Global Education: A leading international network of higher education institutions.',
'position_summary': 'Project Manager Marketing and Communication: Develop brand experience, manage marketing/communication plan, ensure brand image, monitor e-reputation, create content, and collaborate with digital team.',
'language_requirements': 'French Fluent and English Native',
'experience_requirements': 'Previous experience in a similar role, preferably in an agency.',
'is_an_internship': false,
'salary_range': '€38,000-€42,000',
'should_apply': true,
}}

Offer:
{}""".format(company, offer)
    result = call_ai(prompt, True)
    obj = json.loads(result)
    if not isinstance(obj, dict):
        # A JSON list/scalar cannot carry the expected keys; report it as a
        # decode failure so the caller's single retry kicks in.
        raise json.JSONDecodeError("Expected a JSON object", result, 0)
    print(obj)

    # Fill in whatever the model left out.
    defaults = {
        "company_description": "",
        "position_summary": "",
        "language_requirements": "",
        "experience_requirements": "",
        "is_an_internship": False,
        "salary_range": "",
        "should_apply": True,
    }
    for key, fallback in defaults.items():
        obj.setdefault(key, fallback)

    return obj
|
106 |
+
|
107 |
+
def get_offer(url):
    """Download an offer page and return the text of its position section.

    Returns "" when the response status is not 200 or when the page has no
    ``<div id="the-position-section">`` element.
    """
    # NOTE(review): verify=False disables TLS certificate verification; it is
    # kept as in the original, confirm whether it is still required.
    # The timeout prevents a stalled server from hanging the whole search.
    response = requests.get(url, verify=False, timeout=30)

    if response.status_code != 200:
        return ""

    # Extract the text from the response
    soup = BeautifulSoup(response.text, 'html.parser')
    match = soup.find('div', {'id': 'the-position-section'})
    if match is None:
        # Page layout differs from what is expected: treat as "no offer text".
        return ""

    return markdownify(match.text.strip())
|
120 |
+
|
121 |
+
def get_extra_information_from_ai(company, url):
    """Fetch the offer text behind ``url`` and return the model's structured summary of it."""
    return get_offer_information(company, get_offer(url))
|
124 |
+
|
125 |
+
def get_salary(job):
    """Return the salary range extracted by the model, or "" when unknown.

    ``job["ai_result"]["salary_range"]`` comes from parsed JSON, so it may be
    null or a number rather than a string; those cases no longer crash.
    An empty value or "unknown" (any case, surrounding blanks ignored) yields "".
    """
    salary = job["ai_result"].get("salary_range")
    if salary is None:
        return ""
    salary = str(salary)
    if salary.strip().lower() in ("", "unknown"):
        return ""
    return salary
|
129 |
+
|
130 |
+
def format_should_apply(should_apply):
    """Return the star prefix "⭐ " for offers worth applying to, else "".

    The flag comes from model-generated JSON, so it may arrive as a string;
    textual negatives such as "false" or "no" are treated as false instead
    of being truthy non-empty strings.
    """
    if isinstance(should_apply, str):
        should_apply = should_apply.strip().lower() not in ("", "false", "no", "0", "none", "null")
    return "⭐ " if should_apply else ""
|
134 |
+
|
135 |
+
def get_logo(job):
    """Return the job's ``logo_photo_url`` or a placeholder image URL.

    The placeholder is used when the value is missing, None, empty, or
    formats as "nan"/"None".

    NOTE(review): the job dicts built by ``get_jobs`` in this file carry
    ``organization_logo_url`` rather than ``logo_photo_url``, and
    ``html_format_job`` here does not call this helper - confirm it is needed.
    """
    placeholder = "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
    logo = job.get("logo_photo_url")
    if logo is None or str(logo).strip().lower() in ("", "nan", "none"):
        return placeholder
    return logo
|
139 |
+
|
140 |
+
def format_str_or_list(sum):
    """Turn a model-produced string or list into an HTML fragment.

    A string has its text HTML-escaped and newlines turned into ``<br />``;
    a list becomes a ``<ul>`` with one escaped ``<li>`` per item; any other
    value is returned unchanged.

    The parameter keeps its original name (which shadows the ``sum`` builtin)
    so existing keyword callers keep working.
    """
    import html  # local import: the module header is outside this block

    if isinstance(sum, str):
        # Escape first, then insert the only markup this branch is meant to add.
        return html.escape(sum).replace("\n", "<br />")
    if isinstance(sum, list):
        items = "".join("<li>{}</li>".format(html.escape(str(item))) for item in sum)
        return "<ul>" + items + "</ul>"
    return sum
|
146 |
+
|
147 |
+
def html_format_job(job):
    """Render one job dict as an HTML card (logo box plus text column).

    Reads ``organization_logo_url``, ``URL``, ``name``, ``organization_name``,
    ``published_at`` and the ``ai_result`` dict from ``job``. Scraped and
    model-generated values are HTML-escaped before being interpolated; the
    fragments returned by ``format_str_or_list`` are inserted as-is because
    that helper produces markup.
    """
    import html  # local import: the module header is outside this block

    def esc(value):
        # Quotes and angle brackets in data must not break tags or attributes.
        return html.escape(str(value), quote=True)

    ai_result = job["ai_result"]
    result = [
        #open box
        "<div class='job'>",
        #logo
        "<div class='logobox'><img src='{}' alt='Logo' class='logo'></div>".format(
            esc(job["organization_logo_url"])),
        #text part
        "<div style='flex: 5; padding: 10px;'>",
        "<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(
            esc(job["URL"]), format_should_apply(ai_result["should_apply"]), esc(job["name"])),
        "<p>{} ({}) - published at {}</p>".format(
            esc(job["organization_name"]), esc(ai_result["company_description"]), esc(job["published_at"])),
        "<p><h4>Position: {}</h4>{}</p>".format(
            esc(get_salary(job)), format_str_or_list(ai_result["position_summary"])),
        "<p><h4>Language:</h4>{}</p>".format(
            format_str_or_list(ai_result["language_requirements"])),
        "<p><h4>Experience:</h4>{}</p>".format(
            format_str_or_list(ai_result["experience_requirements"])),
        #close text part
        "</div>",
        #close box
        "</div>",
    ]
    return " ".join(result)
|
164 |
+
|
165 |
+
def filterout_jobs(jobs, job_filter, job_filter_negative):
    """Keep the jobs whose title passes the keyword filters and that are not internships.

    A job is considered when its lower-cased ``name`` contains none of
    ``job_filter_negative`` and at least one of ``job_filter``. Each such job
    gets an ``ai_result`` entry (the input dict is mutated) and is kept unless
    the model flagged it as an internship.

    The internship flag comes from model-generated JSON; a missing, null or
    textual "false" flag no longer discards the job the way ``== False`` did.
    """
    # NOTE(review): matching is by substring, so "intern" also excludes
    # "international" and "sea" also matches "research" - confirm intent.
    selected_jobs = []
    for job in jobs:
        title = str(job["name"]).lower()
        if any(item in title for item in job_filter_negative):
            continue
        if not any(item in title for item in job_filter):
            continue

        job["ai_result"] = get_extra_information_from_ai(job["organization_name"], job["URL"])
        is_internship = job["ai_result"]["is_an_internship"]
        if isinstance(is_internship, str):
            is_internship = is_internship.strip().lower() in ("true", "yes", "1")
        if not is_internship:
            selected_jobs.append(job)

    return selected_jobs
|
174 |
+
|
175 |
+
def html_format_page(jobs, job_filter, job_filter_negative):
    """Filter ``jobs`` and return a complete HTML page with one card per kept job."""
    page_open = "<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"
    page_close = "</body></html>"

    kept_jobs = filterout_jobs(jobs, job_filter, job_filter_negative)
    cards = [html_format_job(job) for job in kept_jobs]
    return " ".join([page_open] + cards + [page_close])
|
182 |
+
|
183 |
+
def get_jobs(search_term):
    """Search the Welcome to the Jungle Algolia index and return a list of job dicts.

    Each dict carries ``name``, ``slug``, ``published_at`` (formatted as
    "%d/%m/%Y %H:%M:%S", or the string "None"), ``organization_name``,
    ``organization_size``, ``organization_logo_url``, ``organization_slug``,
    ``objectID`` and ``URL``.

    Raises:
        requests.HTTPError: when the search endpoint answers with an error status.
    """
    from urllib.parse import quote  # local import: the module header is outside this block

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0",
        "Accept": "*/*",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate, br",
        "Origin": "https://www.welcometothejungle.com",
        "Connection": "keep-alive",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "no-cors",
        "Sec-Fetch-Site": "cross-site",
        "content-type": "application/x-www-form-urlencoded",
        "Referer": "https://www.welcometothejungle.com/",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
        "x-algolia-agent": "Algolia for JavaScript (4.14.3); Browser (lite); JS Helper (3.11.2); react (17.0.2); react-instantsearch (6.38.3)",
        "x-algolia-api-key": "02f0d440abc99cae37e126886438b266",
        "x-algolia-application-id": "CSEKHVMS53"
    }

    # Percent-encode the whole term (not only spaces) so characters such as
    # '"' or '&' cannot break the JSON body or the params query string.
    encoded_term = quote(search_term.lower(), safe="")

    # Same parameter string as before, split at each '&' for readability.
    params = (
        "analyticsTags=%5B%22page%3Ajobs_index%22%2C%22language%3Aen%22%5D"
        "&aroundLatLng=48.85718%2C2.34141"
        "&aroundPrecision=20000"
        "&aroundRadius=20000"
        "&attributesToHighlight=%5B%22name%22%5D"
        "&attributesToRetrieve=%5B%22_geoloc%22%2C%22contract_type%22%2C%22experience_level_minimum%22%2C%22name%22%2C%22objectID%22%2C%22office%22%2C%22offices%22%2C%22organization.logo.url%22%2C%22organization.name%22%2C%22organization.reference%22%2C%22organization.slug%22%2C%22organization.website_organization%22%2C%22organization.descriptions%22%2C%22organization.has_default_job%22%2C%22promoted%22%2C%22published_at%22%2C%22reference%22%2C%22remote%22%2C%22slug%22%2C%22website%22%2C%22contract_type_names.en%22%2C%22organization.cover_image.en.small.url%22%2C%22organization.size.en%22%2C%22profession.category.en%22%2C%22profession.name.en%22%2C%22sectors_name.en%22%5D"
        "&clickAnalytics=true"
        "&facetFilters=%5B%5B%22contract_type_names.en%3AFull-Time%22%5D%2C%5B%22language%3Aen%22%5D%5D"
        "&facets=%5B%22offices.country_code%22%2C%22offices.state%22%2C%22offices.district%22%2C%22offices.location%22%2C%22online%22%2C%22organization.name%22%2C%22remote%22%2C%22contract_type_names.en%22%2C%22sectors_name.en.Advertising%20%2F%20Marketing%20%2F%20Agency%22%2C%22sectors_name.en.Architecture%22%2C%22sectors_name.en.Banking%20%2F%20Insurance%20%2F%20Finance%22%2C%22sectors_name.en.Consulting%20%2F%20Audit%22%2C%22sectors_name.en.Corporate%20Services%22%2C%22sectors_name.en.Culture%20%2F%20Media%20%2F%20Entertainment%22%2C%22sectors_name.en.Distribution%22%2C%22sectors_name.en.Education%20%2F%20Training%20%2F%20Recruitment%22%2C%22sectors_name.en.Engineering%22%2C%22sectors_name.en.Fashion%20%2F%20Luxury%20%2F%20Beauty%20%2F%20Lifestyle%22%2C%22sectors_name.en.Food%20and%20Beverage%22%2C%22sectors_name.en.Health%20%2F%20Social%20%2F%20Environment%22%2C%22sectors_name.en.Hotel%20%2F%20Tourism%20%2F%20Leisure%22%2C%22sectors_name.en.Industry%22%2C%22sectors_name.en.Legal%20%2F%20Law%22%2C%22sectors_name.en.Mobility%20%2F%20Transport%22%2C%22sectors_name.en.Nonprofit%20%2F%20Association%22%2C%22sectors_name.en.Public%20Administration%22%2C%22sectors_name.en.Real%20Estate%22%2C%22sectors_name.en.Tech%22%2C%22sectors.parent.en%22%2C%22profession_name.en.Audit%20%2F%20Finance%20%2F%20Insurance%22%2C%22profession_name.en.Business%22%2C%22profession_name.en.Consulting%22%2C%22profession_name.en.Customer%20Service%22%2C%22profession_name.en.Design%22%2C%22profession_name.en.Fashion%22%2C%22profession_name.en.Health%20%2F%20Medical%20%2F%20Social%22%2C%22profession_name.en.Hospitality%20%2F%20Restaurant%20services%22%2C%22profession_name.en.Industry%22%2C%22profession_name.en.Marketing%20%2F%20Communications%22%2C%22profession_name.en.Media%22%2C%22profession_name.en.Real%20Estate%22%2C%22profession_name.en.Retail%22%2C%22profession_name.en.Support%20Roles%22%2C%22profession_name.en.Tech%22%2C%22profession_name.en.Tourism%22%2C%22profession.category.en%22%2C%22experience_level_minimum%22%2C%22organization.size.en%22%2C%22language%22%5D"
        "&filters=website.reference%3Awttj_fr"
        "&getRankingInfo=true"
        "&highlightPostTag=%3C%2Fais-highlight-0000000000%3E"
        "&highlightPreTag=%3Cais-highlight-0000000000%3E"
        "&hitsPerPage=80"
        "&maxValuesPerFacet=999"
        "&numericFilters=%5B%22experience_level_minimum%3E%3D0%22%2C%22experience_level_minimum%3C%3D2%22%5D"
        "&page=0"
        "&query=" + encoded_term +
        "&tagFilters="
        "&userToken=00c5e1a5-e384-4def-bae4-1d466974cc2d"
    )

    # Serialise with json.dumps so the body is always well-formed JSON.
    data = json.dumps({
        "requests": [{
            "indexName": "wk_cms_jobs_production_published_at_desc",
            "params": params,
        }]
    })

    # The previous URL contained "^%^" sequences (cmd.exe escaping left over
    # from a copied command); they are restored to plain percent-encoding.
    url = "https://csekhvms53-dsn.algolia.net/1/indexes/*/queries?x-algolia-agent=Algolia%20for%20JavaScript%20(4.14.3)%3B%20Browser%20(lite)%3B%20JS%20Helper%20(3.11.2)%3B%20react%20(17.0.2)%3B%20react-instantsearch%20(6.38.3)&x-algolia-api-key=02f0d440abc99cae37e126886438b266&x-algolia-application-id=CSEKHVMS53&search_origin=jobs_search_client"

    # NOTE(review): verify=False disables TLS certificate verification; it is
    # kept as in the original, confirm whether it is still required.
    response = requests.post(url, headers=headers, data=data, verify=False, timeout=30)
    # Fail with a clear HTTP error instead of a KeyError on "results" below.
    response.raise_for_status()

    #parse result
    jsonResponse = json.loads(response.text)
    results = jsonResponse["results"]
    hits = results[0]["hits"]
    jobs = []
    for hit in hits:
        #get the info
        organization = hit["organization"]
        job = {}
        job["name"] = hit["name"]
        job["slug"] = hit["slug"]
        if hit["published_at"] is not None:
            published_at = datetime.strptime(hit["published_at"], '%Y-%m-%dT%H:%M:%S.%f%z')
            job["published_at"] = published_at.strftime("%d/%m/%Y %H:%M:%S")
        else:
            job["published_at"] = "None"
        job["organization_name"] = organization["name"]
        # Size and logo may be absent or null; fall back to "".
        job["organization_size"] = (organization.get("size") or {}).get("en", "")
        job["organization_logo_url"] = (organization.get("logo") or {}).get("url", "")
        job["organization_slug"] = organization["website_organization"]["slug"]
        job["objectID"] = hit["objectID"]
        job["URL"] = "https://www.welcometothejungle.com/en/companies/{}/jobs/{}?o={}".format(job["organization_slug"], job["slug"], job["objectID"])
        jobs.append(job)

    return jobs
|
241 |
+
|
242 |
+
def wtoj_get_html():
    """Run the predefined searches and return the result page as HTML.

    Results of all searches are merged, de-duplicated by ``URL``, ordered
    newest first and passed to ``html_format_page`` with the title keyword
    filters.
    """
    def published_sort_key(job):
        # published_at is a "%d/%m/%Y %H:%M:%S" string (or "None"); comparing
        # those strings orders by day-of-month first, so parse before sorting.
        try:
            return datetime.strptime(job["published_at"], "%d/%m/%Y %H:%M:%S")
        except (TypeError, ValueError):
            # Unknown dates sort last once the order is reversed.
            return datetime.min

    search_terms = ['content writer', 'Marketing', "Communication", 'Business development', "SEO"]

    seen_urls = set()
    unique_objects = []
    for term in search_terms:
        for obj in get_jobs(term):
            if obj["URL"] not in seen_urls:
                seen_urls.add(obj["URL"])
                unique_objects.append(obj)

    jobs = sorted(unique_objects, key=published_sort_key, reverse=True)

    #filter on the job title
    job_filter = ["marketing", "communication", "community", "business development", "experience", "social media", "brand", "ppc", "seo", "sea", "ads", "user acquisition", "adops", "consultant"]
    job_filter_negative = ["stage", "stagiaire", "alternant", "alternance", "intern", "internship", "apprenti"]

    return html_format_page(jobs, job_filter, job_filter_negative)
|
265 |
+
|
app.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import os
|
3 |
+
|
4 |
+
from jobspy_indeed import indeed_get_html
|
5 |
+
from WelcomeToTheJungle import wtoj_get_html
|
6 |
+
from jobspy_linkedin import linkedin_get_html
|
7 |
+
|
8 |
+
def search_jobs(api_key, platform):
    """Run the job search for the chosen platform and return its HTML page.

    Args:
        api_key: Mistral API key typed by the user; surrounding blanks are ignored.
        platform: one of "Indeed", "Welcome to the jungle" or "LinkedIn".

    Raises:
        gr.Error: when the key is missing/blank or no known platform is selected.
    """
    api_key = (api_key or "").strip()
    if not api_key:
        raise gr.Error("API key is required")
    # NOTE(review): the key is stored in a process-wide environment variable,
    # so concurrent requests overwrite each other's key - confirm acceptable.
    os.environ['MISTRAL_KEY'] = api_key

    handlers = {
        "Indeed": indeed_get_html,
        "Welcome to the jungle": wtoj_get_html,
        "LinkedIn": linkedin_get_html,
    }
    handler = handlers.get(platform)
    if handler is None:
        raise gr.Error("No platform selected")
    return handler()
|
19 |
+
|
20 |
+
|
21 |
+
# Gradio UI wiring: two inputs (API key, platform) feeding search_jobs,
# one HTML output showing the generated page.
api_key = gr.Textbox(label="API key")
platform = gr.Radio(
    label="Platform",
    choices=["Welcome to the jungle", "Indeed", "LinkedIn"],
)
output_html = gr.HTML(
    label="Result",
    value="<html><br/><br/><br/><br/></html>",
)

demo = gr.Interface(
    title="Job search",
    fn=search_jobs,
    inputs=[api_key, platform],
    outputs=[output_html],
    flagging_mode="never",
    show_progress="full",
    clear_btn=None,
)

demo.launch()
|
jobspy_indeed.py
ADDED
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import warnings
|
3 |
+
from mistralai import Mistral, SDKError
|
4 |
+
from time import sleep
|
5 |
+
|
6 |
+
from jobspy import scrape_jobs
|
7 |
+
|
8 |
+
# Suppress every Python warning for the whole process from import time on.
warnings.filterwarnings("ignore")
|
9 |
+
import os
|
10 |
+
|
11 |
+
# Mistral model names; get_model() picks one of them at random.
models = ["mistral-small-2409", "open-mistral-nemo"]
|
12 |
+
|
13 |
+
import random
|
14 |
+
def get_model():
    """Return one model name picked at random from the module-level ``models`` list."""
    chosen_model = random.choice(models)
    return chosen_model
|
16 |
+
|
17 |
+
def call_ai(prompt, json_mode):
    """Send ``prompt`` to the model, retrying once after an ``SDKError``.

    Any other exception, and any failure of the second attempt, propagates
    to the caller unchanged.
    """
    try:
        answer = _call_ai(prompt, json_mode)
    except SDKError:
        # Wait, then try again once.
        sleep(11)
        answer = _call_ai(prompt, json_mode)
    return answer
|
27 |
+
|
28 |
+
def _call_ai(prompt, json_mode):
    """Make one chat-completion request and return the text of the first choice.

    Sleeps 1.1 s before the request, reads the API key from the
    ``MISTRAL_KEY`` environment variable (``KeyError`` if it is unset) and,
    when ``json_mode`` is truthy, asks for a JSON-object response format.
    """
    sleep(1.1)
    client = Mistral(api_key=os.environ['MISTRAL_KEY'])

    # Only pass response_format when JSON output was requested.
    request_options = {"response_format": {"type": "json_object"}} if json_mode else {}
    user_message = {"role": "user", "content": prompt}

    chat_response = client.chat.complete(
        model=get_model(),
        messages=[user_message],
        **request_options,
    )
    return chat_response.choices[0].message.content
|
48 |
+
|
49 |
+
def get_offer_information(company, offer):
    """Return the structured offer data, retrying once on an unparsable reply.

    A ``json.decoder.JSONDecodeError`` from the first attempt triggers exactly
    one more attempt; every other exception propagates unchanged.
    """
    try:
        details = _get_offer_information(company, offer)
    except json.decoder.JSONDecodeError:
        # Try again once; a second failure propagates to the caller.
        details = _get_offer_information(company, offer)
    return details
|
58 |
+
|
59 |
+
def _get_offer_information(company, offer):
    """Ask the model to summarise ``offer`` and return the parsed JSON object.

    The reply is parsed with ``json.loads``; any of the seven expected keys
    that is absent is filled with a default ("" for the text fields,
    ``False`` for ``is_an_internship``, ``True`` for ``should_apply``).

    Raises:
        json.JSONDecodeError: when the reply is not valid JSON, or is valid
            JSON but not an object (e.g. a list), so the retry in
            ``get_offer_information`` applies to both cases.
    """
    prompt = """This is a job offer from the company '{}', make a JSON with this information:
- company_description (string): a description of the company in less than 15 words.
- position_summary (string): a summary of the role in 3 bullet points
- language_requirements (string): the language requirements in French and English
- experience_requirements (string): the experience requirements
- is_an_internship (Boolean): true if it's an internship, false otherwise
- salary_range (string): the salary range in yearly salary if stated, write 'unknown' otherwise
- should_apply (Boolean): True if the offer requires up to 2 years of work experience and does not ask for other languages than English, French, Hindi or Nepali

Be concise in each answer. Answer in English.

Example:
{{
'company_description': 'Galileo Global Education: A leading international network of higher education institutions.',
'position_summary': 'Project Manager Marketing and Communication: Develop brand experience, manage marketing/communication plan, ensure brand image, monitor e-reputation, create content, and collaborate with digital team.',
'language_requirements': 'French Fluent and English Native',
'experience_requirements': 'Previous experience in a similar role, preferably in an agency.',
'is_an_internship': false,
'salary_range': '€38,000-€42,000',
'should_apply': true,
}}

Offer:
{}""".format(company, offer)
    result = call_ai(prompt, True)
    obj = json.loads(result)
    if not isinstance(obj, dict):
        # A JSON list/scalar cannot carry the expected keys; report it as a
        # decode failure so the caller's single retry kicks in.
        raise json.JSONDecodeError("Expected a JSON object", result, 0)
    print(obj)

    # Fill in whatever the model left out.
    defaults = {
        "company_description": "",
        "position_summary": "",
        "language_requirements": "",
        "experience_requirements": "",
        "is_an_internship": False,
        "salary_range": "",
        "should_apply": True,
    }
    for key, fallback in defaults.items():
        obj.setdefault(key, fallback)

    return obj
|
104 |
+
|
105 |
+
def get_job_url(job):
    """Return the job's direct URL, falling back to ``job_url`` when it is missing.

    "Missing" covers None, an empty string and values that format as
    "nan"/"None", not only "" as before, so a NaN cell no longer ends up as
    the link target.
    """
    direct_url = job["job_url_direct"]
    if direct_url is None or str(direct_url).strip().lower() in ("", "nan", "none"):
        return job["job_url"]
    return direct_url
|
109 |
+
|
110 |
+
def get_company_url(job):
    """Return the company's direct URL, falling back to ``company_url`` when it is missing.

    "Missing" covers None, an empty string and values that format as
    "nan"/"None", not only "" as before.
    """
    direct_url = job["company_url_direct"]
    if direct_url is None or str(direct_url).strip().lower() in ("", "nan", "none"):
        return job["company_url"]
    return direct_url
|
114 |
+
|
115 |
+
def get_salary(job):
    """Return the salary to display for ``job``, or "" when none is known.

    When ``min_amount`` is present the result is
    "<min_amount>-<max_amount><currency>"; otherwise the model-extracted
    ``salary_range`` is used unless it is empty or "unknown". A null or
    non-string ``salary_range`` no longer crashes.
    """
    def is_missing(value):
        # Missing cells show up as None or as values formatting to "nan"/"None".
        return value is None or str(value).strip().lower() in ("", "nan", "none")

    if not is_missing(job["min_amount"]):
        return "{}-{}{}".format(job["min_amount"], job["max_amount"], job["currency"])

    salary = job["ai_result"].get("salary_range")
    if salary is None:
        return ""
    salary = str(salary)
    if salary.strip().lower() in ("", "unknown"):
        return ""
    return salary
|
121 |
+
|
122 |
+
def format_should_apply(should_apply):
    """Return the star prefix "⭐ " for offers worth applying to, else "".

    The flag comes from model-generated JSON, so it may arrive as a string;
    textual negatives such as "false" or "no" are treated as false instead
    of being truthy non-empty strings.
    """
    if isinstance(should_apply, str):
        should_apply = should_apply.strip().lower() not in ("", "false", "no", "0", "none", "null")
    return "⭐ " if should_apply else ""
|
126 |
+
|
127 |
+
def get_logo(job):
    """Return the job's ``logo_photo_url`` or a placeholder image URL.

    The placeholder is used when the value is missing, None, empty, or
    formats as "nan"/"None" (previously only "nan" was recognised).
    """
    placeholder = "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
    logo = job.get("logo_photo_url")
    if logo is None or str(logo).strip().lower() in ("", "nan", "none"):
        return placeholder
    return logo
|
131 |
+
|
132 |
+
def format_str_or_list(sum):
    """Turn a model-produced string or list into an HTML fragment.

    A string has its text HTML-escaped and newlines turned into ``<br />``;
    a list becomes a ``<ul>`` with one escaped ``<li>`` per item; any other
    value is returned unchanged.

    The parameter keeps its original name (which shadows the ``sum`` builtin)
    so existing keyword callers keep working.
    """
    import html  # local import: the module header is outside this block

    if isinstance(sum, str):
        # Escape first, then insert the only markup this branch is meant to add.
        return html.escape(sum).replace("\n", "<br />")
    if isinstance(sum, list):
        items = "".join("<li>{}</li>".format(html.escape(str(item))) for item in sum)
        return "<ul>" + items + "</ul>"
    return sum
|
138 |
+
|
139 |
+
def html_format_job(job):
    """Render one job record as an HTML card (logo box plus text column).

    Reads ``title``, ``company``, ``date_posted`` and the ``ai_result`` dict
    from ``job``, plus whatever the URL/logo/salary helpers read. Scraped and
    model-generated values are HTML-escaped before being interpolated; the
    fragments returned by ``format_str_or_list`` are inserted as-is because
    that helper produces markup.
    """
    import html  # local import: the module header is outside this block

    def esc(value):
        # Quotes and angle brackets in data must not break tags or attributes.
        return html.escape(str(value), quote=True)

    # A missing date (None, or a value whose strftime fails) must not abort
    # the whole page.
    try:
        posted_on = job["date_posted"].strftime("%d/%m/%Y")
    except (AttributeError, ValueError):
        posted_on = "unknown"

    ai_result = job["ai_result"]
    result = [
        #open box
        "<div class='job'>",
        #logo
        "<div class='logobox'><img src='{}' alt='No logo' class='logo'></div>".format(
            esc(get_logo(job))),
        #text part
        "<div style='flex: 5; padding: 10px;'>",
        "<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(
            esc(get_job_url(job)), format_should_apply(ai_result["should_apply"]), esc(job["title"])),
        "<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(
            esc(get_company_url(job)), esc(job["company"]), esc(ai_result["company_description"]), posted_on),
        "<p><h4>Position: {}</h4>{}</p>".format(
            esc(get_salary(job)), format_str_or_list(ai_result["position_summary"])),
        "<p><h4>Language:</h4>{}</p>".format(
            format_str_or_list(ai_result["language_requirements"])),
        "<p><h4>Experience:</h4>{}</p>".format(
            format_str_or_list(ai_result["experience_requirements"])),
        #close text part
        "</div>",
        #close box
        "</div>",
    ]
    return " ".join(result)
|
156 |
+
|
157 |
+
def filterout_jobs(jobs, job_filter, job_filter_negative):
    """Keep the rows whose title passes the keyword filters and that are not internships.

    ``jobs`` is iterated with ``iterrows()``. A row is considered when its
    lower-cased ``title`` contains none of ``job_filter_negative`` and at
    least one of ``job_filter``. Each such row gets an ``ai_result`` entry
    and is kept unless the model flagged it as an internship.

    The internship flag comes from model-generated JSON; a missing, null or
    textual "false" flag no longer discards the job the way ``== False`` did.
    """
    # NOTE(review): matching is by substring, so "intern" also excludes
    # "international" and "sea" also matches "research" - confirm intent.
    selected_jobs = []
    for _, job in jobs.iterrows():
        # str() guards against a missing (non-string) title cell.
        title = str(job["title"]).lower()
        if any(item in title for item in job_filter_negative):
            continue
        if not any(item in title for item in job_filter):
            continue

        job["ai_result"] = get_offer_information(job["company"], job["description"])
        is_internship = job["ai_result"]["is_an_internship"]
        if isinstance(is_internship, str):
            is_internship = is_internship.strip().lower() in ("true", "yes", "1")
        if not is_internship:
            selected_jobs.append(job)

    return selected_jobs
|
166 |
+
|
167 |
+
def html_format_page(jobs, job_filter, job_filter_negative):
    """Filter ``jobs`` and return a complete HTML page with one card per kept job."""
    page_open = "<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"
    page_close = "</body></html>"

    kept_jobs = filterout_jobs(jobs, job_filter, job_filter_negative)
    cards = [html_format_job(job) for job in kept_jobs]
    return " ".join([page_open] + cards + [page_close])
|
174 |
+
|
175 |
+
def get_jobs(search_term, results_wanted):
    """Call ``scrape_jobs`` for full-time Indeed offers in Paris matching ``search_term``.

    Returns whatever ``scrape_jobs`` returns (callers in this file use it as
    a pandas DataFrame).
    """
    scrape_options = dict(
        site_name=["indeed"],
        search_term=search_term,
        location="Paris, France",
        job_type="fulltime",
        results_wanted=results_wanted,
        country_indeed='France',  # only needed for indeed / glassdoor
        enforce_annual_salary=True,
        linkedin_fetch_description=False,  # LinkedIn-only option, kept off
    )
    return scrape_jobs(**scrape_options)
|
188 |
+
|
189 |
+
def indeed_get_html():
    """Run the predefined Indeed searches and return the result page as HTML.

    The five result sets (50 results requested each) are concatenated,
    de-duplicated on ``id``, ordered by ``date_posted`` descending and passed
    to ``html_format_page`` with the title keyword filters.
    """
    search_terms = ['"content writer"', '"Digital Marketing"', "Communication", '"Business development"', "SEO"]
    frames = [get_jobs(term, 50) for term in search_terms]

    import pandas as pd
    merged = pd.concat(frames, ignore_index=True)
    deduplicated = merged.drop_duplicates(subset='id')
    jobs = deduplicated.sort_values(by='date_posted', ascending=False)

    # Keyword lists applied to the job titles by the filtering step.
    job_filter = ["marketing", "communication", "community", "business development", "experience", "social media", "brand", "ppc", "seo", "sea", "ads", "user acquisition", "adops", "consultant"]
    job_filter_negative = ["stage", "stagiaire", "alternant", "alternance", "intern", "internship", "apprenti"]

    return html_format_page(jobs, job_filter, job_filter_negative)
|
206 |
+
|
jobspy_linkedin.py
ADDED
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import warnings
|
3 |
+
import datetime
|
4 |
+
from mistralai import Mistral, SDKError
|
5 |
+
from time import sleep
|
6 |
+
|
7 |
+
from jobspy import scrape_jobs
|
8 |
+
|
9 |
+
# Suppress every Python warning for the whole process from import time on.
warnings.filterwarnings("ignore")
|
10 |
+
import os
|
11 |
+
|
12 |
+
# Mistral model names; get_model() picks one of them at random.
models = ["mistral-small-2409", "open-mistral-nemo"]
|
13 |
+
|
14 |
+
import random
|
15 |
+
def get_model():
    """Return one model name picked at random from the module-level ``models`` list."""
    chosen_model = random.choice(models)
    return chosen_model
|
17 |
+
|
18 |
+
def call_ai(prompt, json_mode):
    """Send ``prompt`` to the model, retrying once after an ``SDKError``.

    Any other exception, and any failure of the second attempt, propagates
    to the caller unchanged.
    """
    try:
        answer = _call_ai(prompt, json_mode)
    except SDKError:
        # Wait, then try again once.
        sleep(11)
        answer = _call_ai(prompt, json_mode)
    return answer
|
28 |
+
|
29 |
+
def _call_ai(prompt, json_mode):
    """Make one chat-completion request and return the text of the first choice.

    Sleeps 1.1 s before the request, reads the API key from the
    ``MISTRAL_KEY`` environment variable (``KeyError`` if it is unset) and,
    when ``json_mode`` is truthy, asks for a JSON-object response format.
    """
    sleep(1.1)
    client = Mistral(api_key=os.environ['MISTRAL_KEY'])

    # Only pass response_format when JSON output was requested.
    request_options = {"response_format": {"type": "json_object"}} if json_mode else {}
    user_message = {"role": "user", "content": prompt}

    chat_response = client.chat.complete(
        model=get_model(),
        messages=[user_message],
        **request_options,
    )
    return chat_response.choices[0].message.content
|
49 |
+
|
50 |
+
def get_offer_information(company, offer):
    """Return the structured offer data, retrying once on an unparsable reply.

    A ``json.decoder.JSONDecodeError`` from the first attempt triggers exactly
    one more attempt; every other exception propagates unchanged.
    """
    try:
        details = _get_offer_information(company, offer)
    except json.decoder.JSONDecodeError:
        # Try again once; a second failure propagates to the caller.
        details = _get_offer_information(company, offer)
    return details
|
59 |
+
|
60 |
+
def _get_offer_information(company, offer):
    """Ask the model for a JSON summary of ``offer`` and fill in missing keys.

    Args:
        company: Company name inserted into the prompt.
        offer: Offer text appended at the end of the prompt.

    Returns:
        The parsed JSON reply, with a default added for every expected key
        that the reply did not contain.

    Raises:
        json.decoder.JSONDecodeError: If the reply is not valid JSON.
    """
    template = """This is a job offer from the company '{}', make a JSON with this information:
- company_description (string): a description of the company in less than 15 words.
- position_summary (string): a summary of the role in 3 bullet points
- language_requirements (string): the language requirements in French and English
- experience_requirements (string): the experience requirements
- is_an_internship (Boolean): true if it's an internship, false otherwise
- salary_range (string): the salary range in yearly salary if stated, write 'unknown' otherwise
- should_apply (Boolean): True if the offer requires up to 2 years of work experience and does not ask for other languages than English, French, Hindi or Nepali

Be concise in each answer. Answer in English.

Example:
{{
'company_description': 'Galileo Global Education: A leading international network of higher education institutions.',
'position_summary': 'Project Manager Marketing and Communication: Develop brand experience, manage marketing/communication plan, ensure brand image, monitor e-reputation, create content, and collaborate with digital team.',
'language_requirements': 'French Fluent and English Native',
'experience_requirements': 'Previous experience in a similar role, preferably in an agency.',
'is_an_internship': false,
'salary_range': '€38,000-€42,000',
'should_apply': true,
}}

Offer:
{}"""
    prompt = template.format(company, offer)
    obj = json.loads(call_ai(prompt, True))
    print(obj)

    # Default used for each expected key that the reply left out.
    fallbacks = (
        ("company_description", ""),
        ("position_summary", ""),
        ("language_requirements", ""),
        ("experience_requirements", ""),
        ("is_an_internship", False),
        ("salary_range", ""),
        ("should_apply", True),
    )
    for key, fallback in fallbacks:
        if key not in obj:
            obj[key] = fallback

    return obj
|
105 |
+
|
106 |
+
def get_job_url(job):
    """Return ``job["job_url_direct"]``, or ``job["job_url"]`` when it is missing.

    A value counts as missing when its text form is "null", "nan" or "None".
    """
    direct_url = job["job_url_direct"]
    if f"{direct_url}" in ("null", "nan", "None"):
        return job["job_url"]
    return direct_url
|
110 |
+
|
111 |
+
def get_company_url(job):
    """Return ``job["company_url_direct"]``, or ``job["company_url"]`` when missing.

    A value counts as missing when its text form is "null", "nan" or "None".
    """
    direct_url = job["company_url_direct"]
    if f"{direct_url}" in ("null", "nan", "None"):
        return job["company_url"]
    return direct_url
|
115 |
+
|
116 |
+
def get_salary(job):
    """Return the salary text to display for ``job``.

    When ``job["min_amount"]`` is present the result is
    ``"<min_amount>-<max_amount><currency>"``. Otherwise the salary range
    extracted by the model (``job["ai_result"]["salary_range"]``) is used,
    unless it is empty or "unknown" (case-insensitive).

    Args:
        job: Mapping-like job record.

    Returns:
        The salary as a string, or "" when nothing usable is available.
    """
    if "{}".format(job["min_amount"]) not in ("nan", "None"):
        return "{}-{}{}".format(job["min_amount"], job["max_amount"], job["currency"])

    salary_range = job["ai_result"]["salary_range"]
    if salary_range is None:
        return ""
    # The value comes from parsed JSON and is not guaranteed to be a string
    # (it can be a number, for example), so convert before calling .lower().
    salary_text = salary_range if isinstance(salary_range, str) else str(salary_range)
    if salary_text.lower() in ("", "unknown"):
        return ""
    return salary_text
|
122 |
+
|
123 |
+
def format_should_apply(should_apply):
    """Return the star prefix for a truthy ``should_apply``, else an empty string."""
    return "⭐ " if should_apply else ""
|
127 |
+
|
128 |
+
def get_logo(job):
    """Return the logo URL of ``job``, or a placeholder image when it is missing.

    A logo counts as missing when its text form is "", "null", "nan" or
    "None". These are the markers the URL helpers in this file use, plus the
    empty string. Checking only "nan" would let ``None`` through and produce
    an ``<img src='None'>`` tag.

    Args:
        job: Mapping-like job record with a ``logo_photo_url`` entry.

    Returns:
        The logo URL, or the placeholder URL.
    """
    placeholder = "https://e7.pngegg.com/pngimages/153/807/png-clipart-timer-clock-computer-icons-unknown-planet-digital-clock-time.png"
    logo_url = job["logo_photo_url"]
    if "{}".format(logo_url) in ("", "null", "nan", "None"):
        return placeholder
    return logo_url
|
132 |
+
|
133 |
+
def format_str_or_list(sum):
    """Render a string or a list as an HTML fragment.

    A list becomes a ``<ul>`` with one ``<li>`` per item, a string has its
    newlines replaced by ``<br />``, and any other value is returned unchanged.
    """
    if isinstance(sum, list):
        bullet_items = ["<li>{}</li>".format(entry) for entry in sum]
        return "<ul>" + "".join(bullet_items) + "</ul>"
    if isinstance(sum, str):
        return "<br />".join(sum.split("\n"))
    return sum
|
139 |
+
|
140 |
+
def format_posted_date(date):
    """Format a posting date as ``dd/mm/YYYY``.

    Args:
        date: A date/datetime-like object with ``strftime``, a string holding
            a Unix timestamp in seconds, or a missing value.

    Returns:
        The formatted date; "?" when the value is missing (its text form is
        "", "nan", "NaT", "None" or "null"); or the string itself when it is
        not a numeric timestamp.
    """
    # Missing values would otherwise reach .strftime() and raise.
    if "{}".format(date) in ("", "nan", "NaT", "None", "null"):
        return "?"
    if isinstance(date, str):
        try:
            timestamp = int(date)
        except ValueError:
            # Not a numeric timestamp: show the text as received instead of
            # failing the whole page.
            return date
        return datetime.datetime.fromtimestamp(timestamp).strftime("%d/%m/%Y")
    return date.strftime("%d/%m/%Y")
|
146 |
+
|
147 |
+
def html_format_job(job):
    """Render one job as an HTML ``<div class='job'>`` box.

    The box holds the logo, then a text column with the linked title, the
    company line, and the position, language and experience sections taken
    from ``job["ai_result"]``.

    Returns:
        The HTML fragments joined with single spaces.
    """
    logo_html = "<div class='logobox'><img src='{}' alt='No logo' class='logo'></div>".format(get_logo(job))
    ai_result = job["ai_result"]
    title_html = "<h3><a href='{}' target='_blank'>{}{}</a></h3>".format(
        get_job_url(job), format_should_apply(ai_result["should_apply"]), job["title"]
    )
    company_html = "<p><a href='{}' target='_blank'>{}</a> ({}) - published at {}</p>".format(
        get_company_url(job),
        job["company"],
        ai_result["company_description"],
        format_posted_date(job["date_posted"]),
    )
    position_html = "<p><h4>Position: {}</h4>{}</p>".format(
        get_salary(job), format_str_or_list(ai_result["position_summary"])
    )
    language_html = "<p><h4>Language:</h4>{}</p>".format(
        format_str_or_list(ai_result["language_requirements"])
    )
    experience_html = "<p><h4>Experience:</h4>{}</p>".format(
        format_str_or_list(ai_result["experience_requirements"])
    )

    fragments = [
        "<div class='job'>",  # open box
        logo_html,
        "<div style='flex: 5; padding: 10px;'>",  # open text part
        title_html,
        company_html,
        position_html,
        language_html,
        experience_html,
        "</div>",  # close text part
        "</div>",  # close box
    ]
    return " ".join(fragments)
|
164 |
+
|
165 |
+
def filterout_jobs(jobs, job_filter, job_filter_negative):
    """Select the jobs worth showing and attach the model analysis to each.

    A job is analysed only when its title is a string that contains none of
    ``job_filter_negative``, contains at least one of ``job_filter``, and its
    description is present. It is kept when the analysis says it is not an
    internship.

    Args:
        jobs: Object whose ``iterrows()`` yields ``(index, job)`` pairs.
        job_filter: Lower-case substrings, one of which must be in the title.
        job_filter_negative: Lower-case substrings that exclude a title.

    Returns:
        List of kept job rows, each with an added ``ai_result`` entry.
    """
    selected_jobs = []
    for _, job in jobs.iterrows():
        title = job["title"]
        if not isinstance(title, str):
            # A missing title (NaN/None) has no .lower() and cannot match
            # any keyword, so the job is skipped instead of raising.
            continue
        title = title.lower()  # lower-cased once, not once per filter term
        if any(term in title for term in job_filter_negative):
            continue
        if not any(term in title for term in job_filter):
            continue
        if "{}".format(job["description"]) in ("null", "nan", "None"):
            continue

        job["ai_result"] = get_offer_information(job["company"], job["description"])
        # Deliberate equality test: only False (or 0) keeps the job; any
        # other value, such as None or a string, drops it.
        if job["ai_result"]["is_an_internship"] == False:  # noqa: E712
            selected_jobs.append(job)

    return selected_jobs
|
174 |
+
|
175 |
+
def html_format_page(jobs, job_filter, job_filter_negative):
    """Build the full HTML page for the jobs that pass ``filterout_jobs``.

    Returns:
        The page opening (with inline CSS), one box per selected job and the
        closing tags, joined with single spaces.
    """
    selected_jobs = filterout_jobs(jobs, job_filter, job_filter_negative)
    page_open = "<html><head><style>.job{display: flex;width:70%;margin: 5px auto;border: 1px solid;border-radius: 5px;}.logobox{flex: 1;display: flex;align-items: center;justify-content: center;}.logo{width:100px;height:100px}h4{margin: 2px;}</style></head><body>"
    job_boxes = [html_format_job(job) for job in selected_jobs]
    return " ".join([page_open, *job_boxes, "</body></html>"])
|
182 |
+
|
183 |
+
|
184 |
+
def get_jobs(search_term, results_wanted):
    """Scrape full-time LinkedIn postings for "Paris, France".

    Args:
        search_term: Query passed to ``scrape_jobs``.
        results_wanted: Number of results requested from ``scrape_jobs``.

    Returns:
        Whatever ``scrape_jobs`` returns for these options.
    """
    options = {
        "site_name": ["linkedin"],
        "search_term": search_term,
        "location": "Paris, France",
        "job_type": "fulltime",
        "results_wanted": results_wanted,
        # "hours_old" is not set, so postings are not limited by age.
        "linkedin_fetch_description": True,
        "enforce_annual_salary": True,
    }
    return scrape_jobs(**options)
|
195 |
+
|
196 |
+
def linkedin_get_html():
    """Scrape LinkedIn for a fixed set of searches and return the HTML report.

    Up to 50 results are requested per search term. The results are
    concatenated, de-duplicated on ``id``, sorted by ``date_posted`` (newest
    first) and passed to ``html_format_page`` with the keyword filters below.

    Returns:
        The HTML page produced by ``html_format_page``.
    """
    import pandas as pd

    search_terms = [
        '"content writer"',
        '"Digital Marketing"',
        "Communication",
        '"Business development"',
        "SEO",
    ]
    frames = [get_jobs(term, 50) for term in search_terms]

    jobs = (
        pd.concat(frames, ignore_index=True)
        .drop_duplicates(subset='id')
        .sort_values(by='date_posted', ascending=False)
    )

    # Keywords are matched against the lower-cased job title: at least one
    # positive keyword is required and any negative keyword excludes the job.
    job_filter = ["marketing", "communication", "community", "business development", "experience", "social media", "brand", "ppc", "seo", "sea", "ads", "user acquisition", "adops", "consultant"]
    job_filter_negative = ["stage", "stagiaire", "alternant", "alternance", "intern", "internship", "apprenti"]

    return html_format_page(jobs, job_filter, job_filter_negative)
|
213 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
mistralai
|
2 |
+
jobspy
|
3 |
+
markdownify
|
4 |
+
beautifulsoup4
|