Robzy committed on
Commit
b2d71fe
·
1 Parent(s): 3df06be
Files changed (7) hide show
  1. app.py +55 -0
  2. browser.py +150 -0
  3. job-search.py +35 -0
  4. main.ipynb +382 -0
  5. pyproject.toml +38 -0
  6. requirements.txt +10 -0
  7. uv.lock +0 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.optim as optim
5
+ from transformers import AutoTokenizer, AutoModel
6
+ import torch.nn.functional as F
7
+ import timm
8
+ from huggingface_hub import PyTorchModelHubMixin
9
+
10
+
11
+
12
class TwoLayerNN(nn.Module, PyTorchModelHubMixin):
    """Small feed-forward head: Linear -> ReLU -> Linear -> Sigmoid.

    The attribute names (``fc1``, ``relu``, ``fc2``, ``sigmoid``) are part of
    the saved checkpoint layout, so they must not be renamed.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of sigmoid outputs.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Layers are created in the same order as before so parameter
        # initialisation consumes the RNG identically.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map ``x`` to sigmoid-squashed scores in the range (0, 1)."""
        hidden = self.relu(self.fc1(x))
        return self.sigmoid(self.fc2(hidden))
26
+
27
+
28
+
29
# Classifier head fetched from the Hub; the dimensions are forwarded to
# TwoLayerNN.__init__ and must match the published weights.
classifier = TwoLayerNN.from_pretrained(
    "Robzy/job-classifier",
    input_dim=384,
    hidden_dim=128,
    output_dim=1,
)

# Tokenizer and encoder come from the same checkpoint so they stay in sync.
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
embedding_model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
32
+
33
+
34
def mean_pooling(model_output, attention_mask):
    """Average token embeddings over dimension 1, ignoring masked positions.

    Args:
        model_output: Indexable model output whose first element holds the
            token embeddings.
        attention_mask: Mask tensor; it is given a trailing axis and expanded
            to the embeddings' size, so zeros drop a token from the average.

    Returns:
        The mask-weighted sum over dimension 1 divided by the mask total.
        The divisor is clamped to 1e-9 so an all-zero mask does not divide
        by zero.
    """
    token_embeddings = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    weighted_sum = torch.sum(token_embeddings * mask, 1)
    mask_total = torch.clamp(mask.sum(1), min=1e-9)
    return weighted_sum / mask_total
38
+
39
def infer(text):
    """Score a job description with the pretrained classifier.

    Args:
        text: Job description text; passed directly to the tokenizer.

    Returns:
        The classifier's sigmoid output as a Python float. ``.item()`` is
        used, so the classifier must yield exactly one value.
    """
    encoded_input = tokenizer(text, padding=True, truncation=True, return_tensors='pt')

    # This is pure inference, so the encoder, pooling and classifier passes
    # all run under no_grad; previously the classifier call sat outside it
    # and built an autograd graph that was never used.
    with torch.no_grad():
        model_output = embedding_model(**encoded_input)
        sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
        sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
        output = classifier(sentence_embeddings)

    return output.item()
51
+
52
+
53
# The Textbox must be handed to the Interface as its input component.
# Created on its own after the Interface (as before), it was never attached,
# so its label and placeholder never showed up in the UI.
demo = gr.Interface(
    fn=infer,
    inputs=gr.Textbox(placeholder="Enter job description here", label="Job Description"),
    outputs="text",
)
demo.launch()
browser.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import BytesIO
2
+ from time import sleep
3
+
4
+ import helium
5
+ from dotenv import load_dotenv
6
+ from PIL import Image
7
+ from selenium import webdriver
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.common.keys import Keys
10
+
11
+ from smolagents import CodeAgent, tool
12
+ from smolagents.agents import ActionStep
13
+
14
+ # Load environment variables
15
+ load_dotenv()
16
+
17
+
18
@tool
def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
    """
    Searches for text on the current page via Ctrl + F and jumps to the nth occurrence.
    Args:
        text: The text to search for
        nth_result: Which occurrence to jump to (default: 1)
    """
    # nth_result is 1-based. Without this check, 0 or a negative value would
    # silently wrap around through Python's negative indexing.
    if nth_result < 1:
        raise ValueError(f"nth_result must be >= 1 (got {nth_result})")

    # Build a valid XPath string literal even when the text contains quotes.
    # XPath 1.0 has no escape character, so text with both quote kinds is
    # assembled with concat().
    if "'" not in text:
        xpath_text = f"'{text}'"
    elif '"' not in text:
        xpath_text = f'"{text}"'
    else:
        pieces = ", \"'\", ".join(f"'{piece}'" for piece in text.split("'"))
        xpath_text = f"concat({pieces})"

    elements = driver.find_elements(By.XPATH, f"//*[contains(text(), {xpath_text})]")
    if nth_result > len(elements):
        raise Exception(f"Match n°{nth_result} not found (only {len(elements)} matches found)")
    result = f"Found {len(elements)} matches for '{text}'."
    elem = elements[nth_result - 1]
    driver.execute_script("arguments[0].scrollIntoView(true);", elem)
    # Leading space keeps the two sentences from running together.
    result += f" Focused on element {nth_result} of {len(elements)}"
    return result
34
+
35
# NOTE(review): the docstring below is passed through @tool and is presumably
# what the agent sees as the tool description; confirm before rewording it.
@tool
def go_back() -> None:
    """Goes back to previous page."""
    # `driver` is the module-level browser handle assigned further down by
    # helium.start_chrome(); it is resolved when the tool is called.
    driver.back()
39
+
40
@tool
def close_popups() -> str:
    """
    Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows!
    This does not work on cookie consent banners.
    """
    # Send ESCAPE to the page through the module-level driver.
    webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
    # The signature promises a str; previously the function fell off the end
    # and returned None.
    return "Sent ESCAPE to the page to dismiss any open pop-ups."
47
+
48
+
49
# Chrome command-line flags, applied in this order.
chrome_options = webdriver.ChromeOptions()
for _chrome_flag in (
    "--force-device-scale-factor=1",
    "--window-size=1000,1350",
    "--disable-pdf-viewer",
    "--window-position=0,0",
):
    chrome_options.add_argument(_chrome_flag)

# Start a visible (non-headless) browser; the tools above use `driver`.
driver = helium.start_chrome(headless=False, options=chrome_options)
58
+
59
+ # Set up screenshot callback
60
def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None:
    """Step callback: attach a browser screenshot and the current URL to a step.

    Args:
        memory_step: The step being recorded; its ``observations_images`` and
            ``observations`` are updated in place.
        agent: The running agent; screenshots on steps at least two behind
            the current one are dropped from ``agent.memory.steps``.
    """
    sleep(1.0)  # Let JavaScript animations happen before taking the screenshot
    driver = helium.get_driver()
    current_step = memory_step.step_number

    # Without a live driver there is nothing to capture and no URL to read.
    # Returning early also keeps the URL lookup below from dereferencing None.
    if driver is None:
        return

    # Remove previous screenshots for lean processing
    for previous_memory_step in agent.memory.steps:
        if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= current_step - 2:
            previous_memory_step.observations_images = None

    png_bytes = driver.get_screenshot_as_png()
    image = Image.open(BytesIO(png_bytes))
    print(f"Captured a browser screenshot: {image.size} pixels")
    memory_step.observations_images = [image.copy()]  # Create a copy to ensure it persists

    # Update observations with current URL
    url_info = f"Current url: {driver.current_url}"
    memory_step.observations = (
        url_info if memory_step.observations is None else memory_step.observations + "\n" + url_info
    )
78
+
79
+ from smolagents import HfApiModel
80
+
81
# Model backing the agent; change the ID to use a different model.
model_id = "meta-llama/Llama-3.3-70B-Instruct"
model = HfApiModel(model_id)

# Agent wired with the browser tools and the per-step screenshot callback.
agent = CodeAgent(
    model=model,
    tools=[go_back, close_popups, search_item_ctrl_f],
    step_callbacks=[save_screenshot],
    additional_authorized_imports=["helium"],
    max_steps=20,
    verbosity_level=2,
)

# Run the helium star-import in the agent's executor up front, so generated
# code can call helium functions without importing them itself.
agent.python_executor("from helium import *", agent.state)
97
+
98
# Prompt text appended to the search request when calling agent.run() below.
# It is runtime text sent to the model, so treat wording changes as
# behaviour changes.
helium_instructions = """
You can use helium to access websites. Don't bother about the helium driver, it's already managed.
We've already ran "from helium import *"
Then you can go to pages!
Code:
```py
go_to('github.com/trending')
```<end_code>

You can directly click clickable elements by inputting the text that appears on them.
Code:
```py
click("Top products")
```<end_code>

If it's a link:
Code:
```py
click(Link("Top products"))
```<end_code>

If you try to interact with an element and it's not found, you'll get a LookupError.
In general stop your action after each button click to see what happens on your screenshot.
Never try to login in a page.

To scroll up or down, use scroll_down or scroll_up with as an argument the number of pixels to scroll from.
Code:
```py
scroll_down(num_pixels=1200) # This will scroll one viewport down
```<end_code>

When you have pop-ups with a cross icon to close, don't try to click the close icon by finding its element or targeting an 'X' element (this most often fails).
Just use your built-in tool `close_popups` to close them:
Code:
```py
close_popups()
```<end_code>

You can use .exists() to check for the existence of an element. For example:
Code:
```py
if Text('Accept cookies?').exists():
    click('I accept')
```<end_code>
"""
143
+
144
# Task for the agent; the helium usage guide is appended when it is run.
search_request = """
Please navigate to https://en.wikipedia.org/wiki/Chicago and give me a sentence containing the word "1992" that mentions a construction accident.
"""

agent_output = agent.run(search_request + helium_instructions)
# One print call, still emitting the header and the answer on separate lines.
print("Final output:", agent_output, sep="\n")
job-search.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+
3
url = "https://jobsearch.api.jobtechdev.se/search"

# Always bound the request: without a timeout, requests waits indefinitely
# on an unresponsive server.
response = requests.get(url, timeout=10)

if response.status_code == 200:
    data = response.json()
    print(data)
else:
    print(f"Failed to retrieve data: {response.status_code}")

# Sample response shape noted by the original author. It is kept as a comment
# because, as a bare dict literal, it was evaluated and thrown away on every
# run.
#
# {
#     "hits": [
#         {
#             "id": "1",
#             "headline": "Data Scientist",
#             "description": {"text": "We are looking for a data scientist to join our team.",
#                             "text-formatted": "text_formatted"},
#             "location": "Stockholm",
#             "company": "Company A"
#         },
#         {
#             "id": "2",
#             "headline": "Software Engineer",
#             "description": {"text": "We are looking for a data scientist to join our team.",
#                             "text-formatted": "text_formatted"},
#             "location": "Gothenburg",
#             "company": "Company B"
#         },
#         ...
#     ]
# }
main.ipynb ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import torch\n",
10
+ "import torch.nn.functional as F\n",
11
+ "from transformers import AutoTokenizer, AutoModel\n",
12
+ "import os\n",
13
+ "import torch\n",
14
+ "import torch.nn as nn\n",
15
+ "import torch.optim as optim\n",
16
+ "import torch.nn.functional as F\n",
17
+ "from huggingface_hub import PyTorchModelHubMixin"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 4,
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "# Load model directly\n",
27
+ " \n",
28
+ "tokenizer = AutoTokenizer.from_pretrained(\"sentence-transformers/all-MiniLM-L6-v2\")\n",
29
+ "model = AutoModel.from_pretrained(\"sentence-transformers/all-MiniLM-L6-v2\")"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 5,
35
+ "metadata": {},
36
+ "outputs": [
37
+ {
38
+ "name": "stdout",
39
+ "output_type": "stream",
40
+ "text": [
41
+ "43\n"
42
+ ]
43
+ }
44
+ ],
45
+ "source": [
46
+ "import os\n",
47
+ "\n",
48
+ "# Directory containing the job files\n",
49
+ "jobs_dir = 'jobs'\n",
50
+ "\n",
51
+ "# List to store the contents of the txt files with labels\n",
52
+ "dataset = []\n",
53
+ "\n",
54
+ "# Walk through the directory\n",
55
+ "for root, dirs, files in os.walk(jobs_dir):\n",
56
+ " for file in files:\n",
57
+ " if file.endswith('.txt'):\n",
58
+ " file_path = os.path.join(root, file)\n",
59
+ " with open(file_path, 'r') as f:\n",
60
+ " txt = f.read()\n",
61
+ " label = 0 if 'disliked' in root else 1\n",
62
+ " dataset.append((txt, label))\n",
63
+ "\n",
64
+ "# Print the number of txt files\n",
65
+ "print(len(dataset))"
66
+ ]
67
+ },
68
+ {
69
+ "cell_type": "code",
70
+ "execution_count": 6,
71
+ "metadata": {},
72
+ "outputs": [],
73
+ "source": [
74
+ "import random\n",
75
+ "txts = [txt for txt, label in dataset]\n",
76
+ "labels = [label for txt, label in dataset]\n",
77
+ "\n",
78
+ "# Generate a list of indices and shuffle them\n",
79
+ "indices = list(range(len(txts)))\n",
80
+ "random.shuffle(indices)\n",
81
+ "\n",
82
+ "# Apply the shuffled indices to txts and labels\n",
83
+ "txts = [txts[i] for i in indices]\n",
84
+ "labels = [labels[i] for i in indices]"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "code",
89
+ "execution_count": 7,
90
+ "metadata": {},
91
+ "outputs": [],
92
+ "source": [
93
+ "# Tokenize sentences\n",
94
+ "# text = [\"Hello, my dog is cute\", \"Hello, my cat is cute\"]\n",
95
+ "encoded_input = tokenizer(txts, padding=True, truncation=True, return_tensors='pt')\n",
96
+ "\n",
97
+ "# Compute token embeddings\n",
98
+ "with torch.no_grad():\n",
99
+ " model_output = model(**encoded_input)"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": 8,
105
+ "metadata": {},
106
+ "outputs": [],
107
+ "source": [
108
+ "def mean_pooling(model_output, attention_mask):\n",
109
+ " token_embeddings = model_output[0] #First element of model_output contains all token embeddings\n",
110
+ " input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()\n",
111
+ " return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)\n",
112
+ "\n",
113
+ "\n",
114
+ "# Perform pooling\n",
115
+ "sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])\n",
116
+ "\n",
117
+ "# Normalize embeddings\n",
118
+ "sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)"
119
+ ]
120
+ },
121
+ {
122
+ "cell_type": "code",
123
+ "execution_count": 9,
124
+ "metadata": {},
125
+ "outputs": [
126
+ {
127
+ "data": {
128
+ "text/plain": [
129
+ "torch.Size([43, 384])"
130
+ ]
131
+ },
132
+ "execution_count": 9,
133
+ "metadata": {},
134
+ "output_type": "execute_result"
135
+ }
136
+ ],
137
+ "source": [
138
+ "sentence_embeddings.size()"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "code",
143
+ "execution_count": 10,
144
+ "metadata": {},
145
+ "outputs": [
146
+ {
147
+ "data": {
148
+ "text/plain": [
149
+ "[1,\n",
150
+ " 0,\n",
151
+ " 1,\n",
152
+ " 0,\n",
153
+ " 0,\n",
154
+ " 1,\n",
155
+ " 1,\n",
156
+ " 0,\n",
157
+ " 1,\n",
158
+ " 0,\n",
159
+ " 0,\n",
160
+ " 0,\n",
161
+ " 0,\n",
162
+ " 0,\n",
163
+ " 0,\n",
164
+ " 0,\n",
165
+ " 1,\n",
166
+ " 0,\n",
167
+ " 0,\n",
168
+ " 0,\n",
169
+ " 1,\n",
170
+ " 1,\n",
171
+ " 0,\n",
172
+ " 0,\n",
173
+ " 1,\n",
174
+ " 0,\n",
175
+ " 1,\n",
176
+ " 1,\n",
177
+ " 1,\n",
178
+ " 0,\n",
179
+ " 1,\n",
180
+ " 0,\n",
181
+ " 0,\n",
182
+ " 0,\n",
183
+ " 0,\n",
184
+ " 0,\n",
185
+ " 0,\n",
186
+ " 0,\n",
187
+ " 1,\n",
188
+ " 0,\n",
189
+ " 0,\n",
190
+ " 0,\n",
191
+ " 0]"
192
+ ]
193
+ },
194
+ "execution_count": 10,
195
+ "metadata": {},
196
+ "output_type": "execute_result"
197
+ }
198
+ ],
199
+ "source": [
200
+ "labels"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": 11,
206
+ "metadata": {},
207
+ "outputs": [
208
+ {
209
+ "name": "stdout",
210
+ "output_type": "stream",
211
+ "text": [
212
+ "Epoch [5/20], Loss: 0.6616\n",
213
+ "Epoch [10/20], Loss: 0.6401\n",
214
+ "Epoch [15/20], Loss: 0.6221\n",
215
+ "Epoch [20/20], Loss: 0.6074\n",
216
+ "Training complete.\n"
217
+ ]
218
+ }
219
+ ],
220
+ "source": [
221
+ "\n",
222
+ "\n",
223
+ "# Define the neural network\n",
224
+ "class TwoLayerNN(nn.Module, PyTorchModelHubMixin):\n",
225
+ " def __init__(self, input_dim, hidden_dim, output_dim):\n",
226
+ " super(TwoLayerNN, self).__init__()\n",
227
+ " self.fc1 = nn.Linear(input_dim, hidden_dim)\n",
228
+ " self.relu = nn.ReLU()\n",
229
+ " self.fc2 = nn.Linear(hidden_dim, output_dim)\n",
230
+ " self.sigmoid = nn.Sigmoid()\n",
231
+ "\n",
232
+ " def forward(self, x):\n",
233
+ " out = self.fc1(x)\n",
234
+ " out = self.relu(out)\n",
235
+ " out = self.fc2(out)\n",
236
+ " out = self.sigmoid(out)\n",
237
+ " return out\n",
238
+ "\n",
239
+ "# Hyperparameters\n",
240
+ "input_dim = sentence_embeddings.size(1)\n",
241
+ "hidden_dim = 128\n",
242
+ "output_dim = 1\n",
243
+ "num_epochs = 20\n",
244
+ "learning_rate = 0.001\n",
245
+ "\n",
246
+ "# Model, loss function, and optimizer\n",
247
+ "classifier = TwoLayerNN(input_dim, hidden_dim, output_dim)\n",
248
+ "criterion = nn.BCELoss()\n",
249
+ "optimizer = optim.Adam(classifier.parameters(), lr=learning_rate)\n",
250
+ "\n",
251
+ "# Convert labels to tensor\n",
252
+ "labels_tensor = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)\n",
253
+ "\n",
254
+ "# Training loop\n",
255
+ "for epoch in range(num_epochs):\n",
256
+ " classifier.train()\n",
257
+ " optimizer.zero_grad()\n",
258
+ " outputs = classifier(sentence_embeddings)\n",
259
+ " loss = criterion(outputs, labels_tensor)\n",
260
+ " loss.backward()\n",
261
+ " optimizer.step()\n",
262
+ "\n",
263
+ " if (epoch+1) % 5 == 0:\n",
264
+ " print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')\n",
265
+ "\n",
266
+ "print(\"Training complete.\")"
267
+ ]
268
+ },
269
+ {
270
+ "cell_type": "code",
271
+ "execution_count": 16,
272
+ "metadata": {},
273
+ "outputs": [
274
+ {
275
+ "name": "stdout",
276
+ "output_type": "stream",
277
+ "text": [
278
+ "Job description: A very fun job with data science and machine learning\n",
279
+ "Prediction: liked (score: 0.5050)\n"
280
+ ]
281
+ }
282
+ ],
283
+ "source": [
284
+ "# Inference\n",
285
+ "classifier.eval()\n",
286
+ "job_description = \"A very fun job with data science and machine learning\"\n",
287
+ "encoded_input = tokenizer(job_description, padding=True, truncation=True, return_tensors='pt')\n",
288
+ "with torch.no_grad():\n",
289
+ " model_output = model(**encoded_input)\n",
290
+ "sentence_embedding = mean_pooling(model_output, encoded_input['attention_mask'])\n",
291
+ "sentence_embedding = F.normalize(sentence_embedding, p=2, dim=1)\n",
292
+ "output = classifier(sentence_embedding)\n",
293
+ "prediction = 'liked' if output.item() > 0.5 else 'disliked'\n",
294
+ "print(f\"Job description: {job_description}\")\n",
295
+ "print(f\"Prediction: {prediction} (score: {output.item():.4f})\")"
296
+ ]
297
+ },
298
+ {
299
+ "cell_type": "code",
300
+ "execution_count": 13,
301
+ "metadata": {},
302
+ "outputs": [],
303
+ "source": [
304
+ "from huggingface_hub import HfApi, HfFolder\n",
305
+ "\n",
306
+ "# Save the model and tokenizer\n",
307
+ "classifier.save_pretrained(\"job-classifier\")\n",
308
+ "tokenizer.save_pretrained(\"job-classifier\")\n",
309
+ "\n",
310
+ "# Initialize the HfApi\n",
311
+ "api = HfApi()"
312
+ ]
313
+ },
314
+ {
315
+ "cell_type": "code",
316
+ "execution_count": 14,
317
+ "metadata": {},
318
+ "outputs": [
319
+ {
320
+ "name": "stderr",
321
+ "output_type": "stream",
322
+ "text": [
323
+ "No files have been modified since last commit. Skipping to prevent empty commit.\n"
324
+ ]
325
+ },
326
+ {
327
+ "data": {
328
+ "text/plain": [
329
+ "CommitInfo(commit_url='https://huggingface.co/Robzy/job-classifier/commit/fbe58c86c6d0859305675ac93f155fef7462a58d', commit_message='Upload model', commit_description='', oid='fbe58c86c6d0859305675ac93f155fef7462a58d', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Robzy/job-classifier', endpoint='https://huggingface.co', repo_type='model', repo_id='Robzy/job-classifier'), pr_revision=None, pr_num=None)"
330
+ ]
331
+ },
332
+ "execution_count": 14,
333
+ "metadata": {},
334
+ "output_type": "execute_result"
335
+ }
336
+ ],
337
+ "source": [
338
+ "classifier.push_to_hub(\"Robzy/job-classifier\")"
339
+ ]
340
+ },
341
+ {
342
+ "cell_type": "code",
343
+ "execution_count": 17,
344
+ "metadata": {},
345
+ "outputs": [],
346
+ "source": [
347
+ "input_dim = 384\n",
348
+ "hidden_dim = 128\n",
349
+ "output_dim = 1\n",
350
+ "classifier = TwoLayerNN.from_pretrained(\"Robzy/job-classifier\", input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim)"
351
+ ]
352
+ },
353
+ {
354
+ "cell_type": "code",
355
+ "execution_count": null,
356
+ "metadata": {},
357
+ "outputs": [],
358
+ "source": []
359
+ }
360
+ ],
361
+ "metadata": {
362
+ "kernelspec": {
363
+ "display_name": ".venv",
364
+ "language": "python",
365
+ "name": "python3"
366
+ },
367
+ "language_info": {
368
+ "codemirror_mode": {
369
+ "name": "ipython",
370
+ "version": 3
371
+ },
372
+ "file_extension": ".py",
373
+ "mimetype": "text/x-python",
374
+ "name": "python",
375
+ "nbconvert_exporter": "python",
376
+ "pygments_lexer": "ipython3",
377
+ "version": "3.12.8"
378
+ }
379
+ },
380
+ "nbformat": 4,
381
+ "nbformat_minor": 2
382
+ }
pyproject.toml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "hf-workshop"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "gradio>=5.21.0",
9
+ "helium>=5.1.1",
10
+ "llama-index>=0.12.24",
11
+ "numpy>=2.2.3",
12
+ "pillow>=11.1.0",
13
+ "scipy>=1.15.2",
14
+ "selenium>=4.29.0",
15
+ "smolagents>=1.10.0",
16
+ "timm>=1.0.15",
17
+ "torch>=2.6.0",
18
+ "torchvision>=0.21.0",
19
+ "transformers>=4.49.0",
20
+ ]
21
+
22
+ [dependency-groups]
23
+ dev = [
24
+ "ipykernel>=6.29.5",
25
+ ]
26
+
27
+ [tool.uv.sources]
28
+ torch = [
29
+ { index = "pytorch-cpu" },
30
+ ]
31
+ torchvision = [
32
+ { index = "pytorch-cpu" },
33
+ ]
34
+
35
+ [[tool.uv.index]]
36
+ name = "pytorch-cpu"
37
+ url = "https://download.pytorch.org/whl/cpu"
38
+ explicit = true
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ smolagents
2
+ selenium
3
+ helium
4
+ pillow
5
+ gradio
6
+ transformers
7
+ numpy
8
+ sentence-transformers
9
+ torch
10
+ timm
uv.lock ADDED
The diff for this file is too large to render. See raw diff