liltom-eth committed on
Commit
f03b7cc
·
1 Parent(s): d2cca8d

Upload deploy_llava.ipynb with huggingface_hub

Browse files
Files changed (1) hide show
  1. deploy_llava.ipynb +156 -92
deploy_llava.ipynb CHANGED
@@ -1,5 +1,18 @@
1
  {
2
  "cells": [
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  {
4
  "cell_type": "code",
5
  "execution_count": null,
@@ -9,49 +22,37 @@
9
  "!pip install sagemaker --upgrade"
10
  ]
11
  },
 
 
 
 
 
 
 
12
  {
13
  "cell_type": "code",
14
- "execution_count": 10,
15
  "metadata": {},
16
  "outputs": [],
17
  "source": [
 
18
  "!tar -cf model.tar.gz --use-compress-program=pigz *"
19
  ]
20
  },
 
 
 
 
 
 
 
 
 
21
  {
22
  "cell_type": "code",
23
- "execution_count": 11,
24
  "metadata": {},
25
- "outputs": [
26
- {
27
- "name": "stdout",
28
- "output_type": "stream",
29
- "text": [
30
- "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n",
31
- "sagemaker.config INFO - Not applying SDK defaults from location: /Users/tom/Library/Application Support/sagemaker/config.yaml\n",
32
- "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n",
33
- "sagemaker.config INFO - Not applying SDK defaults from location: /Users/tom/Library/Application Support/sagemaker/config.yaml\n"
34
- ]
35
- },
36
- {
37
- "name": "stderr",
38
- "output_type": "stream",
39
- "text": [
40
- "Couldn't call 'get_role' to get Role ARN from role name arn:aws:iam::297308036828:root to get Role path.\n"
41
- ]
42
- },
43
- {
44
- "name": "stdout",
45
- "output_type": "stream",
46
- "text": [
47
- "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n",
48
- "sagemaker.config INFO - Not applying SDK defaults from location: /Users/tom/Library/Application Support/sagemaker/config.yaml\n",
49
- "sagemaker role arn: arn:aws:iam::297308036828:role/service-role/AmazonSageMaker-ExecutionRole-20231008T201275\n",
50
- "sagemaker bucket: sagemaker-us-west-2-297308036828\n",
51
- "sagemaker session region: us-west-2\n"
52
- ]
53
- }
54
- ],
55
  "source": [
56
  "import sagemaker\n",
57
  "import boto3\n",
@@ -67,6 +68,7 @@
67
  " role = sagemaker.get_execution_role()\n",
68
  "except ValueError:\n",
69
  " iam = boto3.client('iam')\n",
 
70
  " role = iam.get_role(RoleName='AmazonSageMaker-ExecutionRole-20231008T201275')['Role']['Arn']\n",
71
  "\n",
72
  "sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)\n",
@@ -76,27 +78,18 @@
76
  "print(f\"sagemaker session region: {sess.boto_region_name}\")"
77
  ]
78
  },
 
 
 
 
 
 
 
79
  {
80
  "cell_type": "code",
81
- "execution_count": 12,
82
  "metadata": {},
83
- "outputs": [
84
- {
85
- "name": "stdout",
86
- "output_type": "stream",
87
- "text": [
88
- "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n",
89
- "sagemaker.config INFO - Not applying SDK defaults from location: /Users/tom/Library/Application Support/sagemaker/config.yaml\n"
90
- ]
91
- },
92
- {
93
- "name": "stdout",
94
- "output_type": "stream",
95
- "text": [
96
- "model uploaded to: s3://sagemaker-us-west-2-297308036828/llava-v1.5-7b/model.tar.gz\n"
97
- ]
98
- }
99
- ],
100
  "source": [
101
  "from sagemaker.s3 import S3Uploader\n",
102
  "\n",
@@ -107,17 +100,15 @@
107
  ]
108
  },
109
  {
110
- "cell_type": "code",
111
- "execution_count": 2,
112
  "metadata": {},
113
- "outputs": [],
114
  "source": [
115
- "# s3_model_uri = \"s3://sagemaker-us-west-2-297308036828/llava-v1.5-7b/model.tar.gz\""
116
  ]
117
  },
118
  {
119
  "cell_type": "code",
120
- "execution_count": 14,
121
  "metadata": {},
122
  "outputs": [
123
  {
@@ -128,7 +119,7 @@
128
  "sagemaker.config INFO - Not applying SDK defaults from location: /Users/tom/Library/Application Support/sagemaker/config.yaml\n",
129
  "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n",
130
  "sagemaker.config INFO - Not applying SDK defaults from location: /Users/tom/Library/Application Support/sagemaker/config.yaml\n",
131
- "------------------!"
132
  ]
133
  }
134
  ],
@@ -155,103 +146,176 @@
155
  ")"
156
  ]
157
  },
 
 
 
 
 
 
 
158
  {
159
  "cell_type": "code",
160
- "execution_count": 15,
161
  "metadata": {},
162
  "outputs": [
163
  {
164
  "name": "stdout",
165
  "output_type": "stream",
166
  "text": [
167
- "(optional)\n",
168
- "\n",
169
- "The image is a black and white photograph of a man standing in front of a building. The man is wearing a suit and tie, and he appears to be looking off into the distance. The building in the background is large and imposing, with many windows and a prominent clock tower. The overall atmosphere of the image is one of elegance and sophistication.\n"
170
  ]
171
  }
172
  ],
173
  "source": [
174
  "data = {\n",
175
  " \"image\" : 'https://raw.githubusercontent.com/haotian-liu/LLaVA/main/images/llava_logo.png', \n",
176
- " \"question\" : \"Describe the image and color details.\"\n",
 
 
 
177
  "}\n",
178
  "\n",
179
- "# max_new_tokens = data.pop(\"max_new_tokens\", 1024)\n",
180
- "# temperature = data.pop(\"temperature\", 0.2)\n",
181
- "# stop_str = data.pop(\"stop_str\", \"###\")\n",
182
- "\n",
183
  "# request\n",
184
  "output = predictor.predict(data)\n",
185
  "print(output)"
186
  ]
187
  },
 
 
 
 
 
 
 
188
  {
189
  "cell_type": "code",
190
- "execution_count": 17,
191
  "metadata": {},
192
  "outputs": [
193
  {
194
  "name": "stdout",
195
  "output_type": "stream",
196
  "text": [
197
- "The image features a unique and eye-catching toy, which is a red and orange plastic horse with a pair of glasses on its face. The horse has a fire effect, giving it a fiery appearance. The glasses on the horse's face add a whimsical touch to the toy. The overall color scheme of the toy is predominantly red and orange, with the fire effect further enhancing the vibrant colors.\n"
198
  ]
199
  }
200
  ],
201
  "source": [
202
  "from llava.conversation import conv_templates, SeparatorStyle\n",
203
  "from llava.constants import (\n",
204
- "IMAGE_TOKEN_INDEX,\n",
205
  "DEFAULT_IMAGE_TOKEN,\n",
206
  "DEFAULT_IM_START_TOKEN,\n",
207
  "DEFAULT_IM_END_TOKEN,\n",
208
  ")\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  "\n",
210
  "raw_prompt = \"Describe the image and color details.\"\n",
 
211
  "image_path = \"https://raw.githubusercontent.com/haotian-liu/LLaVA/main/images/llava_logo.png\"\n",
212
- "\n",
213
- "conv_mode = \"llava_v1\"\n",
214
- "conv = conv_templates[conv_mode].copy()\n",
215
- "roles = conv.roles\n",
216
- "inp = f\"{roles[0]}: {raw_prompt}\"\n",
217
- "inp = (\n",
218
- " DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + \"\\n\" + inp\n",
219
- ")\n",
220
- "conv.append_message(conv.roles[0], inp)\n",
221
- "conv.append_message(conv.roles[1], None)\n",
222
- "prompt = conv.get_prompt()\n",
223
- "\n",
224
- "stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2\n",
225
- "\n",
226
- "\n",
227
  "data = {\"image\" : image_path, \"question\" : prompt, \"stop_str\" : stop_str}\n",
228
  "output = predictor.predict(data)\n",
229
  "print(output)"
230
  ]
231
  },
232
  {
233
- "cell_type": "code",
234
- "execution_count": 18,
235
  "metadata": {},
236
- "outputs": [],
237
  "source": [
238
- "predictor.delete_endpoint()"
239
  ]
240
  },
241
  {
242
  "cell_type": "code",
243
- "execution_count": null,
244
  "metadata": {},
245
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  "source": [
247
- "from sagemaker.huggingface.model import HuggingFacePredictor\n",
 
 
 
 
 
 
 
 
248
  "\n",
 
249
  "# initial the endpoint predictor\n",
250
- "predictor = HuggingFacePredictor(\n",
251
- " endpoint_name=\"\",\n",
252
  " sagemaker_session=sess\n",
253
  ")"
254
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  }
256
  ],
257
  "metadata": {
 
1
  {
2
  "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Deploy LLaVA on Amazon SageMaker\n",
8
+ "\n",
9
+ "Amazon SageMaker is a popular platform for running AI models, and models from Hugging Face can be deployed with [Hugging Face Transformers](https://github.com/huggingface/transformers) using [Amazon SageMaker](https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html) and the [Amazon SageMaker Python SDK](https://sagemaker.readthedocs.io/en/stable/).\n",
10
+ "\n",
11
+ "![llava](https://i.imgur.com/YNVG140.png)\n",
12
+ "\n",
13
+ "Install the SageMaker SDK:"
14
+ ]
15
+ },
16
  {
17
  "cell_type": "code",
18
  "execution_count": null,
 
22
  "!pip install sagemaker --upgrade"
23
  ]
24
  },
25
+ {
26
+ "cell_type": "markdown",
27
+ "metadata": {},
28
+ "source": [
29
+ "Bundle llava model weights and code into a `model.tar.gz`:"
30
+ ]
31
+ },
32
  {
33
  "cell_type": "code",
34
+ "execution_count": 3,
35
  "metadata": {},
36
  "outputs": [],
37
  "source": [
38
+ "# Create SageMaker model.tar.gz artifact\n",
39
  "!tar -cf model.tar.gz --use-compress-program=pigz *"
40
  ]
41
  },
42
+ {
43
+ "cell_type": "markdown",
44
+ "metadata": {},
45
+ "source": [
46
+ "After creating the `model.tar.gz` archive, we can upload it to Amazon S3. We will use the `sagemaker` SDK to upload the model to our SageMaker session bucket.\n",
47
+ "\n",
48
+ "Initialize a SageMaker session first:"
49
+ ]
50
+ },
51
  {
52
  "cell_type": "code",
53
+ "execution_count": null,
54
  "metadata": {},
55
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  "source": [
57
  "import sagemaker\n",
58
  "import boto3\n",
 
68
  " role = sagemaker.get_execution_role()\n",
69
  "except ValueError:\n",
70
  " iam = boto3.client('iam')\n",
71
+ " # setup your own rolename in sagemaker\n",
72
  " role = iam.get_role(RoleName='AmazonSageMaker-ExecutionRole-20231008T201275')['Role']['Arn']\n",
73
  "\n",
74
  "sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)\n",
 
78
  "print(f\"sagemaker session region: {sess.boto_region_name}\")"
79
  ]
80
  },
81
+ {
82
+ "cell_type": "markdown",
83
+ "metadata": {},
84
+ "source": [
85
+ "Upload the `model.tar.gz` to our sagemaker session bucket:"
86
+ ]
87
+ },
88
  {
89
  "cell_type": "code",
90
+ "execution_count": null,
91
  "metadata": {},
92
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  "source": [
94
  "from sagemaker.s3 import S3Uploader\n",
95
  "\n",
 
100
  ]
101
  },
102
  {
103
+ "cell_type": "markdown",
 
104
  "metadata": {},
 
105
  "source": [
106
+ "We will use `HuggingFaceModel` to create our real-time inference endpoint:"
107
  ]
108
  },
109
  {
110
  "cell_type": "code",
111
+ "execution_count": 7,
112
  "metadata": {},
113
  "outputs": [
114
  {
 
119
  "sagemaker.config INFO - Not applying SDK defaults from location: /Users/tom/Library/Application Support/sagemaker/config.yaml\n",
120
  "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n",
121
  "sagemaker.config INFO - Not applying SDK defaults from location: /Users/tom/Library/Application Support/sagemaker/config.yaml\n",
122
+ "---------------!"
123
  ]
124
  }
125
  ],
 
146
  ")"
147
  ]
148
  },
149
+ {
150
+ "cell_type": "markdown",
151
+ "metadata": {},
152
+ "source": [
153
+ "The `.deploy()` method returns a `HuggingFacePredictor` object, which can be used to request inference using the `.predict()` method. Our endpoint expects a JSON payload with at least the `image` and `question` keys."
154
+ ]
155
+ },
156
  {
157
  "cell_type": "code",
158
+ "execution_count": 9,
159
  "metadata": {},
160
  "outputs": [
161
  {
162
  "name": "stdout",
163
  "output_type": "stream",
164
  "text": [
165
+ "The image is a black and white photograph of a man standing in front of a building. The man is wearing a suit and tie, and he appears to be looking at the camera. The building in the background is large and has many windows. The overall atmosphere of the image is formal and professional.\n"
 
 
166
  ]
167
  }
168
  ],
169
  "source": [
170
  "data = {\n",
171
  " \"image\" : 'https://raw.githubusercontent.com/haotian-liu/LLaVA/main/images/llava_logo.png', \n",
172
+ " \"question\" : \"Describe the image and color details.\",\n",
173
+ " # \"max_new_tokens\" : 1024,\n",
174
+ " # \"temperature\" : 0.2,\n",
175
+ " # \"stop_str\" : \"###\"\n",
176
  "}\n",
177
  "\n",
 
 
 
 
178
  "# request\n",
179
  "output = predictor.predict(data)\n",
180
  "print(output)"
181
  ]
182
  },
183
+ {
184
+ "cell_type": "markdown",
185
+ "metadata": {},
186
+ "source": [
187
+ "To run inference with the `llava` special tokens:"
188
+ ]
189
+ },
190
  {
191
  "cell_type": "code",
192
+ "execution_count": 11,
193
  "metadata": {},
194
  "outputs": [
195
  {
196
  "name": "stdout",
197
  "output_type": "stream",
198
  "text": [
199
+ "The image features a red toy animal, possibly a horse or a donkey, with a pair of glasses on its face. The toy is made of plastic and has a fire-like appearance, giving it a unique and eye-catching look. The red color of the toy and the glasses on its face create a striking contrast against the background, making it the main focus of the image.\n"
200
  ]
201
  }
202
  ],
203
  "source": [
204
  "from llava.conversation import conv_templates, SeparatorStyle\n",
205
  "from llava.constants import (\n",
 
206
  "DEFAULT_IMAGE_TOKEN,\n",
207
  "DEFAULT_IM_START_TOKEN,\n",
208
  "DEFAULT_IM_END_TOKEN,\n",
209
  ")\n",
210
+ "def get_prompt(raw_prompt):\n",
211
+ " conv_mode = \"llava_v1\"\n",
212
+ " conv = conv_templates[conv_mode].copy()\n",
213
+ " roles = conv.roles\n",
214
+ " inp = f\"{roles[0]}: {raw_prompt}\"\n",
215
+ " inp = (\n",
216
+ " DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + \"\\n\" + inp\n",
217
+ " )\n",
218
+ " conv.append_message(conv.roles[0], inp)\n",
219
+ " conv.append_message(conv.roles[1], None)\n",
220
+ " prompt = conv.get_prompt()\n",
221
+ " stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2\n",
222
+ " return prompt, stop_str\n",
223
  "\n",
224
  "raw_prompt = \"Describe the image and color details.\"\n",
225
+ "prompt, stop_str = get_prompt(raw_prompt)\n",
226
  "image_path = \"https://raw.githubusercontent.com/haotian-liu/LLaVA/main/images/llava_logo.png\"\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  "data = {\"image\" : image_path, \"question\" : prompt, \"stop_str\" : stop_str}\n",
228
  "output = predictor.predict(data)\n",
229
  "print(output)"
230
  ]
231
  },
232
  {
233
+ "cell_type": "markdown",
 
234
  "metadata": {},
 
235
  "source": [
236
+ "The inference `predictor` can also be initialized with your deployed `endpoint_name`:"
237
  ]
238
  },
239
  {
240
  "cell_type": "code",
241
+ "execution_count": 14,
242
  "metadata": {},
243
+ "outputs": [
244
+ {
245
+ "name": "stdout",
246
+ "output_type": "stream",
247
+ "text": [
248
+ "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n",
249
+ "sagemaker.config INFO - Not applying SDK defaults from location: /Users/tom/Library/Application Support/sagemaker/config.yaml\n",
250
+ "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n",
251
+ "sagemaker.config INFO - Not applying SDK defaults from location: /Users/tom/Library/Application Support/sagemaker/config.yaml\n"
252
+ ]
253
+ },
254
+ {
255
+ "name": "stderr",
256
+ "output_type": "stream",
257
+ "text": [
258
+ "Couldn't call 'get_role' to get Role ARN from role name arn:aws:iam::297308036828:root to get Role path.\n"
259
+ ]
260
+ }
261
+ ],
262
  "source": [
263
+ "import sagemaker\n",
264
+ "import boto3\n",
265
+ "sess = sagemaker.Session()\n",
266
+ "try:\n",
267
+ " role = sagemaker.get_execution_role()\n",
268
+ "except ValueError:\n",
269
+ " iam = boto3.client('iam')\n",
270
+ " # setup your own rolename in sagemaker\n",
271
+ " role = iam.get_role(RoleName='AmazonSageMaker-ExecutionRole-20231008T201275')['Role']['Arn']\n",
272
  "\n",
273
+ "from sagemaker.huggingface.model import HuggingFacePredictor\n",
274
  "# initial the endpoint predictor\n",
275
+ "predictor2 = HuggingFacePredictor(\n",
276
+ " endpoint_name=\"huggingface-pytorch-inference-2023-10-19-05-57-37-847\",\n",
277
  " sagemaker_session=sess\n",
278
  ")"
279
  ]
280
+ },
281
+ {
282
+ "cell_type": "code",
283
+ "execution_count": 15,
284
+ "metadata": {},
285
+ "outputs": [
286
+ {
287
+ "name": "stdout",
288
+ "output_type": "stream",
289
+ "text": [
290
+ "The image features a small toy animal, resembling a horse or a donkey, with a red and orange color scheme. The toy has a pair of glasses on its face, giving it a unique and quirky appearance. The toy is standing on a gray surface, which provides a contrasting background for the vibrant colors of the toy. The combination of red, orange, and gray creates a visually striking scene.\n"
291
+ ]
292
+ }
293
+ ],
294
+ "source": [
295
+ "raw_prompt = \"Describe the image and color details.\"\n",
296
+ "prompt, stop_str = get_prompt(raw_prompt)\n",
297
+ "image_path = \"https://raw.githubusercontent.com/haotian-liu/LLaVA/main/images/llava_logo.png\"\n",
298
+ "data = {\"image\" : image_path, \"question\" : prompt, \"stop_str\" : stop_str}\n",
299
+ "output = predictor2.predict(data)\n",
300
+ "print(output)"
301
+ ]
302
+ },
303
+ {
304
+ "cell_type": "markdown",
305
+ "metadata": {},
306
+ "source": [
307
+ "To clean up, we can delete the model and endpoint with `delete_endpoint()` or by using the SageMaker console:"
308
+ ]
309
+ },
310
+ {
311
+ "cell_type": "code",
312
+ "execution_count": 16,
313
+ "metadata": {},
314
+ "outputs": [],
315
+ "source": [
316
+ "# delete sagemaker endpoint\n",
317
+ "predictor.delete_endpoint()"
318
+ ]
319
  }
320
  ],
321
  "metadata": {