Sony
/

Image-Text-to-Text
Safetensors
English
conversational
SwyWang committed on
Commit
49d5b39
·
verified ·
1 Parent(s): 7eb0198

Upload demo.ipynb

Browse files
Files changed (1) hide show
  1. demo.ipynb +110 -7
demo.ipynb CHANGED
@@ -25,7 +25,7 @@
25
  },
26
  {
27
  "cell_type": "code",
28
- "execution_count": 2,
29
  "metadata": {},
30
  "outputs": [],
31
  "source": [
@@ -66,6 +66,76 @@
66
  "execution_count": null,
67
  "metadata": {},
68
  "outputs": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  {
70
  "name": "stderr",
71
  "output_type": "stream",
@@ -79,7 +149,7 @@
79
  {
80
  "data": {
81
  "application/vnd.jupyter.widget-view+json": {
82
- "model_id": "64e0aee907ed4b29b238f38b74762f95",
83
  "version_major": 2,
84
  "version_minor": 0
85
  },
@@ -90,17 +160,30 @@
90
  "metadata": {},
91
  "output_type": "display_data"
92
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  {
94
  "name": "stdout",
95
  "output_type": "stream",
96
  "text": [
97
- "Loading weights from local directory\n",
98
  "Model initialization is done.\n"
99
  ]
100
  }
101
  ],
102
  "source": [
103
- "model_path = \"aki-phi3.5-mini-4b\"\n",
104
  "config = AutoConfig.from_pretrained(model_path)\n",
105
  "# Load model, image_processor, tokenizer\n",
106
  "model, image_processor, tokenizer = load_model_and_processor(model_path, config=config)"
@@ -108,7 +191,7 @@
108
  },
109
  {
110
  "cell_type": "code",
111
- "execution_count": 8,
112
  "metadata": {},
113
  "outputs": [],
114
  "source": [
@@ -158,7 +241,7 @@
158
  },
159
  {
160
  "cell_type": "code",
161
- "execution_count": 9,
162
  "metadata": {},
163
  "outputs": [
164
  {
@@ -182,7 +265,20 @@
182
  "<image>\n",
183
  "Describe the scene of this image.<|end|>\n",
184
  "<|assistant|>\n",
185
- "\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  "Response:\n",
187
  " The image captures a beautiful autumn day in a park, with a pathway covered in a vibrant carpet of fallen leaves. The leaves are in various shades of red, orange, yellow, and brown, creating a warm and colorful atmosphere. The path is lined with trees displaying beautiful autumn foliage, adding to the picturesque setting.\n",
188
  "\n",
@@ -196,6 +292,13 @@
196
  "response = process_input(image_path, text_input)\n",
197
  "print(\"Response:\\n\", response)"
198
  ]
 
 
 
 
 
 
 
199
  }
200
  ],
201
  "metadata": {
 
25
  },
26
  {
27
  "cell_type": "code",
28
+ "execution_count": null,
29
  "metadata": {},
30
  "outputs": [],
31
  "source": [
 
66
  "execution_count": null,
67
  "metadata": {},
68
  "outputs": [
69
+ {
70
+ "data": {
71
+ "application/vnd.jupyter.widget-view+json": {
72
+ "model_id": "09f13b1e5bae415f9f6a123d836b1d47",
73
+ "version_major": 2,
74
+ "version_minor": 0
75
+ },
76
+ "text/plain": [
77
+ "config.json: 0%| | 0.00/387 [00:00<?, ?B/s]"
78
+ ]
79
+ },
80
+ "metadata": {},
81
+ "output_type": "display_data"
82
+ },
83
+ {
84
+ "data": {
85
+ "application/vnd.jupyter.widget-view+json": {
86
+ "model_id": "e6f700961c5c4da3b326106d2dae74a2",
87
+ "version_major": 2,
88
+ "version_minor": 0
89
+ },
90
+ "text/plain": [
91
+ "tokenizer_config.json: 0%| | 0.00/3.99k [00:00<?, ?B/s]"
92
+ ]
93
+ },
94
+ "metadata": {},
95
+ "output_type": "display_data"
96
+ },
97
+ {
98
+ "data": {
99
+ "application/vnd.jupyter.widget-view+json": {
100
+ "model_id": "9254a5341c3e44eeba3f2aed206701e5",
101
+ "version_major": 2,
102
+ "version_minor": 0
103
+ },
104
+ "text/plain": [
105
+ "tokenizer.model: 0%| | 0.00/500k [00:00<?, ?B/s]"
106
+ ]
107
+ },
108
+ "metadata": {},
109
+ "output_type": "display_data"
110
+ },
111
+ {
112
+ "data": {
113
+ "application/vnd.jupyter.widget-view+json": {
114
+ "model_id": "7fadd9569a1549a2a81e4f0742d6910a",
115
+ "version_major": 2,
116
+ "version_minor": 0
117
+ },
118
+ "text/plain": [
119
+ "added_tokens.json: 0%| | 0.00/358 [00:00<?, ?B/s]"
120
+ ]
121
+ },
122
+ "metadata": {},
123
+ "output_type": "display_data"
124
+ },
125
+ {
126
+ "data": {
127
+ "application/vnd.jupyter.widget-view+json": {
128
+ "model_id": "828eef207c5e4037ae41da262449b165",
129
+ "version_major": 2,
130
+ "version_minor": 0
131
+ },
132
+ "text/plain": [
133
+ "special_tokens_map.json: 0%| | 0.00/887 [00:00<?, ?B/s]"
134
+ ]
135
+ },
136
+ "metadata": {},
137
+ "output_type": "display_data"
138
+ },
139
  {
140
  "name": "stderr",
141
  "output_type": "stream",
 
149
  {
150
  "data": {
151
  "application/vnd.jupyter.widget-view+json": {
152
+ "model_id": "4d2309ed29bb477d991fa95f556dbb4e",
153
  "version_major": 2,
154
  "version_minor": 0
155
  },
 
160
  "metadata": {},
161
  "output_type": "display_data"
162
  },
163
+ {
164
+ "data": {
165
+ "application/vnd.jupyter.widget-view+json": {
166
+ "model_id": "5d125663ad7f46c0b4820ea4d236f01a",
167
+ "version_major": 2,
168
+ "version_minor": 0
169
+ },
170
+ "text/plain": [
171
+ "model.safetensors: 0%| | 0.00/17.3G [00:00<?, ?B/s]"
172
+ ]
173
+ },
174
+ "metadata": {},
175
+ "output_type": "display_data"
176
+ },
177
  {
178
  "name": "stdout",
179
  "output_type": "stream",
180
  "text": [
 
181
  "Model initialization is done.\n"
182
  ]
183
  }
184
  ],
185
  "source": [
186
+ "model_path = \"Sony/AKI-4B-phi-3.5-mini\"\n",
187
  "config = AutoConfig.from_pretrained(model_path)\n",
188
  "# Load model, image_processor, tokenizer\n",
189
  "model, image_processor, tokenizer = load_model_and_processor(model_path, config=config)"
 
191
  },
192
  {
193
  "cell_type": "code",
194
+ "execution_count": 9,
195
  "metadata": {},
196
  "outputs": [],
197
  "source": [
 
241
  },
242
  {
243
  "cell_type": "code",
244
+ "execution_count": 10,
245
  "metadata": {},
246
  "outputs": [
247
  {
 
265
  "<image>\n",
266
  "Describe the scene of this image.<|end|>\n",
267
  "<|assistant|>\n",
268
+ "\n"
269
+ ]
270
+ },
271
+ {
272
+ "name": "stderr",
273
+ "output_type": "stream",
274
+ "text": [
275
+ "You are not running the flash-attention implementation, expect numerical differences.\n"
276
+ ]
277
+ },
278
+ {
279
+ "name": "stdout",
280
+ "output_type": "stream",
281
+ "text": [
282
  "Response:\n",
283
  " The image captures a beautiful autumn day in a park, with a pathway covered in a vibrant carpet of fallen leaves. The leaves are in various shades of red, orange, yellow, and brown, creating a warm and colorful atmosphere. The path is lined with trees displaying beautiful autumn foliage, adding to the picturesque setting.\n",
284
  "\n",
 
292
  "response = process_input(image_path, text_input)\n",
293
  "print(\"Response:\\n\", response)"
294
  ]
295
+ },
296
+ {
297
+ "cell_type": "code",
298
+ "execution_count": null,
299
+ "metadata": {},
300
+ "outputs": [],
301
+ "source": []
302
  }
303
  ],
304
  "metadata": {