File size: 21,924 Bytes
b09f300
 
 
 
 
 
23978d3
b09f300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef30362
 
 
 
 
 
 
b09f300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b04f6b
b09f300
 
 
 
 
 
 
860213a
 
dc97950
 
b09f300
 
 
 
 
 
 
 
 
8599e34
b09f300
 
 
 
3e41ed7
b09f300
 
 
8599e34
b09f300
 
 
 
3e41ed7
b09f300
 
 
8599e34
b09f300
 
 
 
3e41ed7
b09f300
 
 
8599e34
b09f300
 
 
 
3e41ed7
b09f300
 
 
8599e34
b09f300
 
 
 
3e41ed7
b09f300
 
 
 
 
 
 
 
 
fabf72e
 
 
 
 
 
 
 
8164d28
b09f300
fabf72e
b09f300
fabf72e
b09f300
fabf72e
 
 
b09f300
fabf72e
 
 
 
 
 
 
 
0aec3e5
 
 
 
 
 
b09f300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fcf892
 
 
 
b09f300
 
 
 
 
 
 
 
8fcf892
 
 
812cad0
b09f300
3e41ed7
 
 
 
 
 
 
 
b09f300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3e41ed7
b09f300
 
 
 
812cad0
b09f300
3e41ed7
b09f300
 
 
 
 
 
 
3e41ed7
b09f300
 
 
 
 
 
860213a
 
 
 
 
 
 
 
 
 
 
084cc2f
 
3e41ed7
084cc2f
 
 
dc97950
 
 
b09f300
8fcf892
084cc2f
b09f300
 
dc97950
b09f300
860213a
b09f300
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta name="referrer" content="no-referrer">
  <title>A/B Test Captioning UI</title>
  <style>
    html {
      color-scheme: light dark;
      font-family: sans-serif;
    }
    table {
      border-collapse: collapse;
      min-width: 3000px;
    }

    table th {
      text-align: center;
    }
    table td {
      text-align: left;
      white-space: pre-wrap;
    }

    button {
      cursor: pointer;
    }

    th,
    td {
      border: 1px solid light-dark(black, #515151);
      padding: 8px;
      vertical-align: top;
      min-width: 500px;
      max-width: 500px;
    }

    textarea {
      width: 100%;
      box-sizing: border-box;
      height: 60px;
    }

    img {
      max-width: min(100%, 20vw);
    }
  </style>
</head>

<body>

  <script>
    // Read optional settings from the URL fragment, formatted as `#key=value;key2=value2`,
    // and persist the recognised ones (serverOrigin, modelName, apiKey) to localStorage.
    // Only the FIRST "=" separates key from value: values such as URLs with query strings
    // or "="-padded keys contain "=" themselves and must not be truncated.
    window.hashParams = document.location.hash ? Object.fromEntries(document.location.hash.slice(1).split(";").map(kv => {
      const separatorIndex = kv.indexOf("=");
      return separatorIndex === -1 ? [kv] : [kv.slice(0, separatorIndex), kv.slice(separatorIndex + 1)];
    })) : ({});
    if(window.hashParams.serverOrigin) localStorage.serverOrigin = window.hashParams.serverOrigin;
    if(window.hashParams.modelName) localStorage.modelName = window.hashParams.modelName;
    if(window.hashParams.apiKey) localStorage.apiKey = window.hashParams.apiKey;
  </script>

  <button onclick="readmeCtn.hidden=!readmeCtn.hidden">ℹ️ toggle readme</button>
<pre hidden id="readmeCtn">
<b>Note: This should work for any OpenAI-compatible API server.</b>

<b>STEP 1:</b> Start an OpenAI-compatible API server, for example you can use these params on Runpod:
<span style="opacity:0.5">IMAGE:</span> openmmlab/lmdeploy:v0.6.2.post1-cu12
<span style="opacity:0.5">COMMAND:</span> bash -c "pip install timm==1.0.7 && lmdeploy serve api_server OpenGVLab/InternVL2-40B-AWQ --model-name OpenGVLab/InternVL2-40B-AWQ --backend turbomind --server-port 3000 --model-format awq --quant-policy 4"
<span style="opacity:0.5">VOLUME PATH:</span> /root
<span style="opacity:0.5">CONTAINER DISK:</span> 10GB
<span style="opacity:0.5">VOLUME DISK:</span> 60GB
<span style="opacity:0.5">PORT:</span> 3000

<b>STEP 2:</b> Enter your server endpoint url in the input below (e.g. something like 'https://uf1kmzigq0p5bd-3000.proxy.runpod.net' if you're using the above server command).

<b>STEP 3:</b> Enter a system prompt and a prompt at the top of one or more columns, and optionally a prefix that you want the resulting caption to start with.

<b>STEP 4:</b> Select some images to test your prompt on (Ctrl+Click to select multiple images).

<b>STEP 5:</b> Click 'Compute captions for all columns' or click one of the individual column buttons.
</pre>
  <br><br>

  <div style="margin:0.5rem 0;">Scale images down if larger than: <input id="maxImageSizeEl" placeholder="Max image size (px)" style="max-width:70px;" oninput="localStorage.maxImageSize=this.value; updateTable()"></div>
  <script>maxImageSizeEl.value = localStorage.maxImageSize || '1024';</script>
  
  <div style="margin:0.5rem 0;">
    Defaults:
    <input id="serverOriginEl" placeholder="OpenAI-compatible server URL" style="width:300px;" oninput="localStorage.serverOrigin=this.value">
    <script>serverOriginEl.value = localStorage.serverOrigin || '';</script>
    <input id="modelNameEl" placeholder="Model name (huggingface repo)" style="width:300px;" oninput="localStorage.modelName=this.value">
    <script>modelNameEl.value = localStorage.modelName || 'OpenGVLab/InternVL2-40B-AWQ';</script>
    <input id="apiKeyEl" placeholder="API key (optional)" style="width:300px;" oninput="localStorage.apiKey=this.value">
    <script>apiKeyEl.value = localStorage.apiKey || '';</script>
    <input id="temperatureEl" placeholder="Temperature (defaults to 1)" style="width:300px;" oninput="localStorage.temperature=this.value">
    <script>temperatureEl.value = localStorage.temperature || '';</script>
  </div>
  
  <button id="imageLoader"><b>1. Select test images</b></button>
  <button onclick="computeCaptions()"><b>2. Compute captions for all columns</b></button>

  <table id="captionTable" style="margin-top:0.5rem;">
    <tr>
      <th>Images</th>
      <th>
        <div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt1ServerOriginEl" oninput="localStorage.prompt1ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt1ModelNameEl" oninput="localStorage.prompt1ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt1ApiKeyEl" oninput="localStorage.prompt1ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
        <div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 1</div>
        <textarea id="systemPrompt1El" placeholder="System prompt 1, e.g. 'You are an expert image captioner, ...'" oninput="localStorage.systemPrompt1=this.value"></textarea>
        <textarea id="prompt1El" placeholder="Prompt 1, e.g. 'Caption this image.'" oninput="localStorage.prompt1=this.value"></textarea>
        <textarea id="prefix1El" placeholder="(Optional) Prefix 1, e.g. 'It's an image of'" oninput="localStorage.prefix1=this.value"></textarea>
        <script>prompt1El.value = localStorage.prompt1 || ''; prefix1El.value = localStorage.prefix1 || ''; systemPrompt1El.value = localStorage.systemPrompt1 || ''; prompt1ServerOriginEl.value = localStorage.prompt1ServerOriginOverride || ''; prompt1ModelNameEl.value = localStorage.prompt1ModelNameOverride || '';; prompt1ApiKeyEl.value = localStorage.prompt1ApiKeyOverride || '';</script>
        <button onclick="computeCaptions(0); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
      </th>
      <th>
        <div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt2ServerOriginEl" oninput="localStorage.prompt2ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt2ModelNameEl" oninput="localStorage.prompt2ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt2ApiKeyEl" oninput="localStorage.prompt2ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
        <div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 2</div>
        <textarea id="systemPrompt2El" placeholder="System prompt 2, e.g. 'You are an assistant who accurately ...'" oninput="localStorage.systemPrompt2=this.value"></textarea>
        <textarea id="prompt2El" placeholder="Prompt 2, e.g. 'Describe this image in extreme detail.'" oninput="localStorage.prompt2=this.value"></textarea>
        <textarea id="prefix2El" placeholder="(Optional) Prefix 2, e.g. 'The image depicts'" oninput="localStorage.prefix2=this.value"></textarea>
        <script>prompt2El.value = localStorage.prompt2 || ''; prefix2El.value = localStorage.prefix2 || '';  systemPrompt2El.value = localStorage.systemPrompt2 || ''; prompt2ServerOriginEl.value = localStorage.prompt2ServerOriginOverride || ''; prompt2ModelNameEl.value = localStorage.prompt2ModelNameOverride || ''; prompt2ApiKeyEl.value = localStorage.prompt2ApiKeyOverride || '';</script>
        <button onclick="computeCaptions(1); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
      </th>
      <th>
        <div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt3ServerOriginEl" oninput="localStorage.prompt3ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt3ModelNameEl" oninput="localStorage.prompt3ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt3ApiKeyEl" oninput="localStorage.prompt3ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
        <div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 3</div>
        <textarea id="systemPrompt3El" placeholder="System prompt 3" oninput="localStorage.systemPrompt3=this.value"></textarea>
        <textarea id="prompt3El" placeholder="Prompt 3" oninput="localStorage.prompt3=this.value"></textarea>
        <textarea id="prefix3El" placeholder="Prefix 3" oninput="localStorage.prefix3=this.value"></textarea>
        <script>prompt3El.value = localStorage.prompt3 || ''; prefix3El.value = localStorage.prefix3 || ''; systemPrompt3El.value = localStorage.systemPrompt3 || ''; prompt3ServerOriginEl.value = localStorage.prompt3ServerOriginOverride || ''; prompt3ModelNameEl.value = localStorage.prompt3ModelNameOverride || ''; prompt3ApiKeyEl.value = localStorage.prompt3ApiKeyOverride || '';</script>
        <button onclick="computeCaptions(2); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
      </th>
      <th>
        <div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt4ServerOriginEl" oninput="localStorage.prompt4ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt4ModelNameEl" oninput="localStorage.prompt4ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt4ApiKeyEl" oninput="localStorage.prompt4ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
        <div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 4</div>
        <textarea id="systemPrompt4El" placeholder="System prompt 4" oninput="localStorage.systemPrompt4=this.value"></textarea>
        <textarea id="prompt4El" placeholder="Prompt 4" oninput="localStorage.prompt4=this.value"></textarea>
        <textarea id="prefix4El" placeholder="Prefix 4" oninput="localStorage.prefix4=this.value"></textarea>
        <script>prompt4El.value = localStorage.prompt4 || ''; prefix4El.value = localStorage.prefix4 || ''; systemPrompt4El.value = localStorage.systemPrompt4 || ''; prompt4ServerOriginEl.value = localStorage.prompt4ServerOriginOverride || ''; prompt4ModelNameEl.value = localStorage.prompt4ModelNameOverride || ''; prompt4ApiKeyEl.value = localStorage.prompt4ApiKeyOverride || '';</script>
        <button onclick="computeCaptions(3); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
      </th>
      <th>
        <div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt5ServerOriginEl" oninput="localStorage.prompt5ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt5ModelNameEl" oninput="localStorage.prompt5ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt5ApiKeyEl" oninput="localStorage.prompt5ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
        <div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 5</div>
        <textarea id="systemPrompt5El" placeholder="System prompt 5" oninput="localStorage.systemPrompt5=this.value"></textarea>
        <textarea id="prompt5El" placeholder="Prompt 5" oninput="localStorage.prompt5=this.value"></textarea>
        <textarea id="prefix5El" placeholder="Prefix 5" oninput="localStorage.prefix5=this.value"></textarea>
        <script>prompt5El.value = localStorage.prompt5 || ''; prefix5El.value = localStorage.prefix5 || ''; systemPrompt5El.value = localStorage.systemPrompt5 || ''; prompt5ServerOriginEl.value = localStorage.prompt5ServerOriginOverride || ''; prompt5ModelNameEl.value = localStorage.prompt5ModelNameOverride || ''; prompt5ApiKeyEl.value = localStorage.prompt5ApiKeyOverride || '';</script>
        <button onclick="computeCaptions(4); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
      </th>
    </tr>
  </table>

  <script>
    // Data URLs of the currently selected test images; one table row is built per entry.
    window.testImages = [];
    // File handles (or handle-like wrappers) behind the current selection.
    let imageHandles = [];

    // Hidden multi-file <input> that the image-loader button can fall back to.
    const fileInput = document.createElement('input');
    fileInput.type = 'file';
    fileInput.multiple = true;
    fileInput.accept = '.png,.gif,.jpeg,.jpg,.webp,.avif,.svg,image/*';
    fileInput.style = 'display:none';
    document.body.appendChild(fileInput);

    // "Select test images" button. Tries window.showOpenFilePicker first (its handles are
    // stored under the 'imageHandles' key for the next visit) and falls back to the hidden
    // <input type="file"> when that call fails for any reason other than the user cancelling.
    document.getElementById('imageLoader').addEventListener('click', async () => {
      // Clearing the value first makes the input fire `change` even when the same files are re-selected.
      const openFallbackPicker = () => {
        fileInput.value = '';
        fileInput.click();
      };

      let pickedHandles;
      try {
        pickedHandles = await window.showOpenFilePicker({
          multiple: true,
          types: [{ description: 'Images', accept: { 'image/*': ['.png','.gif','.jpeg','.jpg','.webp','.avif','.svg'] }}]
        });
      } catch (err) {
        // The user dismissed the picker: keep the current selection and do not open a second dialog.
        if (err?.name === 'AbortError') return;
        // Picker missing (calling undefined throws a TypeError) or otherwise failing: use the plain file input.
        openFallbackPicker();
        return;
      }
      // The previous selection is only replaced once a new one has actually been made.
      imageHandles = pickedHandles;

      // Persisting the handles is best-effort: a failed import or storage write must not
      // prevent the freshly picked images from being shown.
      try {
        // NOTE(review): the package specifier in this URL looks mangled ("[email protected]") —
        // confirm the intended package@version; as written the import is expected to fail.
        const { set } = await import('https://unpkg.com/[email protected]/dist/esm/index.js');
        await set('imageHandles', imageHandles);
      } catch (err) {
        console.warn("Could not persist the selected image handles:", err);
      }
      await loadImagesFromHandles();
    });

    // Fallback path: wrap plain File objects in handle-like objects ({ getFile, kind, name })
    // so that the same loader used for picker handles can read them.
    fileInput.addEventListener('change', async ({ target: { files } }) => {
      if (!files.length) return;
      imageHandles = Array.from(files).map(file => ({ getFile: async () => file, kind: 'file', name: file.name }));
      await loadImagesFromHandles();
    });

    /**
     * Reads every entry of `imageHandles` into a data URL, then replaces
     * `window.testImages` with the result and rebuilds the table.
     *
     * The list is built locally and assigned once, so overlapping calls cannot
     * interleave their entries. A handle that cannot be read (getFile() or the
     * file read rejecting) is skipped with a console warning instead of aborting
     * the whole load.
     *
     * @returns {Promise<void>} Resolves after updateTable() has finished.
     */
    async function loadImagesFromHandles() {
      const loadedImages = [];
      for (const handle of imageHandles) {
        try {
          const file = await handle.getFile();
          loadedImages.push(await fileToDataUrl(file));
        } catch (err) {
          console.warn(`Skipping image "${handle?.name}" because it could not be read:`, err);
        }
      }
      window.testImages = loadedImages;
      await updateTable();
    }

    /**
     * Reads `file` with a FileReader and resolves with the reader's result
     * (produced by readAsDataURL). Rejects with whatever the reader passes to
     * its error handler.
     */
    function fileToDataUrl(file) {
      return new Promise((resolve, reject) => {
        const reader = Object.assign(new FileReader(), {
          onload: () => { resolve(reader.result); },
          onerror: reject,
        });
        reader.readAsDataURL(file);
      });
    }

    /**
     * Restores the handles stored under the 'imageHandles' key by an earlier
     * selection and, when there are any, loads them into the table.
     *
     * Best-effort: if the helper module cannot be imported, the stored value
     * cannot be read, or loading fails, the problem is logged and the current
     * selection is left untouched.
     *
     * @returns {Promise<void>}
     */
    async function loadSavedImages() {
      try {
        // NOTE(review): the package specifier in this URL looks mangled ("[email protected]") —
        // confirm the intended package@version; as written the import is expected to fail.
        const { get } = await import('https://unpkg.com/[email protected]/dist/esm/index.js');
        const savedHandles = await get('imageHandles');
        if (!Array.isArray(savedHandles) || savedHandles.length === 0) return;
        imageHandles = savedHandles;
        await loadImagesFromHandles();
      } catch (err) {
        console.warn("Could not restore previously selected images:", err);
      }
    }

    // Restore the previous image selection once the page has finished loading.
    // The returned promise is handled here so that a failed restore is logged
    // instead of surfacing as an unhandled rejection.
    window.addEventListener('load', () => {
      loadSavedImages().catch(err => console.error("Error restoring saved images:", err));
    });

    /**
     * Loads an image from `dataUrl`, scales it down (keeping the aspect ratio) so that it
     * fits within `maxWidth` x `maxHeight`, and re-encodes it via canvas.toDataURL("image/jpeg").
     * Images that already fit keep their size; nothing is ever scaled up.
     *
     * @param {Object} options
     * @param {string} options.dataUrl - Source image URL.
     * @param {number} options.maxWidth - Maximum width; a value that is not a positive number means "no limit".
     * @param {number} options.maxHeight - Maximum height; a value that is not a positive number means "no limit".
     * @returns {Promise<string>} The re-encoded image as a data URL. Rejects with an Error when
     *   the image cannot be loaded (previously the promise never settled in that case).
     */
    function resizeDataUrl({ dataUrl, maxWidth, maxHeight }) {
      // An empty size field arrives here as 0 (Number("") === 0), which would otherwise
      // produce a scale ratio of 0 and an empty canvas; treat such limits as "unlimited".
      const widthLimit = maxWidth > 0 ? maxWidth : Infinity;
      const heightLimit = maxHeight > 0 ? maxHeight : Infinity;

      return new Promise((resolve, reject) => {
        const img = new Image();
        img.onload = () => {
          let { width, height } = img;
          if (width > widthLimit || height > heightLimit) {
            const ratio = Math.min(widthLimit / width, heightLimit / height);
            width *= ratio;
            height *= ratio;
          }
          const canvas = document.createElement('canvas');
          canvas.width = width;
          canvas.height = height;
          canvas.getContext('2d').drawImage(img, 0, 0, width, height);
          resolve(canvas.toDataURL("image/jpeg"));
        };
        img.onerror = () => reject(new Error("Could not load image for resizing."));
        img.src = dataUrl;
      });
    }

    // Incremented by every updateTable() call so that a run which has been superseded
    // by a newer call can notice it and stop touching the table.
    let latestTableUpdateId = 0;

    /**
     * Rebuilds the body of the caption table: removes every row except the header, then
     * appends one row per entry of `window.testImages` containing the (down-scaled) image
     * in a sticky first cell followed by five placeholder caption cells.
     *
     * The function awaits an image resize per row and is also triggered on every
     * keystroke in the max-size field, so calls can overlap. Only the most recent
     * call keeps adding rows; earlier ones return as soon as they resume.
     *
     * @returns {Promise<void>}
     */
    async function updateTable() {
      const updateId = ++latestTableUpdateId;
      const table = document.getElementById('captionTable');
      // Clear existing rows except header
      while (table.rows.length > 1) {
        table.deleteRow(1);
      }

      const maxImageSize = Number(maxImageSizeEl.value);
      for (let i = 0; i < window.testImages.length; i++) {
        // Resize before inserting anything so that a superseded run leaves no partial row behind.
        const imageUrl = await resizeDataUrl({ dataUrl: window.testImages[i], maxWidth: maxImageSize, maxHeight: maxImageSize });
        if (updateId !== latestTableUpdateId) return;

        const row = table.insertRow(-1);
        const imageCell = row.insertCell(0);
        const img = document.createElement('img');
        img.style.pointerEvents = "auto";
        img.src = imageUrl;
        imageCell.appendChild(img);

        imageCell.style.pointerEvents = "none";
        imageCell.style.position = 'sticky';
        imageCell.style.left = '0';

        for (let j = 0; j < 5; j++) {
          row.insertCell(-1).textContent = `Caption will appear here`;
        }
      }
    }

    /**
     * Starts caption requests for every image row, either for all five prompt columns or
     * for a single one. Requests are fired without waiting for each other; each cell shows
     * a progress indicator until its own request settles.
     *
     * A column is skipped when its system prompt, prompt and prefix are all blank.
     * Per-column server URL / model name / API key fields override the page-level defaults.
     * The server URL is validated for the columns that will actually run, so a run that
     * relies purely on per-column overrides is not blocked by an empty default field.
     *
     * @param {?number} columnI - Zero-based index of the only column to (re)compute, or null for all columns.
     * @returns {Promise<void>}
     */
    async function computeCaptions(columnI=null) {

      if(window.testImages.length === 0) return alert("Choose images first.");

      const COLUMN_COUNT = 5;
      const fieldValue = (id) => document.getElementById(id).value;
      const isHttpUrl = (url) => url.startsWith("https://") || url.startsWith("http://");
      // Trim, drop one trailing slash, and reduce any Gemini API URL to its bare origin.
      const normalizeOrigin = (url) => {
        const origin = url.trim().replace(/\/$/, "");
        if(origin.startsWith("https://generativelanguage.googleapis.com")) return "https://generativelanguage.googleapis.com";
        return origin.trim();
      };
      // Accept a pasted Hugging Face URL in place of a repo id and drop one trailing slash.
      const normalizeModelName = (name) => {
        let modelName = name;
        if(modelName.startsWith("https://huggingface.co/")) modelName = modelName.replace("https://huggingface.co/", "");
        return modelName.replace(/\/$/, "").trim();
      };

      const defaultServerOrigin = normalizeOrigin(fieldValue('serverOriginEl'));
      const defaultModelName = fieldValue('modelNameEl').trim();
      const defaultApiKey = fieldValue('apiKeyEl').trim();

      // Collect the settings of every column that should run (table cell index = column number).
      const columns = [];
      for (let n = 1; n <= COLUMN_COUNT; n++) {
        if(columnI !== null && n !== columnI+1) continue;

        const systemPrompt = fieldValue(`systemPrompt${n}El`).trim();
        const prompt = fieldValue(`prompt${n}El`).trim();
        const prefix = fieldValue(`prefix${n}El`).trim();
        if(!systemPrompt && !prompt && !prefix) continue;

        const serverOriginOverride = fieldValue(`prompt${n}ServerOriginEl`).trim();
        columns.push({
          cellIndex: n,
          systemPrompt,
          prompt,
          prefix,
          serverOrigin: serverOriginOverride ? normalizeOrigin(serverOriginOverride) : defaultServerOrigin,
          modelName: normalizeModelName(fieldValue(`prompt${n}ModelNameEl`).trim() || defaultModelName),
          apiKey: (fieldValue(`prompt${n}ApiKeyEl`) || defaultApiKey).trim(),
        });
      }

      // With nothing to run, fall back to checking the default URL as before.
      const originsToValidate = columns.length > 0 ? columns.map(column => column.serverOrigin) : [defaultServerOrigin];
      if(!originsToValidate.every(isHttpUrl)) return alert("Please enter a valid server URL. It should start with 'https://' or 'http://'.");

      const table = document.getElementById('captionTable');

      for (let i = 1; i < table.rows.length; i++) {
        for (const column of columns) {
          const cell = table.rows[i].cells[column.cellIndex];
          if(!cell) continue; // row exists but its caption cells are missing (e.g. table still being rebuilt)
          cell.innerHTML = `⏳ Computing...<br><progress></progress>`;

          computeCaption(window.testImages[i-1], column.systemPrompt, column.prompt, column.prefix, column.serverOrigin, column.modelName, column.apiKey)
            .then(caption => {
              cell.textContent = caption ?? "❌ No caption was returned.";
            })
            .catch(err => {
              // Without this the cell would stay on "Computing..." forever.
              console.error("Error computing caption:", err);
              cell.textContent = `❌ Error: ${err?.message ?? err}`;
            });
        }
      }
    }

    /**
     * Requests a caption for one image from a chat-completions endpoint
     * (`/v1/chat/completions`, or `/v1beta/openai/chat/completions` for the Gemini origin).
     *
     * @param {string} imageUrl - Image URL; it is passed through resizeDataUrl using the max-size field before upload.
     * @param {string} systemPrompt - System message; omitted from the request when blank.
     * @param {string} prompt - User text sent together with the image.
     * @param {string} prefix - When non-blank, sent as a trailing assistant message and prepended to the returned caption.
     * @param {string} serverOrigin - Server base URL the endpoint path is appended to.
     * @param {string} modelName - Value of the request's `model` field.
     * @param {string} apiKey - Sent as `Authorization: Bearer <apiKey>`.
     * @returns {Promise<string>} The caption. When the request fails or the response carries no
     *   caption, a message starting with "❌" is returned instead, so the failure is shown in
     *   the table cell rather than leaving it stuck or raising an alert per request.
     */
    async function computeCaption(imageUrl, systemPrompt, prompt, prefix, serverOrigin, modelName, apiKey) {

      const maxImageSize = Number(maxImageSizeEl.value);
      const resizedImageUrl = await resizeDataUrl({ dataUrl: imageUrl, maxWidth: maxImageSize, maxHeight: maxImageSize });
      console.log(`original size: ${imageUrl.length}, new size: ${resizedImageUrl.length}`);

      const messages = [];
      if(systemPrompt.trim()) messages.push({ role:"system", content:systemPrompt.trim() });
      messages.push({
        role: "user",
        content: [
          { "type": "text", "text": prompt.trim() },
          { "type": "image_url", "image_url": { "url": resizedImageUrl } },
        ],
      });
      if(prefix.trim()) messages.push({ role:"assistant", content:prefix.trim() });

      const headers = {
        "content-type": "application/json",
        "Authorization": `Bearer ${apiKey}`,
      };
      // Match on the host only: callers strip the trailing slash, so "api.anthropic.com/" never matched a bare origin.
      if(serverOrigin.trim().includes("api.anthropic.com")) headers["anthropic-dangerous-direct-browser-access"] = "true";

      let temperature = Number(temperatureEl.value.trim() || 1);
      if(Number.isNaN(temperature)) temperature = 1;

      const endpointPath = serverOrigin === "https://generativelanguage.googleapis.com" ? "/v1beta/openai/chat/completions" : "/v1/chat/completions";

      const startTime = Date.now();
      let result;
      try {
        const response = await fetch(`${serverOrigin}${endpointPath}`, {
          headers,
          body: JSON.stringify({
            model: modelName.trim(),
            temperature,
            stream: false,
            messages,
          }),
          method: "POST",
        });
        result = await response.json();
      } catch (e) {
        // Network failure, CORS rejection, or a body that is not JSON.
        console.error("Error computing caption:", e);
        return `❌ Request failed: ${e?.message ?? e}`;
      }

      const content = result?.choices?.[0]?.message?.content;
      if(result?.object === "error" || content == null) {
        // Error bodies seen here are either { object: "error", message } or { error: { message } }.
        const errorMessage = result?.error?.message ?? result?.message ?? JSON.stringify(result);
        console.error("Error computing caption:", result);
        return `❌ Server error: ${errorMessage}`;
      }

      console.log(content);
      console.log("tokens:", result.usage?.total_tokens); // `usage` may be absent from the response
      console.log("time:", Date.now() - startTime);
      return (prefix || "") + content;
    }
  </script>
</body>

</html>