Spaces:
Running
Running
| <html lang="en"> | |
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <!-- Referrer policy for the whole page: no Referer header on outgoing requests. -->
  <meta name="referrer" content="no-referrer">
  <title>A/B Test Captioning UI</title>
  <style>
    /* Page defaults: render correctly in both light and dark colour schemes. */
    html {
      font-family: sans-serif;
      color-scheme: light dark;
    }

    /* The caption table is intentionally very wide; the page scrolls sideways. */
    table {
      min-width: 3000px;
      border-collapse: collapse;
    }

    table th {
      text-align: center;
    }

    /* Body cells hold generated captions, so their line breaks are kept as-is. */
    table td {
      white-space: pre-wrap;
      text-align: left;
    }

    button {
      cursor: pointer;
    }

    /* Every cell is pinned to the same 500px width. */
    th,
    td {
      min-width: 500px;
      max-width: 500px;
      padding: 8px;
      vertical-align: top;
      border: 1px solid light-dark(black, #515151);
    }

    textarea {
      box-sizing: border-box;
      width: 100%;
      height: 60px;
    }

    /* Previews never exceed their cell or a fifth of the viewport width. */
    img {
      max-width: min(100%, 20vw);
    }
  </style>
</head>
| <body> | |
| <button onclick="readmeCtn.hidden=!readmeCtn.hidden">ℹ️ toggle readme</button> | |
| <pre hidden id="readmeCtn"> | |
| <b>Note: This should work for any OpenAI-compatible API server.</b> | |
| <b>STEP 1:</b> Start an OpenAI-compatible API server, for example you can use these params on Runpod: | |
| <span style="opacity:0.5">IMAGE:</span> openmmlab/lmdeploy:v0.6.2.post1-cu12 | |
| <span style="opacity:0.5">COMMAND:</span> bash -c "pip install timm==1.0.7 && lmdeploy serve api_server OpenGVLab/InternVL2-40B-AWQ --model-name OpenGVLab/InternVL2-40B-AWQ --backend turbomind --server-port 3000 --model-format awq --quant-policy 4" | |
| <span style="opacity:0.5">VOLUME PATH:</span> /root | |
| <span style="opacity:0.5">CONTAINER DISK:</span> 10GB | |
| <span style="opacity:0.5">VOLUME DISK:</span> 60GB | |
| <span style="opacity:0.5">PORT:</span> 3000 | |
| <b>STEP 2:</b> Enter your server endpoint URL in the input below (e.g. something like 'https://uf1kmzigq0p5bd-3000.proxy.runpod.net' if you're using the above server command). | |
| <b>STEP 3:</b> Enter a system prompt and a prompt at the top of one or more columns, and optionally a prefix that you want the resulting caption to start with. | |
| <b>STEP 4:</b> Select some images to test your prompt on (Ctrl+Click to select multiple images). | |
| <b>STEP 5:</b> Click 'Compute captions for all columns' or click one of the individual column buttons. | |
| </pre> | |
| <br><br> | |
<!-- Global settings. Each field writes its value to localStorage on input, and the inline
     script right after the field restores the saved value (or a default) on page load. -->
<div style="margin:0.5rem 0;">
  <label>Scale images down if larger than: <input id="maxImageSizeEl" inputmode="numeric" placeholder="Max image size (px)" style="max-width:70px;" oninput="localStorage.maxImageSize=this.value; updateTable()"></label>
</div>
<script>maxImageSizeEl.value = localStorage.maxImageSize || '768';</script>
<div style="margin:0.5rem 0;">
  Defaults:
  <!-- aria-label gives each field an accessible name; a placeholder alone disappears on input. -->
  <input id="serverOriginEl" placeholder="OpenAI-compatible server URL" aria-label="Default OpenAI-compatible server URL" style="width:300px;" oninput="localStorage.serverOrigin=this.value">
  <script>serverOriginEl.value = localStorage.serverOrigin || '';</script>
  <input id="modelNameEl" placeholder="Model name (huggingface repo)" aria-label="Default model name (Hugging Face repo)" style="width:300px;" oninput="localStorage.modelName=this.value">
  <script>modelNameEl.value = localStorage.modelName || 'OpenGVLab/InternVL2-40B-AWQ';</script>
  <!-- NOTE(review): the API key is stored in localStorage as plain text. -->
  <input id="apiKeyEl" placeholder="API key (optional)" aria-label="API key (optional)" style="width:300px;" oninput="localStorage.apiKey=this.value">
  <script>apiKeyEl.value = localStorage.apiKey || '';</script>
</div>
<!-- The click handler for #imageLoader is attached by the main script further down. -->
<button type="button" id="imageLoader"><b>1. Select test images</b></button>
<button type="button" onclick="computeCaptions()"><b>2. Compute captions for all columns</b></button>
<!-- Comparison table. The header row holds one image column plus five prompt columns; the
     script appends one body row per selected image. Control ids follow the pattern
     promptNServerOriginEl / promptNModelNameEl / systemPromptNEl / promptNEl / prefixNEl,
     and each column button calls computeCaptions with the zero-based column index. -->
<table id="captionTable" style="margin-top:0.5rem;">
  <tr>
    <th scope="col">Images</th>
    <th scope="col">
      <div style="display:flex;"><input style="flex-grow:1;" id="prompt1ServerOriginEl" placeholder="(Optional) Override server URL" aria-label="Prompt 1: override server URL (optional)"><input style="flex-grow:1;" id="prompt1ModelNameEl" placeholder="(Optional) Override model name" aria-label="Prompt 1: override model name (optional)"></div>
      <div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 1</div>
      <textarea id="systemPrompt1El" placeholder="System prompt 1, e.g. 'You are an expert image captioner, ...'" aria-label="System prompt 1" oninput="localStorage.systemPrompt1=this.value"></textarea>
      <textarea id="prompt1El" placeholder="Prompt 1, e.g. 'Caption this image.'" aria-label="Prompt 1" oninput="localStorage.prompt1=this.value"></textarea>
      <textarea id="prefix1El" placeholder="(Optional) Prefix 1, e.g. 'It's an image of'" aria-label="Prefix 1 (optional)" oninput="localStorage.prefix1=this.value"></textarea>
      <script>prompt1El.value = localStorage.prompt1 || ''; prefix1El.value = localStorage.prefix1 || ''; systemPrompt1El.value = localStorage.systemPrompt1 || '';</script>
      <button type="button" onclick="computeCaptions(0); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
    </th>
    <th scope="col">
      <div style="display:flex;"><input style="flex-grow:1;" id="prompt2ServerOriginEl" placeholder="(Optional) Override server URL" aria-label="Prompt 2: override server URL (optional)"><input style="flex-grow:1;" id="prompt2ModelNameEl" placeholder="(Optional) Override model name" aria-label="Prompt 2: override model name (optional)"></div>
      <div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 2</div>
      <textarea id="systemPrompt2El" placeholder="System prompt 2, e.g. 'You are an assistant who accurately ...'" aria-label="System prompt 2" oninput="localStorage.systemPrompt2=this.value"></textarea>
      <textarea id="prompt2El" placeholder="Prompt 2, e.g. 'Describe this image in extreme detail.'" aria-label="Prompt 2" oninput="localStorage.prompt2=this.value"></textarea>
      <textarea id="prefix2El" placeholder="(Optional) Prefix 2, e.g. 'The image depicts'" aria-label="Prefix 2 (optional)" oninput="localStorage.prefix2=this.value"></textarea>
      <script>prompt2El.value = localStorage.prompt2 || ''; prefix2El.value = localStorage.prefix2 || ''; systemPrompt2El.value = localStorage.systemPrompt2 || '';</script>
      <button type="button" onclick="computeCaptions(1); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
    </th>
    <th scope="col">
      <div style="display:flex;"><input style="flex-grow:1;" id="prompt3ServerOriginEl" placeholder="(Optional) Override server URL" aria-label="Prompt 3: override server URL (optional)"><input style="flex-grow:1;" id="prompt3ModelNameEl" placeholder="(Optional) Override model name" aria-label="Prompt 3: override model name (optional)"></div>
      <div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 3</div>
      <textarea id="systemPrompt3El" placeholder="System prompt 3" aria-label="System prompt 3" oninput="localStorage.systemPrompt3=this.value"></textarea>
      <textarea id="prompt3El" placeholder="Prompt 3" aria-label="Prompt 3" oninput="localStorage.prompt3=this.value"></textarea>
      <textarea id="prefix3El" placeholder="Prefix 3" aria-label="Prefix 3 (optional)" oninput="localStorage.prefix3=this.value"></textarea>
      <script>prompt3El.value = localStorage.prompt3 || ''; prefix3El.value = localStorage.prefix3 || ''; systemPrompt3El.value = localStorage.systemPrompt3 || '';</script>
      <button type="button" onclick="computeCaptions(2); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
    </th>
    <th scope="col">
      <div style="display:flex;"><input style="flex-grow:1;" id="prompt4ServerOriginEl" placeholder="(Optional) Override server URL" aria-label="Prompt 4: override server URL (optional)"><input style="flex-grow:1;" id="prompt4ModelNameEl" placeholder="(Optional) Override model name" aria-label="Prompt 4: override model name (optional)"></div>
      <div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 4</div>
      <textarea id="systemPrompt4El" placeholder="System prompt 4" aria-label="System prompt 4" oninput="localStorage.systemPrompt4=this.value"></textarea>
      <textarea id="prompt4El" placeholder="Prompt 4" aria-label="Prompt 4" oninput="localStorage.prompt4=this.value"></textarea>
      <textarea id="prefix4El" placeholder="Prefix 4" aria-label="Prefix 4 (optional)" oninput="localStorage.prefix4=this.value"></textarea>
      <script>prompt4El.value = localStorage.prompt4 || ''; prefix4El.value = localStorage.prefix4 || ''; systemPrompt4El.value = localStorage.systemPrompt4 || '';</script>
      <button type="button" onclick="computeCaptions(3); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
    </th>
    <th scope="col">
      <div style="display:flex;"><input style="flex-grow:1;" id="prompt5ServerOriginEl" placeholder="(Optional) Override server URL" aria-label="Prompt 5: override server URL (optional)"><input style="flex-grow:1;" id="prompt5ModelNameEl" placeholder="(Optional) Override model name" aria-label="Prompt 5: override model name (optional)"></div>
      <div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 5</div>
      <textarea id="systemPrompt5El" placeholder="System prompt 5" aria-label="System prompt 5" oninput="localStorage.systemPrompt5=this.value"></textarea>
      <textarea id="prompt5El" placeholder="Prompt 5" aria-label="Prompt 5" oninput="localStorage.prompt5=this.value"></textarea>
      <textarea id="prefix5El" placeholder="Prefix 5" aria-label="Prefix 5 (optional)" oninput="localStorage.prefix5=this.value"></textarea>
      <script>prompt5El.value = localStorage.prompt5 || ''; prefix5El.value = localStorage.prefix5 || ''; systemPrompt5El.value = localStorage.systemPrompt5 || '';</script>
      <button type="button" onclick="computeCaptions(4); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
    </th>
  </tr>
</table>
| <script> | |
// ---- Image selection ---------------------------------------------------------------
// Data URLs of the currently selected test images, in table-row order. Other functions
// in this script read and replace it through `window.testImages`.
window.testImages = [];
// File handles (or handle-like wrappers around plain File objects) behind those images.
let imageHandles = [];

const IMAGE_EXTENSIONS = ['.png', '.gif', '.jpeg', '.jpg', '.webp', '.avif', '.svg'];

// Hidden file input, used as a fallback when showOpenFilePicker cannot be used.
const fileInput = Object.assign(document.createElement('input'), {
  type: 'file',
  multiple: true,
  accept: `${IMAGE_EXTENSIONS.join(',')},image/*`,
  style: 'display:none',
});
document.body.appendChild(fileInput);

// "Select test images" button: prefer the File System Access picker (its handles can be
// saved and restored on the next visit), otherwise fall back to the hidden file input.
document.getElementById('imageLoader').addEventListener('click', async () => {
  let pickedHandles;
  try {
    pickedHandles = await window.showOpenFilePicker({
      multiple: true,
      types: [{ description: 'Images', accept: { 'image/*': IMAGE_EXTENSIONS } }],
    });
  } catch (err) {
    // Dismissing the picker rejects with AbortError: keep the current selection and do
    // not pop up a second dialog.
    if (err?.name === 'AbortError') return;
    // Any other failure (typically the API being unavailable) goes to the fallback input.
    // Clearing the value first makes `change` fire even if the same files are re-picked.
    fileInput.value = '';
    fileInput.click();
    return;
  }

  imageHandles = pickedHandles;
  // Show the images first; saving the selection is optional and must not block this.
  await loadImagesFromHandles();

  try {
    // NOTE(review): the package part of this URL ("[email protected]") looks like it was
    // rewritten by an e-mail obfuscator. Restore the real "<package>@<version>" specifier;
    // until then this import fails and the selection is simply not saved.
    const { set } = await import('https://unpkg.com/[email protected]/dist/esm/index.js');
    await set('imageHandles', imageHandles);
  } catch (err) {
    console.warn('Could not save the image selection for the next visit:', err);
  }
});

// Fallback path: wrap each File in a minimal handle-like object so both paths share the
// same loader (which replaces window.testImages and redraws the table).
fileInput.addEventListener('change', async ({ target: { files } }) => {
  if (!files.length) return;
  imageHandles = Array.from(files, file => ({ getFile: async () => file, kind: 'file', name: file.name }));
  await loadImagesFromHandles();
});
/**
 * Reads every entry of `imageHandles` into a data URL, replaces `window.testImages`
 * with the result and redraws the table.
 *
 * Handles that cannot be read (for example a restored handle whose read permission has
 * not been granted) are skipped with a console warning instead of aborting the load.
 * The new list is built locally and assigned in one step, so an overlapping call never
 * sees or produces a half-filled `window.testImages`.
 */
async function loadImagesFromHandles() {
  const handles = imageHandles; // snapshot: the variable may be reassigned while we await

  const dataUrls = await Promise.all(handles.map(async (handle) => {
    try {
      return await fileToDataUrl(await handle.getFile());
    } catch (err) {
      console.warn(`Skipping unreadable image "${handle?.name ?? '(unnamed)'}":`, err);
      return null;
    }
  }));

  // A newer selection was made while these files were being read; let that one win.
  if (handles !== imageHandles) return;

  window.testImages = dataUrls.filter((url) => url !== null);
  updateTable();
}
/**
 * Reads a File/Blob with a FileReader and returns a promise for its data URL.
 * The promise resolves with `reader.result` on load and rejects with the reader's
 * error event on failure.
 */
function fileToDataUrl(file) {
  return new Promise((onDone, onFail) => {
    const fr = new FileReader();
    fr.addEventListener('load', () => onDone(fr.result));
    fr.addEventListener('error', onFail);
    fr.readAsDataURL(file);
  });
}
/**
 * Restores the image selection saved under the 'imageHandles' key by a previous visit.
 *
 * Restoring is best-effort: if the storage helper cannot be imported, nothing was saved,
 * or the saved handles can no longer be read, the page just starts with no images.
 */
async function loadSavedImages() {
  let savedHandles;
  try {
    // NOTE(review): the package part of this URL ("[email protected]") looks like it was
    // rewritten by an e-mail obfuscator; restore the real "<package>@<version>" specifier.
    const { get } = await import('https://unpkg.com/[email protected]/dist/esm/index.js');
    savedHandles = await get('imageHandles');
  } catch (err) {
    console.warn('Could not restore the saved image selection:', err);
    return;
  }

  if (!Array.isArray(savedHandles) || savedHandles.length === 0) return;
  // The user already picked images while the import was in flight; keep their choice.
  if (imageHandles.length > 0) return;

  imageHandles = savedHandles;
  try {
    await loadImagesFromHandles();
  } catch (err) {
    console.warn('Saved images could not be read:', err);
  }
}
// Once the page has finished loading, try to bring back the previous image selection.
// The call is fire-and-forget: nothing waits for the restore to finish.
window.addEventListener('load', function restoreImagesOnLoad() {
  void loadSavedImages();
});
/**
 * Re-encodes an image as a JPEG data URL, shrinking it (aspect ratio preserved) so it
 * fits inside maxWidth x maxHeight. Images already within the limits keep their size.
 *
 * @param {{dataUrl: string, maxWidth: number, maxHeight: number}} options
 *   A limit that is not a positive number (0, NaN, negative) means "no limit" for that
 *   axis, so an empty size field no longer produces a zero-sized canvas.
 * @returns {Promise<string>} JPEG data URL. Rejects if the image cannot be decoded or
 *   reports no size, instead of leaving the promise pending forever.
 */
function resizeDataUrl({ dataUrl, maxWidth, maxHeight }) {
  return new Promise((resolve, reject) => {
    const img = new Image();

    img.onload = () => {
      const { width: srcWidth, height: srcHeight } = img;
      if (!(srcWidth > 0 && srcHeight > 0)) {
        reject(new Error('Image has no usable dimensions.'));
        return;
      }

      const widthLimit = maxWidth > 0 ? maxWidth : Infinity;
      const heightLimit = maxHeight > 0 ? maxHeight : Infinity;
      // Never upscale: the factor is capped at 1.
      const scale = Math.min(1, widthLimit / srcWidth, heightLimit / srcHeight);

      // Canvas dimensions are integers; round explicitly and keep at least one pixel.
      const width = Math.max(1, Math.round(srcWidth * scale));
      const height = Math.max(1, Math.round(srcHeight * scale));

      const canvas = document.createElement('canvas');
      canvas.width = width;
      canvas.height = height;
      // NOTE: JPEG has no alpha channel, so transparent areas of the source are flattened.
      canvas.getContext('2d').drawImage(img, 0, 0, width, height);
      resolve(canvas.toDataURL("image/jpeg"));
    };

    img.onerror = () => reject(new Error('Image could not be decoded.'));
    img.src = dataUrl;
  });
}
/**
 * Rebuilds the body of #captionTable: one row per entry of `window.testImages`, with a
 * sticky preview cell followed by one placeholder caption cell per prompt column.
 *
 * All rows are created synchronously, before any await, so overlapping calls (this runs
 * on every keystroke in the size field) can no longer interleave and duplicate rows.
 * Previews are filled in afterwards; a preview that finishes for a row that has since
 * been removed only touches a detached element.
 *
 * @returns {Promise<void>} Settles once every preview has been set or has failed.
 */
async function updateTable() {
  const table = document.getElementById('captionTable');
  const maxImageSize = Number(maxImageSizeEl.value);
  // Caption columns = header cells minus the leading "Images" column.
  const captionColumnCount = table.rows[0].cells.length - 1;

  // Drop every row except the header.
  while (table.rows.length > 1) {
    table.deleteRow(1);
  }

  const pendingPreviews = [];
  for (let i = 0; i < window.testImages.length; i++) {
    const row = table.insertRow(-1);

    const imageCell = row.insertCell(0);
    imageCell.style.pointerEvents = "none";
    imageCell.style.position = 'sticky';
    imageCell.style.left = '0';

    const img = document.createElement('img');
    img.alt = `Test image ${i + 1}`;
    img.style.pointerEvents = "auto";
    imageCell.appendChild(img);

    for (let j = 0; j < captionColumnCount; j++) {
      row.insertCell(-1).textContent = `Caption will appear here`;
    }

    pendingPreviews.push({ img, dataUrl: window.testImages[i] });
  }

  await Promise.all(pendingPreviews.map(async ({ img, dataUrl }) => {
    try {
      img.src = await resizeDataUrl({ dataUrl, maxWidth: maxImageSize, maxHeight: maxImageSize });
    } catch (err) {
      console.warn('Could not render image preview:', err);
      img.alt = `${img.alt} (preview unavailable)`;
    }
  }));
}
// Turns a pasted Hugging Face URL into a bare repo id: strips a leading
// "https://huggingface.co/" and one trailing slash.
function normalizeModelName(name) {
  let cleaned = name.trim();
  if (cleaned.startsWith("https://huggingface.co/")) cleaned = cleaned.replace("https://huggingface.co/", "");
  return cleaned.replace(/\/$/, "");
}

/**
 * Starts caption requests for every image row, for all prompt columns or just one.
 *
 * A column takes part only if its system prompt, prompt or prefix is non-empty. For each
 * participating column the per-column server URL / model name override is used when
 * set, otherwise the page-wide default. Requests are started without being awaited;
 * each cell shows a progress indicator and is then replaced by the caption or by an
 * error message.
 *
 * @param {number|null} columnI Zero-based prompt column to compute, or null for all.
 */
async function computeCaptions(columnI=null) {
  if (window.testImages.length === 0) return alert("Choose images first.");

  const table = document.getElementById('captionTable');
  const promptColumnCount = table.rows[0].cells.length - 1;
  const fieldValue = (id) => document.getElementById(id).value.trim();
  const defaultServerOrigin = serverOriginEl.value.trim();
  const defaultModelName = modelNameEl.value.trim();

  // Collect and validate the settings of every column that will run, before touching
  // any cell, so a bad URL aborts the whole action cleanly.
  const activeColumns = [];
  for (let c = 0; c < promptColumnCount; c++) {
    if (columnI !== null && c !== columnI) continue;
    const n = c + 1;
    const systemPrompt = fieldValue(`systemPrompt${n}El`);
    const prompt = fieldValue(`prompt${n}El`);
    const prefix = fieldValue(`prefix${n}El`);
    if (!systemPrompt && !prompt && !prefix) continue;

    // Validate the URL this column will really use (override or default), trimmed.
    const serverOrigin = fieldValue(`prompt${n}ServerOriginEl`) || defaultServerOrigin;
    if (!serverOrigin.startsWith("https://") && !serverOrigin.startsWith("http://")) {
      return alert("Please enter a valid server URL. It should start with 'https://' or 'http://'.");
    }
    // The normalised name is what gets sent (the original loop discarded its result).
    const modelName = normalizeModelName(fieldValue(`prompt${n}ModelNameEl`) || defaultModelName);

    activeColumns.push({ cellIndex: n, systemPrompt, prompt, prefix, serverOrigin, modelName });
  }
  if (activeColumns.length === 0) return alert("Enter a system prompt, prompt or prefix for at least one column first.");

  for (let i = 1; i < table.rows.length; i++) {
    const imageUrl = window.testImages[i-1];
    for (const column of activeColumns) {
      const cell = table.rows[i].cells[column.cellIndex];
      cell.innerHTML = `⏳ Computing...<br><progress></progress>`;
      computeCaption(imageUrl, column.systemPrompt, column.prompt, column.prefix, column.serverOrigin, column.modelName)
        .then((caption) => {
          cell.textContent = caption ?? "⚠️ No caption returned (see console).";
        })
        .catch((err) => {
          // Never leave the cell stuck on the progress indicator.
          console.error("Error computing caption:", err);
          cell.textContent = `⚠️ Error: ${err?.message ?? err}`;
        });
    }
  }
}
/**
 * Requests one caption from an OpenAI-compatible `/v1/chat/completions` endpoint.
 *
 * The image is first re-encoded by resizeDataUrl using the page's max image size. The
 * request contains an optional system message, a user message with the prompt text and
 * the image, and, when a prefix is given, a trailing assistant message with that prefix.
 *
 * @param {string} imageUrl     Data URL of the source image.
 * @param {string} systemPrompt System prompt; omitted from the request when blank.
 * @param {string} prompt       User prompt text.
 * @param {string} prefix       Text the caption should start with; may be empty.
 * @param {string} serverOrigin Server base URL; one trailing slash is tolerated.
 * @param {string} modelName    Model identifier sent as `model`.
 * @returns {Promise<string>} `prefix` + generated text. On any failure (image, network,
 *   HTTP status, error payload, unexpected response shape) a string starting with
 *   "⚠️ Error:" is returned instead, so the caller always has something to show and a
 *   batch of failing cells does not raise one alert per cell.
 */
async function computeCaption(imageUrl, systemPrompt, prompt, prefix, serverOrigin, modelName) {
  const systemText = systemPrompt.trim();
  const prefixText = prefix.trim();
  const origin = serverOrigin.trim().replace(/\/$/, "");
  const startTime = Date.now();
  let result;

  try {
    const maxImageSize = Number(maxImageSizeEl.value);
    const originalImageSize = imageUrl.length;
    imageUrl = await resizeDataUrl({ dataUrl: imageUrl, maxWidth: maxImageSize, maxHeight: maxImageSize });
    console.log(`original size: ${originalImageSize}, new size: ${imageUrl.length}`);

    const messages = [];
    if(systemText) messages.push({ role:"system", content:systemText });
    messages.push({
      role: "user",
      content: [
        { "type": "text", "text": prompt.trim() },
        { "type": "image_url", "image_url": { "url": imageUrl } },
      ],
    });
    if(prefixText) messages.push({ role:"assistant", content:prefixText });

    const headers = { "content-type": "application/json" };
    // Only send credentials when a key was actually entered (no bare "Bearer ").
    const apiKey = apiKeyEl.value.trim();
    if(apiKey) headers["Authorization"] = `Bearer ${apiKey}`;
    // Match the host with or without a trailing slash or path.
    if(origin.includes("api.anthropic.com")) headers["anthropic-dangerous-direct-browser-access"] = "true";

    const response = await fetch(`${origin}/v1/chat/completions`, {
      headers,
      body: JSON.stringify({
        model: modelName.trim(),
        stream: false,
        messages,
      }),
      method: "POST",
    });
    // An error response may not be JSON at all; fall back to the status line then.
    result = await response.json().catch(() => null);

    if(!response.ok || !result || result.object === "error" || result.error) {
      // Covers both `{object:"error", message}` and `{error:{message}}` payloads.
      const detail = result?.error?.message
        ?? result?.message
        ?? (typeof result?.error === "string" ? result.error : null)
        ?? `HTTP ${response.status} ${response.statusText}`.trim();
      console.error("Error computing caption:", result ?? response.status);
      return `⚠️ Error: ${detail}`;
    }
  } catch (e) {
    console.error("Error computing caption:", e);
    return `⚠️ Error: ${e?.message ?? e}`;
  }

  const content = result.choices?.[0]?.message?.content;
  if(typeof content !== "string") {
    console.error("Unexpected response shape:", result);
    return "⚠️ Error: the server response contained no caption (see console).";
  }

  console.log(content);
  console.log("tokens:", result.usage?.total_tokens); // `usage` may be absent
  console.log("time:", Date.now() - startTime);
  return (prefix || "") + content;
}
| </script> | |
| </body> | |
| </html> |