Spaces:
Running
Running
File size: 21,924 Bytes
b09f300 23978d3 b09f300 ef30362 b09f300 0b04f6b b09f300 860213a dc97950 b09f300 8599e34 b09f300 3e41ed7 b09f300 8599e34 b09f300 3e41ed7 b09f300 8599e34 b09f300 3e41ed7 b09f300 8599e34 b09f300 3e41ed7 b09f300 8599e34 b09f300 3e41ed7 b09f300 fabf72e 8164d28 b09f300 fabf72e b09f300 fabf72e b09f300 fabf72e b09f300 fabf72e 0aec3e5 b09f300 8fcf892 b09f300 8fcf892 812cad0 b09f300 3e41ed7 b09f300 3e41ed7 b09f300 812cad0 b09f300 3e41ed7 b09f300 3e41ed7 b09f300 860213a 084cc2f 3e41ed7 084cc2f dc97950 b09f300 8fcf892 084cc2f b09f300 dc97950 b09f300 860213a b09f300 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 |
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="referrer" content="no-referrer">
<title>A/B Test Captioning UI</title>
<style>
html {
color-scheme: light dark;
font-family: sans-serif;
}
table {
border-collapse: collapse;
min-width: 3000px;
}
table th {
text-align: center;
}
table td {
text-align: left;
white-space: pre-wrap;
}
button {
cursor: pointer;
}
th,
td {
border: 1px solid light-dark(black, #515151);
padding: 8px;
vertical-align: top;
min-width: 500px;
max-width: 500px;
}
textarea {
width: 100%;
box-sizing: border-box;
height: 60px;
}
img {
max-width: min(100%, 20vw);
}
</style>
</head>
<body>
<script>
// Read "#key=value;key2=value2" from the URL fragment so a shared link can pre-fill the
// connection settings below. Values are persisted to localStorage, which the default inputs read.
// NOTE(review): an apiKey passed this way ends up in browser history and localStorage.
{
  // Decode one percent-encoded piece of the fragment; a malformed escape is kept verbatim
  // instead of throwing and aborting the whole page script.
  const decodeHashPart = (text) => {
    try {
      return decodeURIComponent(text);
    } catch {
      return text;
    }
  };
  const hashEntries = document.location.hash
    .slice(1)
    .split(";")
    .filter(Boolean)
    .map((pair) => {
      // Only the FIRST "=" separates key from value: URLs with query strings and
      // base64-padded keys legitimately contain further "=" characters.
      const separatorIndex = pair.indexOf("=");
      if (separatorIndex === -1) return [decodeHashPart(pair), ""];
      return [
        decodeHashPart(pair.slice(0, separatorIndex)),
        decodeHashPart(pair.slice(separatorIndex + 1)),
      ];
    });
  window.hashParams = Object.fromEntries(hashEntries);
}
if (window.hashParams.serverOrigin) localStorage.serverOrigin = window.hashParams.serverOrigin;
if (window.hashParams.modelName) localStorage.modelName = window.hashParams.modelName;
if (window.hashParams.apiKey) localStorage.apiKey = window.hashParams.apiKey;
</script>
<button onclick="readmeCtn.hidden=!readmeCtn.hidden">ℹ️ toggle readme</button>
<pre hidden id="readmeCtn">
<b>Note: This should work for any OpenAI-compatible API server.</b>
<b>STEP 1:</b> Start an OpenAI-compatible API server, for example you can use these params on Runpod:
<span style="opacity:0.5">IMAGE:</span> openmmlab/lmdeploy:v0.6.2.post1-cu12
<span style="opacity:0.5">COMMAND:</span> bash -c "pip install timm==1.0.7 && lmdeploy serve api_server OpenGVLab/InternVL2-40B-AWQ --model-name OpenGVLab/InternVL2-40B-AWQ --backend turbomind --server-port 3000 --model-format awq --quant-policy 4"
<span style="opacity:0.5">VOLUME PATH:</span> /root
<span style="opacity:0.5">CONTAINER DISK:</span> 10GB
<span style="opacity:0.5">VOLUME DISK:</span> 60GB
<span style="opacity:0.5">PORT:</span> 3000
<b>STEP 2:</b> Enter your server endpoint URL in the input below (e.g. something like 'https://uf1kmzigq0p5bd-3000.proxy.runpod.net' if you're using the above server command).
<b>STEP 3:</b> Enter a system prompt and a prompt at the top of one or more columns, and optionally a prefix that you want the resulting caption to start with.
<b>STEP 4:</b> Select some images to test your prompt on (Ctrl+Click to select multiple images).
<b>STEP 5:</b> Click 'Compute captions for all columns' or click one of the individual column buttons.
</pre>
<br><br>
<div style="margin:0.5rem 0;">Scale images down if larger than: <input id="maxImageSizeEl" placeholder="Max image size (px)" style="max-width:70px;" oninput="localStorage.maxImageSize=this.value; updateTable()"></div>
<script>maxImageSizeEl.value = localStorage.maxImageSize || '1024';</script>
<div style="margin:0.5rem 0;">
Defaults:
<input id="serverOriginEl" placeholder="OpenAI-compatible server URL" style="width:300px;" oninput="localStorage.serverOrigin=this.value">
<script>serverOriginEl.value = localStorage.serverOrigin || '';</script>
<input id="modelNameEl" placeholder="Model name (huggingface repo)" style="width:300px;" oninput="localStorage.modelName=this.value">
<script>modelNameEl.value = localStorage.modelName || 'OpenGVLab/InternVL2-40B-AWQ';</script>
<input id="apiKeyEl" placeholder="API key (optional)" style="width:300px;" oninput="localStorage.apiKey=this.value">
<script>apiKeyEl.value = localStorage.apiKey || '';</script>
<input id="temperatureEl" placeholder="Temperature (defaults to 1)" style="width:300px;" oninput="localStorage.temperature=this.value">
<script>temperatureEl.value = localStorage.temperature || '';</script>
</div>
<button id="imageLoader"><b>1. Select test images</b></button>
<button onclick="computeCaptions()"><b>2. Compute captions for all columns</b></button>
<table id="captionTable" style="margin-top:0.5rem;">
<tr>
<th>Images</th>
<th>
<div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt1ServerOriginEl" oninput="localStorage.prompt1ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt1ModelNameEl" oninput="localStorage.prompt1ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt1ApiKeyEl" oninput="localStorage.prompt1ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
<div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 1</div>
<textarea id="systemPrompt1El" placeholder="System prompt 1, e.g. 'You are an expert image captioner, ...'" oninput="localStorage.systemPrompt1=this.value"></textarea>
<textarea id="prompt1El" placeholder="Prompt 1, e.g. 'Caption this image.'" oninput="localStorage.prompt1=this.value"></textarea>
<textarea id="prefix1El" placeholder="(Optional) Prefix 1, e.g. 'It's an image of'" oninput="localStorage.prefix1=this.value"></textarea>
<script>prompt1El.value = localStorage.prompt1 || ''; prefix1El.value = localStorage.prefix1 || ''; systemPrompt1El.value = localStorage.systemPrompt1 || ''; prompt1ServerOriginEl.value = localStorage.prompt1ServerOriginOverride || ''; prompt1ModelNameEl.value = localStorage.prompt1ModelNameOverride || '';; prompt1ApiKeyEl.value = localStorage.prompt1ApiKeyOverride || '';</script>
<button onclick="computeCaptions(0); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
</th>
<th>
<div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt2ServerOriginEl" oninput="localStorage.prompt2ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt2ModelNameEl" oninput="localStorage.prompt2ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt2ApiKeyEl" oninput="localStorage.prompt2ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
<div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 2</div>
<textarea id="systemPrompt2El" placeholder="System prompt 2, e.g. 'You are an assistant who accurately ...'" oninput="localStorage.systemPrompt2=this.value"></textarea>
<textarea id="prompt2El" placeholder="Prompt 2, e.g. 'Describe this image in extreme detail.'" oninput="localStorage.prompt2=this.value"></textarea>
<textarea id="prefix2El" placeholder="(Optional) Prefix 2, e.g. 'The image depicts'" oninput="localStorage.prefix2=this.value"></textarea>
<script>prompt2El.value = localStorage.prompt2 || ''; prefix2El.value = localStorage.prefix2 || ''; systemPrompt2El.value = localStorage.systemPrompt2 || ''; prompt2ServerOriginEl.value = localStorage.prompt2ServerOriginOverride || ''; prompt2ModelNameEl.value = localStorage.prompt2ModelNameOverride || ''; prompt2ApiKeyEl.value = localStorage.prompt2ApiKeyOverride || '';</script>
<button onclick="computeCaptions(1); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
</th>
<th>
<div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt3ServerOriginEl" oninput="localStorage.prompt3ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt3ModelNameEl" oninput="localStorage.prompt3ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt3ApiKeyEl" oninput="localStorage.prompt3ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
<div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 3</div>
<textarea id="systemPrompt3El" placeholder="System prompt 3" oninput="localStorage.systemPrompt3=this.value"></textarea>
<textarea id="prompt3El" placeholder="Prompt 3" oninput="localStorage.prompt3=this.value"></textarea>
<textarea id="prefix3El" placeholder="Prefix 3" oninput="localStorage.prefix3=this.value"></textarea>
<script>prompt3El.value = localStorage.prompt3 || ''; prefix3El.value = localStorage.prefix3 || ''; systemPrompt3El.value = localStorage.systemPrompt3 || ''; prompt3ServerOriginEl.value = localStorage.prompt3ServerOriginOverride || ''; prompt3ModelNameEl.value = localStorage.prompt3ModelNameOverride || ''; prompt3ApiKeyEl.value = localStorage.prompt3ApiKeyOverride || '';</script>
<button onclick="computeCaptions(2); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
</th>
<th>
<div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt4ServerOriginEl" oninput="localStorage.prompt4ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt4ModelNameEl" oninput="localStorage.prompt4ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt4ApiKeyEl" oninput="localStorage.prompt4ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
<div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 4</div>
<textarea id="systemPrompt4El" placeholder="System prompt 4" oninput="localStorage.systemPrompt4=this.value"></textarea>
<textarea id="prompt4El" placeholder="Prompt 4" oninput="localStorage.prompt4=this.value"></textarea>
<textarea id="prefix4El" placeholder="Prefix 4" oninput="localStorage.prefix4=this.value"></textarea>
<script>prompt4El.value = localStorage.prompt4 || ''; prefix4El.value = localStorage.prefix4 || ''; systemPrompt4El.value = localStorage.systemPrompt4 || ''; prompt4ServerOriginEl.value = localStorage.prompt4ServerOriginOverride || ''; prompt4ModelNameEl.value = localStorage.prompt4ModelNameOverride || ''; prompt4ApiKeyEl.value = localStorage.prompt4ApiKeyOverride || '';</script>
<button onclick="computeCaptions(3); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
</th>
<th>
<div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt5ServerOriginEl" oninput="localStorage.prompt5ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt5ModelNameEl" oninput="localStorage.prompt5ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt5ApiKeyEl" oninput="localStorage.prompt5ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
<div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 5</div>
<textarea id="systemPrompt5El" placeholder="System prompt 5" oninput="localStorage.systemPrompt5=this.value"></textarea>
<textarea id="prompt5El" placeholder="Prompt 5" oninput="localStorage.prompt5=this.value"></textarea>
<textarea id="prefix5El" placeholder="Prefix 5" oninput="localStorage.prefix5=this.value"></textarea>
<script>prompt5El.value = localStorage.prompt5 || ''; prefix5El.value = localStorage.prefix5 || ''; systemPrompt5El.value = localStorage.systemPrompt5 || ''; prompt5ServerOriginEl.value = localStorage.prompt5ServerOriginOverride || ''; prompt5ModelNameEl.value = localStorage.prompt5ModelNameOverride || ''; prompt5ApiKeyEl.value = localStorage.prompt5ApiKeyOverride || '';</script>
<button onclick="computeCaptions(4); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
</th>
</tr>
</table>
<script>
// Data URLs of the currently selected test images; table row i displays testImages[i - 1].
window.testImages = [];

// Objects exposing getFile() for the current selection (picker handles or wrapped File objects).
let imageHandles = [];

// Hidden <input type="file"> used when the file picker API call fails.
const fileInput = document.createElement('input');
fileInput.type = 'file';
fileInput.multiple = true;
fileInput.accept = '.png,.gif,.jpeg,.jpg,.webp,.avif,.svg,image/*';
fileInput.style = 'display:none';
document.body.appendChild(fileInput);
// "Select test images" button: ask for image files, remember the handles, then load them.
document.getElementById('imageLoader').addEventListener('click', async () => {
  // Fallback path: clear the current selection and open the hidden <input type="file">;
  // its "change" listener takes over from there.
  const openFallbackInput = () => {
    window.testImages = [];
    fileInput.click();
  };

  if (typeof window.showOpenFilePicker !== 'function') {
    openFallbackInput();
    return;
  }

  let pickedHandles;
  try {
    pickedHandles = await window.showOpenFilePicker({
      multiple: true,
      types: [{ description: 'Images', accept: { 'image/*': ['.png','.gif','.jpeg','.jpg','.webp','.avif','.svg'] }}]
    });
  } catch (error) {
    // Dismissing the picker is not a failure: keep the current selection and do NOT
    // pop up a second file dialog.
    if (error?.name === 'AbortError') return;
    console.warn('showOpenFilePicker failed; falling back to <input type="file">:', error);
    openFallbackInput();
    return;
  }
  imageHandles = pickedHandles;

  // Persisting the handles is best effort: a blocked or unreachable CDN must not prevent
  // the freshly picked images from being shown.
  // NOTE(review): the package specifier in this URL looks mangled by e-mail obfuscation
  // ("[email protected]"); restore the intended idb-keyval version for persistence to work.
  try {
    const { set } = await import('https://unpkg.com/[email protected]/dist/esm/index.js');
    await set('imageHandles', imageHandles);
  } catch (error) {
    console.warn('Could not persist the selected image handles:', error);
  }

  await loadImagesFromHandles();
});
// Fallback selection path: wrap the chosen File objects so they look like picker handles
// (an object with getFile()), then reuse the shared loader instead of duplicating it.
fileInput.addEventListener('change', async ({ target }) => {
  // Copy first: the FileList is emptied when the input value is reset below.
  const pickedFiles = Array.from(target.files);
  // Reset the control so choosing the very same files again still fires "change".
  target.value = '';
  if (!pickedFiles.length) return;

  imageHandles = pickedFiles.map(file => ({ getFile: async () => file, kind: 'file', name: file.name }));
  try {
    // Replaces window.testImages with the new selection and redraws the table.
    await loadImagesFromHandles();
  } catch (error) {
    console.error('Could not load the selected images:', error);
  }
});
/**
 * Read every entry of `imageHandles` into a data URL, publish the result as
 * `window.testImages`, and redraw the table.
 *
 * Images are collected in a local array and published in one assignment, so overlapping
 * calls cannot interleave their results. A handle that cannot be read is logged and
 * skipped instead of aborting the whole load.
 *
 * @returns {Promise<void>} Resolves once all handles were processed and the redraw was started.
 */
async function loadImagesFromHandles() {
  const loadedImages = [];
  for (const handle of imageHandles) {
    try {
      const file = await handle.getFile();
      loadedImages.push(await fileToDataUrl(file));
    } catch (error) {
      console.warn(`Skipping image "${handle?.name ?? 'unknown'}" because it could not be read:`, error);
    }
  }
  window.testImages = loadedImages;
  // Not awaited, as before: the table redraw continues in the background.
  updateTable();
}
/**
 * Read a file through FileReader and hand back its contents as a `data:` URL.
 *
 * @param {Blob} file - The file to encode.
 * @returns {Promise<string>} Fulfils with `reader.result` once the read has loaded;
 *   rejects with whatever the reader passes to its `onerror` handler.
 */
function fileToDataUrl(file) {
  return new Promise((fulfil, fail) => {
    const reader = Object.assign(new FileReader(), {
      onload() {
        fulfil(reader.result);
      },
      onerror: fail,
    });
    reader.readAsDataURL(file);
  });
}
/**
 * Restore the image selection stored under the 'imageHandles' key by a previous visit.
 *
 * Restoring is best effort: if the storage helper cannot be imported or read, the failure
 * is logged and the page simply starts without images. A missing or empty stored list
 * leaves the current `imageHandles` untouched.
 *
 * @returns {Promise<void>}
 */
async function loadSavedImages() {
  let savedHandles;
  try {
    // NOTE(review): the package specifier in this URL looks mangled by e-mail obfuscation
    // ("[email protected]"); restore the intended idb-keyval version for persistence to work.
    const { get } = await import('https://unpkg.com/[email protected]/dist/esm/index.js');
    savedHandles = await get('imageHandles');
  } catch (error) {
    console.warn('Could not restore previously selected images:', error);
    return;
  }
  if (!Array.isArray(savedHandles) || savedHandles.length === 0) return;
  imageHandles = savedHandles;
  await loadImagesFromHandles();
}
// Once the page has finished loading, try to bring back the images from the previous visit.
// An earlier draft waited for a first click before restoring (presumably so the browser
// would allow reading the stored handles — TODO confirm); restoring starts immediately here.
window.addEventListener('load', () => {
  // Handle the rejection explicitly so a failed restore never surfaces as an unhandled rejection.
  loadSavedImages().catch((error) => {
    console.warn('Restoring saved images failed:', error);
  });
});
/**
 * Re-encode an image as JPEG, scaling it down (never up) so it fits inside
 * maxWidth × maxHeight while keeping its aspect ratio.
 *
 * @param {object} options
 * @param {string} options.dataUrl - Source image URL (the callers pass data URLs).
 * @param {number} options.maxWidth - Width limit in pixels; a non-finite or non-positive value means "no limit".
 * @param {number} options.maxHeight - Height limit in pixels; a non-finite or non-positive value means "no limit".
 * @returns {Promise<string>} JPEG data URL produced by `canvas.toDataURL("image/jpeg")`.
 *   Rejects with an Error when the image cannot be loaded or drawn, instead of never settling.
 */
function resizeDataUrl({ dataUrl, maxWidth, maxHeight }) {
  // An empty size box yields 0 (and garbage yields NaN); treating those as "unlimited"
  // avoids producing a 0×0 canvas.
  const toLimit = (value) => {
    const limit = Number(value);
    return Number.isFinite(limit) && limit > 0 ? limit : Infinity;
  };
  const widthLimit = toLimit(maxWidth);
  const heightLimit = toLimit(maxHeight);

  return new Promise((resolve, reject) => {
    const img = new Image();
    img.onload = () => {
      try {
        let { width, height } = img;
        if (width > widthLimit || height > heightLimit) {
          const ratio = Math.min(widthLimit / width, heightLimit / height);
          // Canvas dimensions are integers; never let a very thin image collapse to 0 px.
          width = Math.max(1, Math.round(width * ratio));
          height = Math.max(1, Math.round(height * ratio));
        }
        const canvas = document.createElement('canvas');
        canvas.width = width;
        canvas.height = height;
        canvas.getContext('2d').drawImage(img, 0, 0, width, height);
        resolve(canvas.toDataURL("image/jpeg"));
      } catch (error) {
        reject(error);
      }
    };
    img.onerror = () => reject(new Error('The image could not be loaded for resizing.'));
    img.src = dataUrl;
  });
}
// Bumped on every updateTable() call so an older call that is still awaiting a resize
// can detect that it has been superseded and stop touching the table.
let latestTableRender = 0;

/**
 * Rebuild the caption table body: one row per entry of `window.testImages`, holding a
 * sticky preview cell followed by one placeholder cell per prompt column.
 *
 * This runs on every keystroke in the max-size box and after each image load, so calls
 * can overlap. Only the most recent call is allowed to insert rows, which prevents
 * duplicated rows. The number of caption cells is taken from the header row.
 *
 * @returns {Promise<void>} Resolves when this call finished rendering or was superseded.
 */
async function updateTable() {
  const renderId = ++latestTableRender;
  const table = document.getElementById('captionTable');
  // Clear existing rows except header
  while (table.rows.length > 1) {
    table.deleteRow(1);
  }

  const maxImageSize = Number(maxImageSizeEl.value);
  const captionColumnCount = table.rows[0].cells.length - 1;
  // Snapshot, so rows stay aligned even if the selection is replaced while we await.
  const images = [...window.testImages];

  for (const dataUrl of images) {
    let previewUrl;
    try {
      previewUrl = await resizeDataUrl({ dataUrl, maxWidth: maxImageSize, maxHeight: maxImageSize });
    } catch (error) {
      // Keep the row (row i must correspond to testImages[i - 1]) and show the source as-is.
      console.warn('Could not resize image for preview; showing it unscaled:', error);
      previewUrl = dataUrl;
    }
    // A newer call has cleared the table and is rebuilding it; leave everything to it.
    if (renderId !== latestTableRender) return;

    const row = table.insertRow(-1);
    const imageCell = row.insertCell(0);
    const img = document.createElement('img');
    img.style.pointerEvents = "auto";
    img.src = previewUrl;
    imageCell.appendChild(img);
    imageCell.style.pointerEvents = "none";
    imageCell.style.position = 'sticky';
    imageCell.style.left = '0';
    for (let j = 0; j < captionColumnCount; j++) {
      row.insertCell(-1).textContent = `Caption will appear here`;
    }
  }
}
/**
 * Start caption requests for every image row, for one prompt column or for all of them.
 *
 * For each column the effective server URL / model name / API key is the column's
 * override field, or the shared default when the override is blank. Columns whose system
 * prompt, prompt and prefix are all blank are skipped. The server URL is validated per
 * column, so a column with a valid override works even if the default URL is empty.
 *
 * Each target cell shows a progress indicator, then the caption, or an error message if
 * the request failed (instead of staying on the indicator forever).
 *
 * @param {number|null} [columnI=null] - Zero-based prompt column to compute, or null for all columns.
 * @returns {Promise<void>} Resolves once every started request has settled.
 */
async function computeCaptions(columnI=null) {
  if(window.testImages.length === 0) return alert("Choose images first.");

  const GOOGLE_ORIGIN = "https://generativelanguage.googleapis.com";
  const HUGGINGFACE_PREFIX = "https://huggingface.co/";
  const table = document.getElementById('captionTable');
  // The header row has one "Images" cell followed by one cell per prompt column.
  const columnCount = table.rows[0].cells.length - 1;
  const fieldValue = (id) => document.getElementById(id).value.trim();

  // Resolve and validate the settings of every requested column before sending anything.
  const columns = [];
  for (let n = 1; n <= columnCount; n++) {
    if (columnI !== null && n !== columnI + 1) continue;

    const systemPrompt = fieldValue(`systemPrompt${n}El`);
    const prompt = fieldValue(`prompt${n}El`);
    const prefix = fieldValue(`prefix${n}El`);
    if (!systemPrompt && !prompt && !prefix) continue;

    let serverOrigin = (fieldValue(`prompt${n}ServerOriginEl`) || serverOriginEl.value.trim()).replace(/\/$/, "");
    // Any URL under the Google endpoint is reduced to its origin.
    if (serverOrigin.startsWith(GOOGLE_ORIGIN)) serverOrigin = GOOGLE_ORIGIN;
    if (!serverOrigin.startsWith("https://") && !serverOrigin.startsWith("http://")) {
      return alert("Please enter a valid server URL. It should start with 'https://' or 'http://'.");
    }

    // Accept a pasted Hugging Face page URL in place of the bare "org/model" name.
    let modelName = fieldValue(`prompt${n}ModelNameEl`) || modelNameEl.value.trim();
    if (modelName.startsWith(HUGGINGFACE_PREFIX)) modelName = modelName.slice(HUGGINGFACE_PREFIX.length);
    modelName = modelName.replace(/\/$/, ""); // remove trailing slash

    const apiKey = fieldValue(`prompt${n}ApiKeyEl`) || apiKeyEl.value.trim();

    columns.push({ cellIndex: n, systemPrompt, prompt, prefix, serverOrigin, modelName, apiKey });
  }

  const pending = [];
  for (let rowIndex = 1; rowIndex < table.rows.length; rowIndex++) {
    for (const column of columns) {
      const cell = table.rows[rowIndex].cells[column.cellIndex];
      cell.innerHTML = `⏳ Computing...<br><progress></progress>`;
      const request = computeCaption(
        window.testImages[rowIndex - 1],
        column.systemPrompt,
        column.prompt,
        column.prefix,
        column.serverOrigin,
        column.modelName,
        column.apiKey,
      ).then(
        (caption) => {
          // textContent, not innerHTML: the caption is untrusted server output.
          cell.textContent = caption ?? "⚠️ No caption returned (see the browser console for details).";
        },
        (error) => {
          console.error("Error computing caption:", error);
          cell.textContent = `⚠️ Error: ${error?.message ?? error}`;
        },
      );
      pending.push(request);
    }
  }
  await Promise.all(pending);
}
/**
 * Request one caption from an OpenAI-compatible chat-completions endpoint.
 *
 * The image is first scaled down to the configured maximum size. The request contains an
 * optional system message, a user message holding the prompt text plus the image, and an
 * optional assistant message holding the caption prefix.
 *
 * @param {string} imageUrl - Data URL of the image to caption.
 * @param {string} systemPrompt - System prompt; omitted from the request when blank.
 * @param {string} prompt - User prompt text.
 * @param {string} prefix - Text the caption should start with; sent as an assistant message when not blank.
 * @param {string} serverOrigin - Server base URL without trailing slash.
 * @param {string} modelName - Model identifier sent as `model`.
 * @param {string} apiKey - Bearer token; no Authorization header is sent when blank.
 * @returns {Promise<string|undefined>} The prefix followed by the generated text. Resolves to
 *   undefined (after an alert) when the server answers with an `object: "error"` payload.
 * @throws {Error} When the request fails, the response is not JSON, the server reports an
 *   error, or the response contains no caption.
 */
async function computeCaption(imageUrl, systemPrompt, prompt, prefix, serverOrigin, modelName, apiKey) {
  const GOOGLE_ORIGIN = "https://generativelanguage.googleapis.com";

  const maxImageSize = Number(maxImageSizeEl.value);
  const resizedImageUrl = await resizeDataUrl({ dataUrl: imageUrl, maxWidth: maxImageSize, maxHeight: maxImageSize });
  console.log(`original size: ${imageUrl.length}, new size: ${resizedImageUrl.length}`);

  const messages = [];
  if(systemPrompt.trim()) messages.push({ role:"system", content:systemPrompt.trim() });
  messages.push({
    role: "user",
    content: [
      { "type": "text", "text": prompt.trim() },
      { "type": "image_url", "image_url": { "url": resizedImageUrl } },
    ],
  });
  if(prefix.trim()) messages.push({ role:"assistant", content:prefix.trim() });

  const headers = { "content-type": "application/json" };
  // Do not send an empty "Bearer " token to servers that need no key.
  if(apiKey.trim()) headers["Authorization"] = `Bearer ${apiKey}`;
  // The caller strips the trailing slash, so the host is matched without one.
  if(serverOrigin.trim().includes("://api.anthropic.com")) headers["anthropic-dangerous-direct-browser-access"] = "true";

  let temperature = Number(temperatureEl.value.trim() || 1);
  if(Number.isNaN(temperature)) temperature = 1;

  const endpoint = `${serverOrigin}${serverOrigin === GOOGLE_ORIGIN ? "/v1beta/openai/chat/completions" : "/v1/chat/completions"}`;
  const startTime = Date.now();

  let response;
  try {
    response = await fetch(endpoint, {
      headers,
      body: JSON.stringify({
        model: modelName.trim(),
        temperature,
        stream: false,
        messages,
      }),
      method: "POST",
    });
  } catch (error) {
    throw new Error(`Request to ${endpoint} failed: ${error?.message ?? error}`, { cause: error });
  }

  let result;
  try {
    result = await response.json();
  } catch (error) {
    throw new Error(`Server answered HTTP ${response.status} with a body that is not JSON.`, { cause: error });
  }

  // Error payload of the form { object: "error", message } — reported as before.
  if(result?.object === "error") return alert(result.message);

  // Error payload of the form { error: { message } } or { error: "..." }, or a bare HTTP error status.
  const apiErrorMessage = typeof result?.error === "string" ? result.error : result?.error?.message;
  if(apiErrorMessage || !response.ok) {
    throw new Error(apiErrorMessage || `Server answered HTTP ${response.status}.`);
  }

  const content = result?.choices?.[0]?.message?.content;
  if(content == null) throw new Error("The server response did not contain a caption.");

  console.log(content);
  // `usage` may be absent; that must not discard a caption that was generated successfully.
  console.log("tokens:", result.usage?.total_tokens);
  console.log("time:", Date.now() - startTime);
  return `${prefix || ""}${content}`;
}
</script>
</body>
</html> |