Spaces:
Running
Running
File size: 21,924 Bytes
b09f300 23978d3 b09f300 ef30362 b09f300 0b04f6b b09f300 860213a dc97950 b09f300 8599e34 b09f300 3e41ed7 b09f300 8599e34 b09f300 3e41ed7 b09f300 8599e34 b09f300 3e41ed7 b09f300 8599e34 b09f300 3e41ed7 b09f300 8599e34 b09f300 3e41ed7 b09f300 fabf72e 8164d28 b09f300 fabf72e b09f300 fabf72e b09f300 fabf72e b09f300 fabf72e 0aec3e5 b09f300 8fcf892 b09f300 8fcf892 812cad0 b09f300 3e41ed7 b09f300 3e41ed7 b09f300 812cad0 b09f300 3e41ed7 b09f300 3e41ed7 b09f300 860213a 084cc2f 3e41ed7 084cc2f dc97950 b09f300 8fcf892 084cc2f b09f300 dc97950 b09f300 860213a b09f300 |
|
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="referrer" content="no-referrer">
<title>A/B Test Captioning UI</title>
<style>
html {
color-scheme: light dark;
font-family: sans-serif;
}
table {
border-collapse: collapse;
min-width: 3000px;
}
table th {
text-align: center;
}
table td {
text-align: left;
white-space: pre-wrap;
}
button {
cursor: pointer;
}
th,
td {
border: 1px solid light-dark(black, #515151);
padding: 8px;
vertical-align: top;
min-width: 500px;
max-width: 500px;
}
textarea {
width: 100%;
box-sizing: border-box;
height: 60px;
}
img {
max-width: min(100%, 20vw);
}
</style>
</head>
<body>
<script>
// Read optional settings from the URL fragment, written as "#key=value;key=value".
// Only the FIRST "=" of each pair separates key from value, so values that
// themselves contain "=" (for example base64-padded API keys) stay intact.
// A pair without "=" yields the key with an undefined value.
window.hashParams = Object.fromEntries(
  document.location.hash
    .slice(1)
    .split(";")
    .filter(Boolean)
    .map((pair) => {
      const separatorIndex = pair.indexOf("=");
      if (separatorIndex === -1) return [pair, undefined];
      return [pair.slice(0, separatorIndex), pair.slice(separatorIndex + 1)];
    }),
);
// Persist the recognised settings; the default inputs further down the page
// initialise themselves from these localStorage entries.
for (const settingName of ["serverOrigin", "modelName", "apiKey"]) {
  if (window.hashParams[settingName]) localStorage[settingName] = window.hashParams[settingName];
}
</script>
<button onclick="readmeCtn.hidden=!readmeCtn.hidden">ℹ️ toggle readme</button>
<pre hidden id="readmeCtn">
<b>Note: This should work for any OpenAI-compatible API server.</b>
<b>STEP 1:</b> Start an OpenAI-compatible API server, for example you can use these params on Runpod:
<span style="opacity:0.5">IMAGE:</span> openmmlab/lmdeploy:v0.6.2.post1-cu12
<span style="opacity:0.5">COMMAND:</span> bash -c "pip install timm==1.0.7 && lmdeploy serve api_server OpenGVLab/InternVL2-40B-AWQ --model-name OpenGVLab/InternVL2-40B-AWQ --backend turbomind --server-port 3000 --model-format awq --quant-policy 4"
<span style="opacity:0.5">VOLUME PATH:</span> /root
<span style="opacity:0.5">CONTAINER DISK:</span> 10GB
<span style="opacity:0.5">VOLUME DISK:</span> 60GB
<span style="opacity:0.5">PORT:</span> 3000
<b>STEP 2:</b> Enter your server endpoint url in the input below (e.g. something like 'https://uf1kmzigq0p5bd-3000.proxy.runpod.net' if you're using the above server command).
<b>STEP 3:</b> Enter a system prompt and a prompt at the top of one or more columns, and optionally a prefix that you want the resulting caption to start with.
<b>STEP 4:</b> Select some images to test your prompt on (Ctrl+Click to select multiple images).
<b>STEP 5:</b> Click 'Compute captions for all columns' or click one of the individual column buttons.
</pre>
<br><br>
<div style="margin:0.5rem 0;">Scale images down if larger than: <input id="maxImageSizeEl" placeholder="Max image size (px)" style="max-width:70px;" oninput="localStorage.maxImageSize=this.value; updateTable()"></div>
<script>maxImageSizeEl.value = localStorage.maxImageSize || '1024';</script>
<div style="margin:0.5rem 0;">
Defaults:
<input id="serverOriginEl" placeholder="OpenAI-compatible server URL" style="width:300px;" oninput="localStorage.serverOrigin=this.value">
<script>serverOriginEl.value = localStorage.serverOrigin || '';</script>
<input id="modelNameEl" placeholder="Model name (huggingface repo)" style="width:300px;" oninput="localStorage.modelName=this.value">
<script>modelNameEl.value = localStorage.modelName || 'OpenGVLab/InternVL2-40B-AWQ';</script>
<input id="apiKeyEl" placeholder="API key (optional)" style="width:300px;" oninput="localStorage.apiKey=this.value">
<script>apiKeyEl.value = localStorage.apiKey || '';</script>
<input id="temperatureEl" placeholder="Temperature (defaults to 1)" style="width:300px;" oninput="localStorage.temperature=this.value">
<script>temperatureEl.value = localStorage.temperature || '';</script>
</div>
<button id="imageLoader"><b>1. Select test images</b></button>
<button onclick="computeCaptions()"><b>2. Compute captions for all columns</b></button>
<table id="captionTable" style="margin-top:0.5rem;">
<tr>
<th>Images</th>
<th>
<div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt1ServerOriginEl" oninput="localStorage.prompt1ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt1ModelNameEl" oninput="localStorage.prompt1ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt1ApiKeyEl" oninput="localStorage.prompt1ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
<div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 1</div>
<textarea id="systemPrompt1El" placeholder="System prompt 1, e.g. 'You are an expert image captioner, ...'" oninput="localStorage.systemPrompt1=this.value"></textarea>
<textarea id="prompt1El" placeholder="Prompt 1, e.g. 'Caption this image.'" oninput="localStorage.prompt1=this.value"></textarea>
<textarea id="prefix1El" placeholder="(Optional) Prefix 1, e.g. 'It's an image of'" oninput="localStorage.prefix1=this.value"></textarea>
<script>prompt1El.value = localStorage.prompt1 || ''; prefix1El.value = localStorage.prefix1 || ''; systemPrompt1El.value = localStorage.systemPrompt1 || ''; prompt1ServerOriginEl.value = localStorage.prompt1ServerOriginOverride || ''; prompt1ModelNameEl.value = localStorage.prompt1ModelNameOverride || '';; prompt1ApiKeyEl.value = localStorage.prompt1ApiKeyOverride || '';</script>
<button onclick="computeCaptions(0); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
</th>
<th>
<div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt2ServerOriginEl" oninput="localStorage.prompt2ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt2ModelNameEl" oninput="localStorage.prompt2ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt2ApiKeyEl" oninput="localStorage.prompt2ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
<div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 2</div>
<textarea id="systemPrompt2El" placeholder="System prompt 2, e.g. 'You are an assistant who accurately ...'" oninput="localStorage.systemPrompt2=this.value"></textarea>
<textarea id="prompt2El" placeholder="Prompt 2, e.g. 'Describe this image in extreme detail.'" oninput="localStorage.prompt2=this.value"></textarea>
<textarea id="prefix2El" placeholder="(Optional) Prefix 2, e.g. 'The image depicts'" oninput="localStorage.prefix2=this.value"></textarea>
<script>prompt2El.value = localStorage.prompt2 || ''; prefix2El.value = localStorage.prefix2 || ''; systemPrompt2El.value = localStorage.systemPrompt2 || ''; prompt2ServerOriginEl.value = localStorage.prompt2ServerOriginOverride || ''; prompt2ModelNameEl.value = localStorage.prompt2ModelNameOverride || ''; prompt2ApiKeyEl.value = localStorage.prompt2ApiKeyOverride || '';</script>
<button onclick="computeCaptions(1); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
</th>
<th>
<div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt3ServerOriginEl" oninput="localStorage.prompt3ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt3ModelNameEl" oninput="localStorage.prompt3ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt3ApiKeyEl" oninput="localStorage.prompt3ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
<div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 3</div>
<textarea id="systemPrompt3El" placeholder="System prompt 3" oninput="localStorage.systemPrompt3=this.value"></textarea>
<textarea id="prompt3El" placeholder="Prompt 3" oninput="localStorage.prompt3=this.value"></textarea>
<textarea id="prefix3El" placeholder="Prefix 3" oninput="localStorage.prefix3=this.value"></textarea>
<script>prompt3El.value = localStorage.prompt3 || ''; prefix3El.value = localStorage.prefix3 || ''; systemPrompt3El.value = localStorage.systemPrompt3 || ''; prompt3ServerOriginEl.value = localStorage.prompt3ServerOriginOverride || ''; prompt3ModelNameEl.value = localStorage.prompt3ModelNameOverride || ''; prompt3ApiKeyEl.value = localStorage.prompt3ApiKeyOverride || '';</script>
<button onclick="computeCaptions(2); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
</th>
<th>
<div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt4ServerOriginEl" oninput="localStorage.prompt4ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt4ModelNameEl" oninput="localStorage.prompt4ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt4ApiKeyEl" oninput="localStorage.prompt4ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
<div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 4</div>
<textarea id="systemPrompt4El" placeholder="System prompt 4" oninput="localStorage.systemPrompt4=this.value"></textarea>
<textarea id="prompt4El" placeholder="Prompt 4" oninput="localStorage.prompt4=this.value"></textarea>
<textarea id="prefix4El" placeholder="Prefix 4" oninput="localStorage.prefix4=this.value"></textarea>
<script>prompt4El.value = localStorage.prompt4 || ''; prefix4El.value = localStorage.prefix4 || ''; systemPrompt4El.value = localStorage.systemPrompt4 || ''; prompt4ServerOriginEl.value = localStorage.prompt4ServerOriginOverride || ''; prompt4ModelNameEl.value = localStorage.prompt4ModelNameOverride || ''; prompt4ApiKeyEl.value = localStorage.prompt4ApiKeyOverride || '';</script>
<button onclick="computeCaptions(3); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
</th>
<th>
<div style="display:flex;"><input style="flex-grow:1; max-width:150px;" id="prompt5ServerOriginEl" oninput="localStorage.prompt5ServerOriginOverride=this.value" placeholder="(Optional) Override server URL"><input style="flex-grow:1; min-width:90px;" id="prompt5ModelNameEl" oninput="localStorage.prompt5ModelNameOverride=this.value" placeholder="(Optional) Override model name"><input style="flex-grow:1; max-width:150px;" id="prompt5ApiKeyEl" oninput="localStorage.prompt5ApiKeyOverride=this.value" placeholder="(Optional) Override API key"></div>
<div style="text-align:center; font-weight:bold; margin-top:0.5rem;">Prompt 5</div>
<textarea id="systemPrompt5El" placeholder="System prompt 5" oninput="localStorage.systemPrompt5=this.value"></textarea>
<textarea id="prompt5El" placeholder="Prompt 5" oninput="localStorage.prompt5=this.value"></textarea>
<textarea id="prefix5El" placeholder="Prefix 5" oninput="localStorage.prefix5=this.value"></textarea>
<script>prompt5El.value = localStorage.prompt5 || ''; prefix5El.value = localStorage.prefix5 || ''; systemPrompt5El.value = localStorage.systemPrompt5 || ''; prompt5ServerOriginEl.value = localStorage.prompt5ServerOriginOverride || ''; prompt5ModelNameEl.value = localStorage.prompt5ModelNameOverride || ''; prompt5ApiKeyEl.value = localStorage.prompt5ApiKeyOverride || '';</script>
<button onclick="computeCaptions(4); this.disabled=true; setTimeout(() => this.disabled=false, 1000)">(re)compute captions for this column</button>
</th>
</tr>
</table>
<script>
// Data URLs of the currently selected test images, one per table row.
window.testImages = [];
// Handle-like objects (each exposing getFile()) for the selected images.
let imageHandles = [];
// Invisible <input type="file"> that serves as the fallback picker when
// window.showOpenFilePicker cannot be used.
const fileInput = document.createElement('input');
fileInput.type = 'file';
fileInput.multiple = true;
fileInput.accept = '.png,.gif,.jpeg,.jpg,.webp,.avif,.svg,image/*';
fileInput.style = 'display:none';
document.body.appendChild(fileInput);
// "Select test images" button: prefer the File System Access picker (its
// handles can be persisted for the next visit) and fall back to the hidden
// <input type="file"> when that API is missing or refuses to open.
document.getElementById('imageLoader').addEventListener('click', async () => {
  // Clears the current selection and hands over to the hidden file input,
  // whose "change" listener takes care of loading the chosen files.
  const openFallbackPicker = () => {
    window.testImages = [];
    fileInput.click();
  };

  if (typeof window.showOpenFilePicker !== 'function') {
    openFallbackPicker();
    return;
  }

  let pickedHandles;
  try {
    pickedHandles = await window.showOpenFilePicker({
      multiple: true,
      types: [{ description: 'Images', accept: { 'image/*': ['.png','.gif','.jpeg','.jpg','.webp','.avif','.svg'] }}]
    });
  } catch (err) {
    // The user dismissed the picker: keep the current images and do not
    // confront them with a second dialog.
    if (err?.name === 'AbortError') return;
    // Any other failure (e.g. the API being blocked in this context) still
    // gets the classic file input.
    openFallbackPicker();
    return;
  }

  imageHandles = pickedHandles;
  // Show the images first; persisting the handles is best-effort and must
  // never prevent the selection from appearing.
  await loadImagesFromHandles();
  try {
    // NOTE(review): the package specifier in this URL looks like an obfuscated
    // "name@version" — confirm the intended module before relying on persistence.
    const { set } = await import('https://unpkg.com/[email protected]/dist/esm/index.js');
    await set('imageHandles', imageHandles);
  } catch (err) {
    console.error('Could not persist the selected image handles:', err);
  }
});
// Fallback picker: wrap the chosen File objects in handle-like shims and load
// them through the same code path as real file handles.
fileInput.addEventListener('change', async ({ target }) => {
  // Copy the FileList before resetting the input; resetting lets the user pick
  // the very same files again and still get a "change" event.
  const pickedFiles = Array.from(target.files);
  target.value = '';
  if (pickedFiles.length === 0) return;
  imageHandles = pickedFiles.map((file) => ({ getFile: async () => file, kind: 'file', name: file.name }));
  // loadImagesFromHandles() replaces window.testImages and rebuilds the table,
  // so this listener does not depend on another handler having cleared the list.
  await loadImagesFromHandles();
});
/**
 * Reads every handle in `imageHandles` into a data URL, replaces
 * `window.testImages` with the readable ones (original order kept) and
 * triggers a table rebuild.
 *
 * Handles that cannot be read are logged and skipped instead of aborting the
 * whole load, and the shared array is swapped in one step so it is never
 * observed half-filled.
 *
 * @returns {Promise<void>} Resolves once the images are read and the table
 *   rebuild has been started.
 */
async function loadImagesFromHandles() {
  // Snapshot: `imageHandles` may be reassigned while the reads are in flight.
  const handles = imageHandles;
  const outcomes = await Promise.allSettled(
    handles.map(async (handle) => fileToDataUrl(await handle.getFile())),
  );
  const dataUrls = [];
  outcomes.forEach((outcome, index) => {
    if (outcome.status === 'fulfilled') {
      dataUrls.push(outcome.value);
    } else {
      console.error(`Could not read image "${handles[index]?.name}":`, outcome.reason);
    }
  });
  window.testImages = dataUrls;
  // Not awaited, matching the previous behaviour: callers only need the
  // rebuild to be kicked off.
  updateTable();
}
/**
 * Reads a File/Blob and resolves with its contents as a data URL.
 *
 * @param {Blob} file - The file to read.
 * @returns {Promise<string>} The data URL produced by FileReader.
 *   Rejects with the reader's error (not the raw error event) when the read
 *   fails, so callers get a message they can log or display.
 */
function fileToDataUrl(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    reader.onerror = () => reject(reader.error ?? new Error(`Failed to read file "${file?.name ?? ''}"`));
    reader.readAsDataURL(file);
  });
}
/**
 * Restores the image handles stored under the 'imageHandles' key by a
 * previous visit and, when there are any, loads them into the table.
 *
 * Restoring is best-effort: if the key-value helper cannot be imported or the
 * stored value cannot be read, the failure is logged and the page simply
 * starts without images instead of raising an unhandled rejection.
 *
 * @returns {Promise<void>}
 */
async function loadSavedImages() {
  let savedHandles;
  try {
    // NOTE(review): the package specifier in this URL looks like an obfuscated
    // "name@version" — confirm the intended module.
    const { get } = await import('https://unpkg.com/[email protected]/dist/esm/index.js');
    savedHandles = await get('imageHandles');
  } catch (err) {
    console.error('Could not restore previously selected images:', err);
    return;
  }
  if (!Array.isArray(savedHandles) || savedHandles.length === 0) return;
  imageHandles = savedHandles;
  await loadImagesFromHandles();
}
// Once the page has finished loading, try to bring back the images saved by
// an earlier visit. The returned promise is intentionally not awaited.
window.addEventListener('load', () => {
  loadSavedImages();
});
/**
 * Re-encodes an image data URL as JPEG, scaling it down proportionally so it
 * fits inside `maxWidth` x `maxHeight`. Images already within the limits keep
 * their dimensions (but are still re-encoded).
 *
 * @param {object} options
 * @param {string} options.dataUrl - Source image as a data URL.
 * @param {number} options.maxWidth - Width limit in px; a missing, non-numeric
 *   or non-positive value means "no limit".
 * @param {number} options.maxHeight - Height limit in px; same rules as maxWidth.
 * @returns {Promise<string>} JPEG data URL. Rejects when the image cannot be
 *   decoded or drawn, instead of leaving the promise pending forever.
 */
function resizeDataUrl({ dataUrl, maxWidth, maxHeight }) {
  // An empty size input arrives here as 0, which would otherwise scale the
  // image to a 0x0 canvas; treat every unusable limit as unlimited.
  const toLimit = (value) => {
    const limit = Number(value);
    return Number.isFinite(limit) && limit > 0 ? limit : Infinity;
  };
  const widthLimit = toLimit(maxWidth);
  const heightLimit = toLimit(maxHeight);

  return new Promise((resolve, reject) => {
    const img = new Image();
    img.onload = () => {
      // Errors thrown inside an event handler would not reach the promise,
      // so forward them explicitly.
      try {
        let { width, height } = img;
        if (width > widthLimit || height > heightLimit) {
          const ratio = Math.min(widthLimit / width, heightLimit / height);
          // Whole pixels, and never collapse an extreme aspect ratio to 0.
          width = Math.max(1, Math.round(width * ratio));
          height = Math.max(1, Math.round(height * ratio));
        }
        const canvas = document.createElement('canvas');
        canvas.width = width;
        canvas.height = height;
        canvas.getContext('2d').drawImage(img, 0, 0, width, height);
        resolve(canvas.toDataURL("image/jpeg"));
      } catch (err) {
        reject(err);
      }
    };
    img.onerror = () => reject(new Error('Could not decode the image for resizing.'));
    img.src = dataUrl;
  });
}
// Identifies the most recent updateTable() call so that older, still-pending
// calls can tell they have been superseded.
let updateTableGeneration = 0;

/**
 * Rebuilds the body of #captionTable: one row per entry of
 * `window.testImages`, holding a (down-scaled) preview in a sticky first cell
 * followed by one placeholder cell per prompt column.
 *
 * All previews are prepared before the table is touched, and only the latest
 * call is allowed to rebuild it. Overlapping calls (this runs on every
 * keystroke in the size input) therefore cannot interleave and duplicate rows.
 *
 * @returns {Promise<void>}
 */
async function updateTable() {
  const generation = ++updateTableGeneration;
  const table = document.getElementById('captionTable');
  const maxImageSize = Number(maxImageSizeEl.value);
  const sourceUrls = [...window.testImages];

  const previewUrls = await Promise.all(sourceUrls.map(async (dataUrl) => {
    try {
      return await resizeDataUrl({ dataUrl, maxWidth: maxImageSize, maxHeight: maxImageSize });
    } catch (err) {
      // A preview that cannot be resized is still worth showing as-is.
      console.error('Could not resize image for preview:', err);
      return dataUrl;
    }
  }));

  // A newer call started while we were waiting; let it do the rebuild.
  if (generation !== updateTableGeneration) return;

  // Clear existing rows except header
  while (table.rows.length > 1) {
    table.deleteRow(1);
  }

  // One caption cell per header column after the "Images" column.
  const captionColumnCount = Math.max(0, (table.rows[0]?.cells.length ?? 6) - 1);
  for (const previewUrl of previewUrls) {
    const row = table.insertRow(-1);
    const imageCell = row.insertCell(0);
    const img = document.createElement('img');
    img.style.pointerEvents = "auto";
    img.src = previewUrl;
    imageCell.appendChild(img);
    imageCell.style.pointerEvents = "none";
    imageCell.style.position = 'sticky';
    imageCell.style.left = '0';
    for (let j = 0; j < captionColumnCount; j++) {
      row.insertCell(-1).textContent = `Caption will appear here`;
    }
  }
}
/**
 * Starts caption requests for every image row, either for all prompt columns
 * or for a single one, and writes each result into its table cell.
 *
 * Per column, the server URL, model name and API key come from the column's
 * override input when filled in and from the default inputs otherwise.
 * Columns whose system prompt, prompt and prefix are all empty are skipped.
 * The server URL is validated per column actually used, so a column with its
 * own override works even when the default URL is left empty.
 *
 * @param {number|null} [columnI=null] - Zero-based prompt column to compute,
 *   or null to compute all columns.
 * @returns {Promise<void>} Resolves when every started request has finished
 *   and its cell shows either the caption or an error message.
 */
async function computeCaptions(columnI=null) {
  if(window.testImages.length === 0) return alert("Choose images first.");

  const PROMPT_COLUMN_COUNT = 5;
  const trimmedValue = (id) => document.getElementById(id).value.trim();
  const defaultServerOrigin = trimmedValue('serverOriginEl');
  const defaultModelName = trimmedValue('modelNameEl');
  const defaultApiKey = trimmedValue('apiKeyEl');

  // Resolve and validate the settings of every column that will run BEFORE
  // starting any request, so a bad URL never leaves the table half-updated.
  const activeColumns = [];
  for (let columnNumber = 1; columnNumber <= PROMPT_COLUMN_COUNT; columnNumber++) {
    if(columnI !== null && columnNumber !== columnI + 1) continue;
    const systemPrompt = trimmedValue(`systemPrompt${columnNumber}El`);
    const prompt = trimmedValue(`prompt${columnNumber}El`);
    const prefix = trimmedValue(`prefix${columnNumber}El`);
    if(!systemPrompt && !prompt && !prefix) continue;

    let serverOrigin = (trimmedValue(`prompt${columnNumber}ServerOriginEl`) || defaultServerOrigin).replace(/\/$/, "").trim();
    // Any URL under the Google endpoint is reduced to its bare origin;
    // computeCaption() picks the matching path for it.
    if(serverOrigin.startsWith("https://generativelanguage.googleapis.com")) serverOrigin = "https://generativelanguage.googleapis.com";
    if(!serverOrigin.startsWith("https://") && !serverOrigin.startsWith("http://")) {
      return alert(`Please enter a valid server URL for prompt ${columnNumber}. It should start with 'https://' or 'http://'.`);
    }

    let modelName = trimmedValue(`prompt${columnNumber}ModelNameEl`) || defaultModelName;
    // Accept a pasted Hugging Face model page URL in place of the repo name.
    if(modelName.startsWith("https://huggingface.co/")) modelName = modelName.replace("https://huggingface.co/", "");
    modelName = modelName.replace(/\/$/, "").trim(); // remove trailing slash

    const apiKey = trimmedValue(`prompt${columnNumber}ApiKeyEl`) || defaultApiKey;
    activeColumns.push({ cellIndex: columnNumber, systemPrompt, prompt, prefix, serverOrigin, modelName, apiKey });
  }

  const table = document.getElementById('captionTable');
  const pendingCells = [];
  for (let rowIndex = 1; rowIndex < table.rows.length; rowIndex++) {
    const imageUrl = window.testImages[rowIndex - 1];
    // The table can momentarily hold more rows than there are loaded images.
    if(imageUrl === undefined) continue;
    for (const column of activeColumns) {
      const cell = table.rows[rowIndex].cells[column.cellIndex];
      cell.innerHTML = `⏳ Computing...<br><progress></progress>`;
      pendingCells.push(
        computeCaption(imageUrl, column.systemPrompt, column.prompt, column.prefix, column.serverOrigin, column.modelName, column.apiKey).then(
          (caption) => {
            cell.textContent = caption ?? "⚠️ No caption returned (see the browser console).";
          },
          (err) => {
            // Never leave a cell stuck on the progress indicator.
            console.error("Error computing caption:", err);
            cell.textContent = `⚠️ Error: ${err?.message ?? err}`;
          },
        ),
      );
    }
  }
  await Promise.all(pendingCells);
}
/**
 * Requests one caption from a chat-completions endpoint
 * (`<serverOrigin>/v1/chat/completions`, or the `/v1beta/openai/...` path for
 * the Google endpoint) and returns it with `prefix` prepended.
 *
 * The image is first scaled down according to the "max image size" input, and
 * the temperature is read from its input (falling back to 1).
 *
 * Failures of any kind (network error, HTTP error status, error object in the
 * response body, response without caption text) are logged and reported
 * through the returned string, so the caller's table cell shows what went
 * wrong instead of staying empty or raising one alert per image.
 *
 * @param {string} imageUrl - Image as a data URL.
 * @param {string} systemPrompt - Optional system message; skipped when blank.
 * @param {string} prompt - User prompt sent alongside the image.
 * @param {string} prefix - Optional start of the answer, sent as an assistant
 *   message and prepended to the returned caption.
 * @param {string} serverOrigin - Server base URL without a trailing slash.
 * @param {string} modelName - Model identifier sent as `model`.
 * @param {string} apiKey - Bearer token; no Authorization header is sent when blank.
 * @returns {Promise<string>} The caption, or a "⚠️ Error computing caption: ..." message.
 */
async function computeCaption(imageUrl, systemPrompt, prompt, prefix, serverOrigin, modelName, apiKey) {
  try {
    const maxImageSize = Number(maxImageSizeEl.value);
    const resizedImageUrl = await resizeDataUrl({ dataUrl: imageUrl, maxWidth: maxImageSize, maxHeight: maxImageSize });
    console.log(`original size: ${imageUrl.length}, new size: ${resizedImageUrl.length}`);

    const messages = [];
    if(systemPrompt.trim()) messages.push({ role:"system", content:systemPrompt.trim() });
    messages.push({
      role: "user",
      content: [
        { "type": "text", "text": prompt.trim() },
        { "type": "image_url", "image_url": { "url": resizedImageUrl } },
      ],
    });
    if(prefix.trim()) messages.push({ role:"assistant", content:prefix.trim() });

    const origin = serverOrigin.trim();
    const headers = { "content-type": "application/json" };
    // The API key is optional; an empty "Bearer " header helps no server.
    if(apiKey.trim()) headers["Authorization"] = `Bearer ${apiKey.trim()}`;
    // Match on the host name only: the origin normally arrives without a
    // trailing slash, so testing for "api.anthropic.com/" would never match.
    if(origin.includes("api.anthropic.com")) headers["anthropic-dangerous-direct-browser-access"] = "true";

    let temperature = Number(temperatureEl.value.trim() || 1);
    if(Number.isNaN(temperature)) temperature = 1;

    const path = origin === "https://generativelanguage.googleapis.com" ? "/v1beta/openai/chat/completions" : "/v1/chat/completions";
    const startTime = Date.now();
    const response = await fetch(`${origin}${path}`, {
      headers,
      body: JSON.stringify({
        model: modelName.trim(),
        temperature,
        stream: false,
        messages,
      }),
      method: "POST",
    });

    let result = null;
    try {
      result = await response.json();
    } catch {
      // Non-JSON body (e.g. a proxy error page); the status check below reports it.
    }

    // Two error shapes are handled: `{ error: { message } }` (or a plain
    // string) and the `{ object: "error", message }` form.
    const apiError = result?.error ?? (result?.object === "error" ? result : null);
    if(!response.ok || apiError) {
      const detail = typeof apiError === "string" ? apiError : apiError?.message;
      throw new Error(detail || `HTTP ${response.status} ${response.statusText ?? ""}`.trim());
    }

    const content = result?.choices?.[0]?.message?.content;
    if(typeof content !== "string") throw new Error("The server response contained no caption text.");

    console.log(content);
    console.log("tokens:", result.usage?.total_tokens);
    console.log("time:", Date.now() - startTime);
    return `${prefix || ""}${content}`;
  } catch (err) {
    console.error("Error computing caption:", err);
    return `⚠️ Error computing caption: ${err?.message ?? err}`;
  }
}
</script>
</body>
</html> |