<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Short Text and Open Source: Anonymiser</title>
    <script src="https://cdn.tailwindcss.com"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/iconify/2.0.0/iconify.min.js"></script>
    <style>
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap');

        * {
            font-family: 'Inter', sans-serif;
        }

        textarea, #privacyMask {
            transition: all 0.2s ease-in-out;
        }

        ::-webkit-scrollbar {
            width: 6px;
        }

        ::-webkit-scrollbar-track {
            background: #2d2d2d;
        }

        ::-webkit-scrollbar-thumb {
            background: #4a4a4a;
            border-radius: 3px;
        }

        .entity-tile {
            transition: transform 0.2s, box-shadow 0.2s;
        }

        .entity-tile:hover {
            transform: translateY(-2px);
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        }
    </style>
</head>

<body class="bg-gray-900 min-h-screen">

    <div class="bg-black/30 py-4 border-b border-white/10">
        <div class="max-w-7xl mx-auto px-4 flex items-center justify-between">
            <div class="flex items-center space-x-3">
                <img src="ai4privacy-logo.png" alt="Logo" class="h-8 w-8">
                <div>
                    <span class="text-xl font-bold text-white">Ai4Privacy</span>
                    <span class="block text-xs text-white/60">Short Text Anonymization Locally in Your Browser</span>
                </div>
            </div>

            <button id="settingsButton" class="text-white/60 hover:text-white transition-colors">
                <span class="iconify" data-icon="mdi:cog" data-width="24"></span>
            </button>
        </div>
    </div>

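    <!-- Settings popover: opened by the cog button in the header, closed by clicking anywhere outside it (see the toggle handlers at the bottom of the script). -->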
    <div id="settingsPanel" class="hidden absolute right-4 top-20 bg-gray-800 border border-white/10 rounded-xl p-4 w-64 space-y-4 z-50">
        <div>
            <label class="block text-sm text-white/80 mb-2">Detection Threshold</label>
            <input type="number" id="thresholdInput" step="0.001" min="0" max="1" value="0.01"
                   class="w-full bg-gray-700 border border-white/10 rounded-lg px-3 py-2 text-white">
        </div>
        <div>
            <label class="block text-sm text-white/80 mb-2">Language Model</label>
            <select id="modelSelect" class="w-full bg-gray-700 border border-white/10 rounded-lg px-3 py-2 text-white">
                <option value="english">English - ai4privacy/llama-ai4privacy-english-anonymiser-openpii</option>
            </select>
        </div>
    </div>

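    <!-- The detection threshold is compared against each word's highest B-PRIVATE / I-PRIVATE probability
         (see aggregatePrivacyTokens in the script below): lowering it masks more aggressively, raising it masks less. -->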
    <div class="max-w-7xl mx-auto px-4 py-8">
        <div class="flex flex-col lg:flex-row gap-8">

            <div class="flex-1 space-y-6">
                <div>
                    <label class="block text-sm font-medium text-white/80 mb-2">Input Text</label>
                    <textarea
                        id="inputText"
                        class="w-full p-4 bg-gray-800 border border-white/10 rounded-xl text-white placeholder-white/30 focus:border-blue-500 focus:ring-2 focus:ring-blue-500/30 resize-none"
                        rows="6"
                        placeholder="Enter sensitive text to anonymize..."
                    ></textarea>
                </div>

                <div>
                    <label class="block text-sm font-medium text-white/80 mb-2">Anonymized Output</label>
                    <textarea
                        id="outputText"
                        class="w-full p-4 bg-gray-800 border border-white/10 rounded-xl text-white/80 resize-none"
                        rows="6"
                        readonly
                    ></textarea>
                </div>
            </div>

            <div class="lg:w-96">
                <div class="sticky top-8">
                    <label class="block text-sm font-medium text-white/80 mb-2">Detected Entities</label>
                    <div class="bg-gray-800 border border-white/10 rounded-xl p-4">
                        <div class="mb-4">
                            <span id="processingStatus" class="text-xs text-white/40">Ready</span>
                        </div>
                        <div
                            id="privacyMask"
                            class="h-96 bg-gray-900 rounded-lg p-3 overflow-y-auto text-sm space-y-2"
                        >
                            <div class="text-center text-white/40 py-4">Processing results will appear here</div>
                        </div>
                    </div>
                </div>
            </div>
        </div>
    </div>

    <div class="fixed bottom-0 left-0 right-0 bg-black/30 border-t border-white/10 py-3">
        <div class="max-w-7xl mx-auto px-4">
            <div class="flex items-center justify-between">
                <div class="text-sm text-white/50">© 2025 Ai4Privacy. All rights reserved. Use at your own risk. Ai4Privacy assumes no responsibility for implementation, accuracy, or any resulting damages.</div>
                <div class="flex items-center space-x-4">
                    <span class="text-sm text-white/50">v2.1.0</span>
                    <div class="w-px h-4 bg-white/10"></div>
                    <img src="ai4privacy-logo.png" alt="Logo" class="h-6 w-6 opacity-70">
                </div>
            </div>
        </div>
    </div>

    <script type="module">
        import { AutoModel, AutoTokenizer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers';
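        // The tokenizer and model are fetched from the Hugging Face Hub on first load and cached by
        // the browser; after that, all inference runs locally in the page and the text never leaves it.
        // NOTE: the import above is unpinned; pin an explicit @huggingface/transformers version for
        // reproducible behaviour.
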
        let tokenizer, model;
        let isModelLoaded = false;
        let currentInput = "";

        const inputText = document.getElementById('inputText');
        const outputText = document.getElementById('outputText');
        const statusElement = document.getElementById('processingStatus');

        // Debounce re-processing so the model is not run on every keystroke.
        let timeout;
        inputText.addEventListener('input', (event) => {
            currentInput = event.target.value;
            statusElement.textContent = 'Processing...';
            clearTimeout(timeout);
            timeout = setTimeout(updateOutput, 300);
        });

        async function loadModel() {
            try {
                tokenizer = await AutoTokenizer.from_pretrained('ai4privacy/llama-ai4privacy-english-anonymiser-openpii');
                model = await AutoModel.from_pretrained('ai4privacy/llama-ai4privacy-english-anonymiser-openpii', { dtype: "q8" });
                isModelLoaded = true;
                statusElement.textContent = 'Model loaded';
                updateOutput();
            } catch (err) {
                console.error("Error loading model:", err);
                statusElement.textContent = 'Error loading model';
                outputText.value = "Error loading model.";
            }
        }

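        // dtype: "q8" loads the 8-bit quantized ONNX weights, trading a little accuracy for a
        // smaller download and faster CPU (WASM) inference.
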
        async function updateOutput() {
            if (!isModelLoaded) {
                statusElement.textContent = 'Loading model...';
                outputText.value = "";
                return;
            }

            try {
                const processed = await processText(currentInput, tokenizer, model);
                statusElement.textContent = `Processed ${currentInput.length} characters`;
                outputText.value = processed.maskedText;

                const privacyMaskDiv = document.getElementById('privacyMask');
                privacyMaskDiv.innerHTML = '';

                if (processed.replacements.length > 0) {
                    processed.replacements.forEach(replacement => {
                        const tile = document.createElement('div');
                        tile.className = 'entity-tile bg-gray-800 p-3 rounded-lg border border-white/10 hover:border-white/20';
                        // Build the static skeleton with innerHTML, then fill in user-derived strings
                        // via textContent so they are never parsed as HTML.
                        tile.innerHTML = `
                            <div class="text-xs text-white/60 mb-1"></div>
                            <div class="text-sm text-white font-medium"></div>
                            <div class="text-xs text-white/40 mt-1">Sensitive Information</div>
                            <div class="text-xs text-white/40 mt-1"></div>
                        `;
                        tile.children[0].textContent = replacement.placeholder;
                        tile.children[1].textContent = replacement.original;
                        tile.children[3].textContent = `Activation: ${Math.round(replacement.activation * 100)}%`;
                        privacyMaskDiv.appendChild(tile);
                    });
                } else {
                    const emptyState = document.createElement('div');
                    emptyState.className = 'text-center text-white/40 py-4';
                    emptyState.textContent = 'No sensitive information detected.';
                    privacyMaskDiv.appendChild(emptyState);
                }
            } catch (err) {
                statusElement.textContent = 'Error processing text';
                console.error("Error processing text:", err);
                outputText.value = "Error processing text.";
            }
        }

        async function processText(text, tokenizer, model) {
            const inputs = await tokenizer(text);
            const inputTokens = inputs.input_ids.data;
            const tokenStrings = Array.from(inputTokens).map(id =>
                tokenizer.decode([id], { skip_special_tokens: false })
            );

            const { logits } = await model(inputs);
            const logitsData = Array.from(logits.data);
            const numTokens = tokenStrings.length;
            const numClasses = 3; // B-PRIVATE, I-PRIVATE, O

            // Split the flat logits buffer into one [numClasses] slice per token.
            const logitsPerToken = [];
            for (let i = 0; i < numTokens; i++) {
                logitsPerToken.push(logitsData.slice(i * numClasses, (i + 1) * numClasses));
            }

            // Numerically stable softmax: subtract the max logit before exponentiating.
            function softmax(logits) {
                const maxLogit = Math.max(...logits);
                const expLogits = logits.map(l => Math.exp(l - maxLogit));
                const sumExp = expLogits.reduce((a, b) => a + b, 0);
                return expLogits.map(exp => exp / sumExp);
            }

            const tokenPredictions = tokenStrings.map((token, i) => {
                const probs = softmax(logitsPerToken[i]);
                const maxSensitive = Math.max(probs[0], probs[1]);
                return {
                    token: token,
                    start: i,
                    end: i + 1,
                    probabilities: {
                        "B-PRIVATE": probs[0],
                        "I-PRIVATE": probs[1],
                        "O": probs[2]
                    },
                    maxSensitiveScore: maxSensitive
                };
            });

            const aggregated = aggregatePrivacyTokens(tokenPredictions);
            const { maskedText, replacements } = maskText(tokenPredictions, aggregated);
            return { maskedText, replacements };
        }

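        // Illustrative example (actual spans depend on the model's scores and the threshold):
        // for the input "My name is John Smith", processText might return
        //   maskedText:   "My name is [PII_1] [PII_2]"
        //   replacements: [{ original: "John",  placeholder: "[PII_1]", activation: 0.97 },
        //                  { original: "Smith", placeholder: "[PII_2]", activation: 0.95 }]
        // because masking is applied per space-delimited word (see the two functions below).
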
        // Group sub-word tokens into space-delimited words and keep the words whose highest
        // B-PRIVATE / I-PRIVATE probability reaches the detection threshold.
        function aggregatePrivacyTokens(tokenPredictions) {
            const threshold = parseFloat(document.getElementById('thresholdInput').value) || 0.01;
            const aggregated = [];
            let i = 0;
            const n = tokenPredictions.length;

            while (i < n) {
                const currentToken = tokenPredictions[i];
                if (['[CLS]', '[SEP]'].includes(currentToken.token)) {
                    i++;
                    continue;
                }
                const startsWithSpace = currentToken.token.startsWith(' ');
                const isFirstWord = aggregated.length === 0 && i === 0;
                if (startsWithSpace || isFirstWord) {
                    const group = {
                        tokens: [currentToken],
                        indices: [i],
                        scores: [currentToken.maxSensitiveScore],
                        startsWithSpace: startsWithSpace
                    };
                    i++;
                    // Absorb the following sub-word tokens (no leading space) into the same word.
                    while (i < n &&
                           !tokenPredictions[i].token.startsWith(' ') &&
                           !['[CLS]', '[SEP]'].includes(tokenPredictions[i].token)) {
                        group.tokens.push(tokenPredictions[i]);
                        group.indices.push(i);
                        group.scores.push(tokenPredictions[i].maxSensitiveScore);
                        i++;
                    }
                    if (Math.max(...group.scores) >= threshold) {
                        aggregated.push(group);
                    }
                } else {
                    i++;
                }
            }
            return aggregated;
        }

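        // Illustrative example: if the tokenizer splits " Johnson" into " Jo" + "hn" + "son", the
        // three tokens form one group; the group is kept (and later masked as a single [PII_n]
        // placeholder) when the highest score among its tokens reaches the threshold.
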
        // Replace each aggregated group with a numbered [PII_n] placeholder and collect the
        // original text for the "Detected Entities" panel.
        function maskText(tokenPredictions, aggregatedGroups) {
            const maskedTokens = [];
            const replacements = [];
            const maskedIndices = new Set();
            let redactedCounter = 1;

            aggregatedGroups.forEach(group => {
                group.indices.forEach(idx => maskedIndices.add(idx));
            });

            tokenPredictions.forEach((token, idx) => {
                if (['[CLS]', '[SEP]'].includes(token.token)) return;
                if (maskedIndices.has(idx)) {
                    // Only emit the placeholder once, at the first token of the group.
                    const group = aggregatedGroups.find(g => g.indices[0] === idx);
                    if (group) {
                        const originalTokens = group.tokens.map(t => t.token);
                        const originalText = originalTokens
                            .map((token, i) => (i === 0 && group.startsWithSpace ? token.trimStart() : token))
                            .join('');
                        const placeholder = `[PII_${redactedCounter}]`;
                        replacements.push({
                            original: originalText,
                            placeholder: placeholder,
                            activation: Math.max(...group.scores)
                        });
                        redactedCounter++;
                        const maskWithSpace = group.startsWithSpace ? ` ${placeholder}` : placeholder;
                        maskedTokens.push(maskWithSpace);
                    }
                } else {
                    maskedTokens.push(token.token);
                }
            });

            return { maskedText: maskedTokens.join('').replace(/\s+/g, ' ').trim(), replacements };
        }

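        // Start downloading the model as soon as the page loads; once it is ready, loadModel()
        // re-runs updateOutput() so any text typed in the meantime gets processed.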
        loadModel();

        const settingsButton = document.getElementById('settingsButton');
        const settingsPanel = document.getElementById('settingsPanel');
        let settingsVisible = false;

        settingsButton.addEventListener('click', (e) => {
            settingsVisible = !settingsVisible;
            settingsPanel.classList.toggle('hidden', !settingsVisible);
            e.stopPropagation();
        });

        document.addEventListener('click', (e) => {
            if (settingsVisible && !settingsPanel.contains(e.target)) {
                settingsPanel.classList.add('hidden');
                settingsVisible = false;
            }
        });
    </script>
</body>
</html>