File size: 13,052 Bytes
95a61ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1b90242
95a61ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Short Text and Open Source: Anonymiser</title>
  <script src="https://cdn.tailwindcss.com"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/iconify/2.0.0/iconify.min.js"></script>
  <style>
    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap');
    * {
      font-family: 'Inter', sans-serif;
    }
    textarea, #privacyMask {
      transition: all 0.2s ease-in-out;
    }
    ::-webkit-scrollbar {
      width: 6px;
    }
    ::-webkit-scrollbar-track {
      background: #2d2d2d;
    }
    ::-webkit-scrollbar-thumb {
      background: #4a4a4a;
      border-radius: 3px;
    }
    .entity-tile {
      transition: transform 0.2s, box-shadow 0.2s;
    }
    .entity-tile:hover {
      transform: translateY(-2px);
      box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }
  </style>
</head>
<body class="bg-gray-900 min-h-screen">
  <!-- Branding Header -->
  <div class="bg-black/30 py-4 border-b border-white/10">
    <div class="max-w-7xl mx-auto px-4 flex items-center justify-between">
      <div class="flex items-center space-x-3">
        <img src="ai4privacy-logo.png" alt="Logo" class="h-8 w-8">
        <div>
          <span class="text-xl font-bold text-white">Ai4Privacy</span>
          <span class="block text-xs text-white/60">Short Text Anonymization Locally in Your Browser</span>
        </div>
      </div>
      <!-- Settings Button: icon-only control, so it carries an explicit accessible name -->
      <button id="settingsButton" type="button" aria-label="Settings" aria-controls="settingsPanel" class="text-white/60 hover:text-white transition-colors">
        <span class="iconify" data-icon="mdi:cog" data-width="24" aria-hidden="true"></span>
      </button>
    </div>
  </div>

  <!-- Settings Panel: hidden by default; each label is bound to its control via for/id -->
  <div id="settingsPanel" class="hidden absolute right-4 top-20 bg-gray-800 border border-white/10 rounded-xl p-4 w-64 space-y-4 z-50">
    <div>
      <label for="thresholdInput" class="block text-sm text-white/80 mb-2">Detection Threshold</label>
      <input type="number" id="thresholdInput" step="0.001" min="0" max="1" value="0.01"
             class="w-full bg-gray-700 border border-white/10 rounded-lg px-3 py-2 text-white">
    </div>
    <div>
      <label for="modelSelect" class="block text-sm text-white/80 mb-2">Language Model</label>
      <select id="modelSelect" class="w-full bg-gray-700 border border-white/10 rounded-lg px-3 py-2 text-white">
        <option value="english">English - ai4privacy/llama-ai4privacy-english-anonymiser-openpii</option>
      </select>
    </div>
  </div>

  <div class="max-w-7xl mx-auto px-4 py-8">
    <div class="flex flex-col lg:flex-row gap-8">
      <!-- Input/Output Section -->
      <!-- Each label is bound to its textarea via for/id so assistive technology announces it -->
      <div class="flex-1 space-y-6">
        <div>
          <label for="inputText" class="block text-sm font-medium text-white/80 mb-2">Input Text</label>
          <textarea
            id="inputText"
            class="w-full p-4 bg-gray-800 border border-white/10 rounded-xl text-white placeholder-white/30 focus:border-blue-500 focus:ring-2 focus:ring-blue-500/30 resize-none"
            rows="6"
            placeholder="Enter sensitive text to anonymize..."
          ></textarea>
        </div>

        <div>
          <label for="outputText" class="block text-sm font-medium text-white/80 mb-2">Anonymized Output</label>
          <textarea
            id="outputText"
            class="w-full p-4 bg-gray-800 border border-white/10 rounded-xl text-white/80 resize-none"
            rows="6"
            readonly
          ></textarea>
        </div>
      </div>

      <!-- Privacy Mask Panel -->
      <div class="lg:w-96">
        <div class="sticky top-8">
            <label class="block text-sm font-medium text-white/80 mb-2">Detected Entities</label>
          <div class="bg-gray-800 border border-white/10 rounded-xl p-4">
            <div class="mb-4">
              <span id="processingStatus" class="text-xs text-white/40">Ready</span>
            </div>
            <div 
              id="privacyMask"
              class="h-96 bg-gray-850 rounded-lg p-3 overflow-y-auto text-sm space-y-2"
            >
              <div class="text-center text-white/40 py-4">Processing results will appear here</div>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>

  <!-- Branding Footer -->
  <div class="fixed bottom-0 left-0 right-0 bg-black/30 border-t border-white/10 py-3">
    <div class="max-w-7xl mx-auto px-4">
      <div class="flex items-center justify-between">
        <div class="text-sm text-white/50">© 2025 Ai4Privacy. All rights reserved. Use at your own risk. Ai4Privacy assumes no responsibility for implementation, accuracy, or any resulting damages.</div>
        <div class="flex items-center space-x-4">
          <span class="text-sm text-white/50">v2.1.0</span>
          <div class="w-px h-4 bg-white/10"></div>
          <img src="ai4privacy-logo.png" alt="Logo" class="h-6 w-6 opacity-70">
        </div>
      </div>
    </div>
  </div>

  <script type="module">
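    // NOTE(review): the package specifier had been mangled into "[email protected]" by e-mail obfuscation;
    // restored as the v3 major range of @huggingface/transformers - confirm the exact version to pin.
    import { AutoModel, AutoTokenizer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3';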

    // Initialize variables
    // Shared module state: `tokenizer` and `model` are assigned by loadModel(),
    // which sets `isModelLoaded` to true once both are available.
    let tokenizer, model;
    let isModelLoaded = false;
    // Latest textarea contents; written by the input listener and read by updateOutput().
    let currentInput = "";

    // DOM Elements
    // Looked up once at module start; the ids must match the markup in this page.
    const inputText = document.getElementById('inputText');
    const outputText = document.getElementById('outputText');
    const statusElement = document.getElementById('processingStatus');

    // Debounced input handling: every keystroke records the new text and restarts
    // a 300 ms timer, so the pipeline only runs once typing pauses.
    let debounceTimer;
    const handleInput = ({ target }) => {
      currentInput = target.value;
      statusElement.textContent = 'Processing...';
      clearTimeout(debounceTimer);
      debounceTimer = setTimeout(updateOutput, 300);
    };
    inputText.addEventListener('input', handleInput);

    /**
     * Loads the tokenizer and the q8-quantised model, marks the module as ready
     * and triggers a first pass over whatever text is currently in the input.
     *
     * Failures are logged and reported through the status line and the output
     * box; nothing is rethrown.
     *
     * @returns {Promise<void>}
     */
    async function loadModel() {
      const modelId = 'ai4privacy/llama-ai4privacy-english-anonymiser-openpii';
      try {
        // The two loads do not depend on each other, so start them together
        // instead of waiting for the tokenizer before requesting the model.
        [tokenizer, model] = await Promise.all([
          AutoTokenizer.from_pretrained(modelId),
          AutoModel.from_pretrained(modelId, { dtype: "q8" }),
        ]);
        isModelLoaded = true;
        statusElement.textContent = 'Model loaded';
        // updateOutput() handles its own errors, so it is safe to leave un-awaited.
        updateOutput();
      } catch (err) {
        console.error("Error loading model:", err);
        statusElement.textContent = 'Error loading model';
        outputText.value = "Error loading model.";
      }
    }

    /**
     * Builds one "detected entity" tile.
     *
     * All values are written through textContent: `replacement.original` is raw
     * user text and must never be parsed as HTML.
     *
     * @param {{placeholder: string, original: string, activation: number}} replacement
     * @returns {HTMLDivElement}
     */
    function createEntityTile(replacement) {
      const tile = document.createElement('div');
      tile.className = 'entity-tile bg-gray-800 p-3 rounded-lg border border-white/10 hover:border-white/20';

      const rows = [
        ['text-xs text-white/60 mb-1', replacement.placeholder],
        ['text-sm text-white font-medium', replacement.original],
        ['text-xs text-white/40 mt-1', 'Sensitive Information'],
        ['text-xs text-white/40 mt-1', `Activation: ${Math.round(replacement.activation * 100)}%`],
      ];
      for (const [className, text] of rows) {
        const row = document.createElement('div');
        row.className = className;
        row.textContent = text;
        tile.appendChild(row);
      }
      return tile;
    }

    /**
     * Runs the anonymisation pipeline on the current input and refreshes the
     * output textarea, the status line and the entity panel.
     *
     * While the model is not loaded it only shows a loading status. Processing
     * errors are logged and surfaced in the status line and the output box.
     *
     * @returns {Promise<void>}
     */
    async function updateOutput() {
      if (!isModelLoaded) {
        statusElement.textContent = 'Loading model...';
        outputText.value = "";
        return;
      }

      // Snapshot the text so the status and the result describe the same input
      // even if the user keeps typing while the model is running.
      const text = currentInput;

      try {
        const processed = await processText(text, tokenizer, model);

        // The input changed while we were waiting: the input listener has
        // already scheduled a newer run, so drop this stale result.
        if (text !== currentInput) {
          return;
        }

        statusElement.textContent = `Processed ${text.length} characters`;
        outputText.value = processed.maskedText;

        const privacyMaskDiv = document.getElementById('privacyMask');
        privacyMaskDiv.replaceChildren();

        if (processed.replacements.length > 0) {
          for (const replacement of processed.replacements) {
            privacyMaskDiv.appendChild(createEntityTile(replacement));
          }
        } else {
          const emptyState = document.createElement('div');
          emptyState.className = 'text-center text-white/40 py-4';
          emptyState.textContent = 'No sensitive information detected.';
          privacyMaskDiv.appendChild(emptyState);
        }
      } catch (err) {
        statusElement.textContent = 'Error processing text';
        console.error("Error processing text:", err);
        outputText.value = "Error processing text.";
      }
    }

    /**
     * Tokenises `text`, scores every token with the model and returns the masked
     * text together with the list of replacements.
     *
     * Each token gets a softmax over three logits, labelled in order
     * "B-PRIVATE", "I-PRIVATE" and "O"; its sensitivity score is the larger of
     * the first two probabilities.
     * NOTE(review): assumes `logits.data` is a flat buffer of three values per
     * token, in token order - confirm against the model output shape.
     *
     * @param {string} text - Raw input text.
     * @param {Function} tokenizer - Callable tokenizer that also exposes `decode`.
     * @param {Function} model - Callable model resolving to `{ logits }`.
     * @returns {Promise<{maskedText: string, replacements: Array}>}
     */
    async function processText(text, tokenizer, model) {
      const CLASS_COUNT = 3;

      // Plain softmax over one token's logits.
      const toProbabilities = (row) => {
        const exps = row.map((value) => Math.exp(value));
        let total = 0;
        for (const e of exps) {
          total += e;
        }
        return exps.map((e) => e / total);
      };

      const encoded = await tokenizer(text);

      // Decode ids one at a time so every prediction maps to exactly one token string.
      const tokenStrings = [];
      for (const id of Array.from(encoded.input_ids.data)) {
        tokenStrings.push(tokenizer.decode([id], { skip_special_tokens: false }));
      }

      const output = await model(encoded);
      const flatLogits = Array.from(output.logits.data);

      const tokenPredictions = tokenStrings.map((token, position) => {
        const offset = position * CLASS_COUNT;
        const [beginProb, insideProb, outsideProb] = toProbabilities(
          flatLogits.slice(offset, offset + CLASS_COUNT)
        );
        return {
          token,
          start: position,
          end: position + 1,
          probabilities: {
            "B-PRIVATE": beginProb,
            "I-PRIVATE": insideProb,
            "O": outsideProb
          },
          maxSensitiveScore: Math.max(beginProb, insideProb)
        };
      });

      const groups = aggregatePrivacyTokens(tokenPredictions);
      const { maskedText, replacements } = maskText(tokenPredictions, groups);
      return { maskedText, replacements };
    }

    /**
     * Groups token predictions into words and keeps the words whose highest
     * sensitivity score reaches the threshold from the settings panel.
     *
     * A word starts at a non-special token and continues over the following
     * tokens until one starts with a space or is a special token
     * ('[CLS]' / '[SEP]'). Special tokens never belong to a group.
     *
     * @param {Array<{token: string, maxSensitiveScore: number}>} tokenPredictions
     * @returns {Array<{tokens: Array, indices: number[], scores: number[], startsWithSpace: boolean}>}
     *   Groups in input order; `indices` are positions in `tokenPredictions`.
     */
    function aggregatePrivacyTokens(tokenPredictions) {
      const DEFAULT_THRESHOLD = 0.01;
      const SPECIAL_TOKENS = ['[CLS]', '[SEP]'];
      const isSpecial = (prediction) => SPECIAL_TOKENS.includes(prediction.token);

      // Fall back only when the field is empty or not a number. A plain `||`
      // would also replace an explicit 0, which the input (min="0") allows.
      const parsedThreshold = Number.parseFloat(document.getElementById('thresholdInput').value);
      const threshold = Number.isFinite(parsedThreshold) ? parsedThreshold : DEFAULT_THRESHOLD;

      const aggregated = [];
      const n = tokenPredictions.length;
      let i = 0;

      while (i < n) {
        const currentToken = tokenPredictions[i];
        if (isSpecial(currentToken)) {
          i++;
          continue;
        }

        // Any non-special token that reaches this point begins a word: word
        // continuations are consumed by the inner loop below. This includes
        // the first word after a leading '[CLS]', which has no leading space
        // and is not at index 0.
        const group = {
          tokens: [currentToken],
          indices: [i],
          scores: [currentToken.maxSensitiveScore],
          startsWithSpace: currentToken.token.startsWith(' ')
        };
        i++;

        while (i < n &&
               !tokenPredictions[i].token.startsWith(' ') &&
               !isSpecial(tokenPredictions[i])) {
          group.tokens.push(tokenPredictions[i]);
          group.indices.push(i);
          group.scores.push(tokenPredictions[i].maxSensitiveScore);
          i++;
        }

        if (Math.max(...group.scores) >= threshold) {
          aggregated.push(group);
        }
      }
      return aggregated;
    }

    /**
     * Rebuilds the text from its tokens, substituting every aggregated group
     * with a numbered `[PII_n]` placeholder.
     *
     * '[CLS]' and '[SEP]' tokens are dropped. The placeholder is emitted at the
     * group's first token (keeping a leading space if the group had one) and
     * the group's remaining tokens are omitted. Whitespace runs in the result
     * are collapsed to single spaces and the ends are trimmed.
     *
     * @param {Array<{token: string}>} tokenPredictions - All token predictions, in order.
     * @param {Array<{tokens: Array, indices: number[], scores: number[], startsWithSpace: boolean}>} aggregatedGroups
     * @returns {{maskedText: string, replacements: Array<{original: string, placeholder: string, activation: number}>}}
     */
    function maskText(tokenPredictions, aggregatedGroups) {
      const SPECIAL_TOKENS = ['[CLS]', '[SEP]'];

      // Index the groups once: which positions are covered, and which group
      // begins at which position (first group wins for a given start).
      const coveredIndices = new Set();
      const groupByStart = new Map();
      for (const group of aggregatedGroups) {
        for (const position of group.indices) {
          coveredIndices.add(position);
        }
        if (!groupByStart.has(group.indices[0])) {
          groupByStart.set(group.indices[0], group);
        }
      }

      const pieces = [];
      const replacements = [];
      let nextId = 1;

      for (const [position, prediction] of tokenPredictions.entries()) {
        if (SPECIAL_TOKENS.includes(prediction.token)) {
          continue;
        }
        if (!coveredIndices.has(position)) {
          pieces.push(prediction.token);
          continue;
        }

        const group = groupByStart.get(position);
        if (!group) {
          // Non-leading token of a masked word: already represented by the placeholder.
          continue;
        }

        const parts = group.tokens.map((t) => t.token);
        if (group.startsWithSpace && parts.length > 0) {
          parts[0] = parts[0].trimStart();
        }

        const placeholder = `[PII_${nextId}]`;
        nextId += 1;
        replacements.push({
          original: parts.join(''),
          placeholder,
          activation: Math.max(...group.scores)
        });
        pieces.push(group.startsWithSpace ? ` ${placeholder}` : placeholder);
      }

      const maskedText = pieces.join('').replace(/\s+/g, ' ').trim();
      return { maskedText, replacements };
    }

    // Start loading the model as soon as the module is evaluated
    loadModel();

    // Settings panel: the cog button toggles it, a click anywhere outside closes it
    const settingsButton = document.getElementById('settingsButton');
    const settingsPanel = document.getElementById('settingsPanel');
    let settingsVisible = false;

    // Single place that keeps the flag and the 'hidden' class in sync.
    function setSettingsVisible(visible) {
      settingsVisible = visible;
      settingsPanel.classList.toggle('hidden', !visible);
    }

    settingsButton.addEventListener('click', (e) => {
      setSettingsVisible(!settingsVisible);
      // Keep this click from reaching the document-level handler below,
      // which would immediately close the panel again.
      e.stopPropagation();
    });

    document.addEventListener('click', (e) => {
      if (!settingsVisible || settingsPanel.contains(e.target)) {
        return;
      }
      setSettingsVisible(false);
    });
  </script>
</body>
</html>