Commit 7f97da4
Parent(s): f747801
implemented detection

Files changed:
- wm_interactive/core/detector.py +9 -18
- wm_interactive/core/generator.py +10 -7
- wm_interactive/core/main.py +3 -3
- wm_interactive/static/styles.css +34 -1
- wm_interactive/templates/index.html +91 -13
- wm_interactive/web/app.py +7 -4
wm_interactive/core/detector.py CHANGED

@@ -93,7 +93,8 @@ class WmDetector():
 
             score = float('nan')
             if is_scored:
-                score = self.score_tok(ngram_tokens, tokens_id[cur_pos])
+                score = self.score_tok(ngram_tokens, tokens_id[cur_pos])
+                score = float(score)
 
             token_details.append({
                 'token_id': tokens_id[cur_pos],
@@ -168,12 +169,7 @@ class MarylandDetector(WmDetector):
 
     def score_tok(self, ngram_tokens, token_id):
         """
-        score_t = 1 if token_id in greenlist else 0
-        The last line shifts the scores by token_id.
-        ex: scores[0] = 1 if token_id in greenlist else 0
-            scores[1] = 1 if token_id in (greenlist shifted of 1) else 0
-            ...
-        The score for each payload will be given by scores[payload]
+        score_t = 1 if token_id in greenlist else 0
         """
         seed = get_seed_rng(self.seed, ngram_tokens)
         self.rng.manual_seed(seed)
@@ -181,7 +177,7 @@ class MarylandDetector(WmDetector):
         vocab_permutation = torch.randperm(self.vocab_size, generator=self.rng)
         greenlist = vocab_permutation[:int(self.gamma * self.vocab_size)] # gamma * n toks in the greenlist
         scores[greenlist] = 1
-        return scores
+        return scores[token_id]
 
     def get_pvalue(self, score: int, ntoks: int, eps: float):
         """ from cdf of a binomial distribution """
@@ -209,7 +205,7 @@ class MarylandDetectorZ(WmDetector):
         vocab_permutation = torch.randperm(self.vocab_size, generator=self.rng)
         greenlist = vocab_permutation[:int(self.gamma * self.vocab_size)] # gamma * n
         scores[greenlist] = 1
-        return scores
+        return scores[token_id]
 
     def get_pvalue(self, score: int, ntoks: int, eps: float):
         """ from cdf of a normal distribution """
@@ -229,17 +225,12 @@ class OpenaiDetector(WmDetector):
     def score_tok(self, ngram_tokens, token_id):
         """
         score_t = -log(1 - rt[token_id]])
-        The last line shifts the scores by token_id.
-        ex: scores[0] = r_t[token_id]
-            scores[1] = (r_t shifted of 1)[token_id]
-            ...
-        The score for each payload will be given by scores[payload]
         """
         seed = get_seed_rng(self.seed, ngram_tokens)
         self.rng.manual_seed(seed)
         rs = torch.rand(self.vocab_size, generator=self.rng) # n
-        scores = -(1 - rs).log()
-        return scores
+        scores = -(1 - rs).log()
+        return scores[token_id]
 
     def get_pvalue(self, score: float, ntoks: int, eps: float):
         """ from cdf of a gamma distribution """
@@ -260,8 +251,8 @@ class OpenaiDetectorZ(WmDetector):
         seed = get_seed_rng(self.seed, ngram_tokens)
         self.rng.manual_seed(seed)
         rs = torch.rand(self.vocab_size, generator=self.rng) # n
-        scores = -(1 - rs).log()
-        return scores
+        scores = -(1 - rs).log()
+        return scores[token_id]
 
     def get_pvalue(self, score: float, ntoks: int, eps: float):
         """ from cdf of a normal distribution """
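The net effect of the detector change: score_tok now returns the score of the observed token only (scores[token_id]) rather than the full per-vocabulary score vector, the multi-payload docstring text is dropped, and the caller casts the result to a plain float. A minimal sketch of the resulting per-token logic for the Maryland scheme, assuming a stand-in hash in place of get_seed_rng and scipy for the binomial tail (illustrative, not the repo's exact code):

```python
# Minimal sketch of the per-token Maryland scoring after this commit:
# score_tok returns a single 0/1 score for the observed token, and the final
# p-value is a binomial tail over the scored tokens.
from scipy.stats import binom
import torch

def maryland_score_tok(seed: int, ngram_tokens: list[int], token_id: int,
                       vocab_size: int = 32000, gamma: float = 0.5) -> int:
    """Return 1 if token_id falls in the seeded greenlist, else 0 (assumed helper)."""
    rng = torch.Generator()
    # stand-in for get_seed_rng: any deterministic hash of (seed, ngram_tokens) works here
    rng.manual_seed(hash((seed, tuple(ngram_tokens))) % (2**63))
    vocab_permutation = torch.randperm(vocab_size, generator=rng)
    greenlist = vocab_permutation[: int(gamma * vocab_size)]
    return int(token_id in set(greenlist.tolist()))

def maryland_pvalue(total_score: int, ntoks: int, gamma: float = 0.5) -> float:
    """P(Binomial(ntoks, gamma) >= total_score), i.e. the binomial survival function."""
    return float(binom.sf(total_score - 1, ntoks, gamma))

# toy usage: 60 greenlist hits out of 80 scored tokens is very unlikely under H0
print(maryland_pvalue(60, 80, gamma=0.5))
```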
wm_interactive/core/generator.py CHANGED

@@ -11,7 +11,8 @@ class WmGenerator():
         model: AutoModelForCausalLM,
         tokenizer: AutoTokenizer,
         ngram: int = 1,
-        seed: int = 0
+        seed: int = 0,
+        **kwargs
     ):
         # model config
         self.tokenizer = tokenizer
@@ -49,11 +50,15 @@ class WmGenerator():
         start_pos = prompt_size
         prev_pos = 0
         for cur_pos in range(start_pos, total_len):
+            past_key_values = outputs.past_key_values if prev_pos > 0 else None
             outputs = self.model.forward(
-                tokens[:, prev_pos:cur_pos],
+                tokens[:, prev_pos:cur_pos],
+                use_cache=True,
+                past_key_values=past_key_values
             )
+            ngram_tokens = tokens[0, cur_pos-self.ngram:cur_pos].tolist()
             aux = {
-                'ngram_tokens':
+                'ngram_tokens': ngram_tokens,
                 'cur_pos': cur_pos,
             }
             next_tok = self.sample_next(outputs.logits[:, -1, :], aux, temperature, top_p)
@@ -135,7 +140,7 @@ class OpenaiGenerator(WmGenerator):
         probs_sort[mask] = 0.0
         probs_sort.div_(probs_sort.sum(dim=-1, keepdim=True))
         # seed with hash of ngram tokens
-        seed = get_seed_rng(self.seed, ngram_tokens
+        seed = get_seed_rng(self.seed, ngram_tokens)
         self.rng.manual_seed(seed)
         # generate rs randomly between [0,1]
         rs = torch.rand(self.vocab_size, generator=self.rng) # n
@@ -164,13 +169,11 @@ class MarylandGenerator(WmGenerator):
         *args,
         gamma: float = 0.5,
         delta: float = 1.0,
-        test_mul: float = 0,
         **kwargs
     ):
         super().__init__(*args, **kwargs)
         self.gamma = gamma
         self.delta = delta
-        self.test_mul = test_mul
 
     def sample_next(
         self,
@@ -198,7 +201,7 @@ class MarylandGenerator(WmGenerator):
     def logits_processor(self, logits, ngram_tokens):
         """Process logits to mask out words in greenlist."""
         logits = logits.clone()
-        seed = get_seed_rng(self.seed, ngram_tokens
+        seed = get_seed_rng(self.seed, ngram_tokens)
         self.rng.manual_seed(seed)
         vocab_permutation = torch.randperm(self.vocab_size, generator=self.rng)
         greenlist = vocab_permutation[:int(self.gamma * self.vocab_size)] # gamma * n
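The generation loop now reuses the key/value cache between steps (feeding only the new tokens to model.forward) and derives ngram_tokens from the last ngram tokens of the running sequence before sampling. A rough sketch of that pattern with Hugging Face transformers, with greedy argmax standing in for the watermark-aware sample_next (checkpoint name and variable names are illustrative):

```python
# Sketch of the incremental decoding pattern introduced here: reuse past_key_values
# so each step only feeds the new token, and collect the last `ngram` tokens of the
# context to seed the watermark RNG.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "HuggingFaceTB/SmolLM2-135M"  # assumed checkpoint; any causal LM works
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

ngram = 1
tokens = tokenizer("The watermark", return_tensors="pt").input_ids
outputs, prev_pos = None, 0

for cur_pos in range(tokens.shape[1], tokens.shape[1] + 20):
    past = outputs.past_key_values if prev_pos > 0 else None
    outputs = model(tokens[:, prev_pos:cur_pos], use_cache=True, past_key_values=past)
    ngram_tokens = tokens[0, cur_pos - ngram:cur_pos].tolist()  # context for the RNG seed
    # a real WmGenerator would bias/sample outputs.logits[:, -1, :] here;
    # greedy argmax keeps the sketch short
    next_tok = outputs.logits[:, -1, :].argmax(dim=-1, keepdim=True)
    tokens = torch.cat([tokens, next_tok], dim=-1)
    prev_pos = cur_pos

print(tokenizer.decode(tokens[0]))
```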
wm_interactive/core/main.py CHANGED

@@ -1,7 +1,7 @@
 """
 Main script for watermark detection.
 Test with:
-    python -m wm_interactive.core.main --model_name smollm2-135m --prompt_path data/prompts.json
+    python -m wm_interactive.core.main --model_name smollm2-135m --prompt_path data/prompts.json --method maryland --delta 4.0 --ngram 1
 """
 
 import os
@@ -116,9 +116,9 @@ def get_args_parser():
         help='Statistical test to detect watermark. Choose from: same (same as method), openai, openaiz, maryland, marylandz')
     parser.add_argument('--seed', type=int, default=0,
         help='Random seed for reproducibility')
-    parser.add_argument('--ngram', type=int, default=
+    parser.add_argument('--ngram', type=int, default=1,
         help='n-gram size for rng key generation')
-    parser.add_argument('--gamma', type=float, default=0.
+    parser.add_argument('--gamma', type=float, default=0.5,
         help='For maryland method: proportion of greenlist tokens')
     parser.add_argument('--delta', type=float, default=2.0,
         help='For maryland method: bias to add to greenlist tokens')
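For reference, the updated CLI defaults can be reproduced with a plain argparse parser; this is a reconstruction of the affected flags only, not the full parser in main.py:

```python
# Sketch of the updated argument defaults: --ngram now defaults to 1 and --gamma
# to 0.5, matching the defaults used by the web UI.
import argparse

parser = argparse.ArgumentParser("watermark detection (subset of flags)")
parser.add_argument('--seed', type=int, default=0, help='Random seed for reproducibility')
parser.add_argument('--ngram', type=int, default=1, help='n-gram size for rng key generation')
parser.add_argument('--gamma', type=float, default=0.5, help='For maryland method: proportion of greenlist tokens')
parser.add_argument('--delta', type=float, default=2.0, help='For maryland method: bias to add to greenlist tokens')

args = parser.parse_args([])  # no CLI args -> the defaults above
print(args)  # Namespace(seed=0, ngram=1, gamma=0.5, delta=2.0)
```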
wm_interactive/static/styles.css CHANGED

@@ -117,6 +117,39 @@ h1 {
 .stat-label {
     color: #666;
     font-size: 20px;
+    position: relative;
+    display: inline-flex;
+    align-items: center;
+    gap: 0.5rem;
+}
+
+.help-icon {
+    cursor: help;
+    color: #6c757d;
+    font-size: 0.875rem;
+}
+
+.help-tooltip {
+    visibility: hidden;
+    position: absolute;
+    bottom: 100%;
+    left: 50%;
+    transform: translateX(-50%);
+    background-color: #333;
+    color: white;
+    padding: 0.5rem;
+    border-radius: 4px;
+    font-size: 0.75rem;
+    width: max-content;
+    max-width: 200px;
+    z-index: 1000;
+    opacity: 0;
+    transition: opacity 0.2s;
+}
+
+.help-icon:hover + .help-tooltip {
+    visibility: visible;
+    opacity: 1;
 }
 
 /* Mobile-specific styles */
@@ -153,4 +186,4 @@ h1 {
     .stat-label {
         font-size: 16px;
     }
-}
+}
wm_interactive/templates/index.html CHANGED

@@ -45,6 +45,16 @@
                 <input type="number" class="form-control" id="ngram" value="1">
                 <div class="form-text">Size of the n-gram window used for detection</div>
             </div>
+            <div class="mb-3">
+                <label for="delta" class="form-label">Delta</label>
+                <input type="number" step="0.1" class="form-control" id="delta" value="2.0">
+                <div class="form-text">Bias added to greenlist tokens (for Maryland method)</div>
+            </div>
+            <div class="mb-3">
+                <label for="temperature" class="form-label">Temperature</label>
+                <input type="number" step="0.1" class="form-control" id="temperature" value="0.8">
+                <div class="form-text">Temperature for sampling (higher = more random)</div>
+            </div>
         </div>
         <div class="modal-footer">
             <button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Close</button>
@@ -73,19 +83,35 @@
         <div class="stats-container">
             <div>
                 <div class="stat-value" id="tokenCount">0</div>
-                <div class="stat-label">
+                <div class="stat-label">
+                    Tokens
+                    <i class="bi bi-question-circle help-icon"></i>
+                    <span class="help-tooltip">Total number of tokens in the text</span>
+                </div>
             </div>
             <div>
                 <div class="stat-value" id="scoredTokens">0</div>
-                <div class="stat-label">
+                <div class="stat-label">
+                    Scored Tokens
+                    <i class="bi bi-question-circle help-icon"></i>
+                    <span class="help-tooltip">Number of tokens that were actually scored by the detector (excludes first n-gram tokens and duplicates)</span>
+                </div>
             </div>
             <div>
                 <div class="stat-value" id="finalScore">0.00</div>
-                <div class="stat-label">
+                <div class="stat-label">
+                    Final Score
+                    <i class="bi bi-question-circle help-icon"></i>
+                    <span class="help-tooltip">Cumulative score from all scored tokens. Higher values indicate more likely watermarked text</span>
+                </div>
             </div>
             <div>
                 <div class="stat-value" id="pValue">0.500</div>
-                <div class="stat-label">
+                <div class="stat-label">
+                    P-value
+                    <i class="bi bi-question-circle help-icon"></i>
+                    <span class="help-tooltip">Statistical significance of the score. Lower values indicate stronger evidence of watermarking (p < 0.05 is typically considered significant)</span>
+                </div>
             </div>
         </div>
     </div>
@@ -93,7 +119,7 @@
     <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
     <script>
         let debounceTimeout = null;
-        let
+        let abortController = null; // Add this line at the top with other variables
         const textarea = document.getElementById('user_text');
         const promptArea = document.getElementById('prompt_text');
         const generateBtn = document.getElementById('generateBtn');
@@ -107,6 +133,8 @@
         const seedInput = document.getElementById('seed');
         const ngramInput = document.getElementById('ngram');
         const detectorTypeSelect = document.getElementById('detectorType');
+        const deltaInput = document.getElementById('delta');
+        const temperatureInput = document.getElementById('temperature');
 
         function startGeneration() {
             const prompt = promptArea.value.trim();
@@ -119,11 +147,16 @@
             stopBtn.disabled = false;
             textarea.value = '';
 
+            // Create new AbortController for this request
+            abortController = new AbortController();
+
             // Get current parameters
             const params = {
                 detector_type: detectorTypeSelect.value,
                 seed: parseInt(seedInput.value) || 0,
-                ngram: parseInt(ngramInput.value) || 1
+                ngram: parseInt(ngramInput.value) || 1,
+                delta: parseFloat(deltaInput.value) || 2.0,
+                temperature: parseFloat(temperatureInput.value) || 0.8
             };
 
             // Create headers for SSE
@@ -132,14 +165,15 @@
                 'Accept': 'text/event-stream',
             });
 
-            // Start fetch request
+            // Start fetch request with signal
             fetch('/generate', {
                 method: 'POST',
                 headers: headers,
                 body: JSON.stringify({
                     prompt: prompt,
                     params: params
-                })
+                }),
+                signal: abortController.signal // Add the abort signal
             }).then(response => {
                 const reader = response.body.getReader();
                 const decoder = new TextDecoder();
@@ -205,16 +239,25 @@
                 return pump();
             })
             .catch(error => {
-
-
+                if (error.name === 'AbortError') {
+                    console.log('Generation stopped by user');
+                } else {
+                    console.error('Error:', error);
+                    alert('Error: Failed to generate text');
+                }
             })
             .finally(() => {
                 generateBtn.disabled = false;
                 stopBtn.disabled = true;
+                abortController = null;
             });
         }
 
         function stopGeneration() {
+            if (abortController) {
+                abortController.abort();
+                abortController = null;
+            }
             generateBtn.disabled = false;
             stopBtn.disabled = true;
         }
@@ -230,6 +273,8 @@
             // Validate parameters before sending
             const seed = parseInt(seedInput.value);
             const ngram = parseInt(ngramInput.value);
+            const delta = parseFloat(deltaInput.value);
+            const temperature = parseFloat(temperatureInput.value);
 
             const response = await fetch('/tokenize', {
                 method: 'POST',
@@ -241,7 +286,9 @@
                     params: {
                         detector_type: detectorTypeSelect.value,
                         seed: isNaN(seed) ? 0 : seed,
-                        ngram: isNaN(ngram) ? 1 : ngram
+                        ngram: isNaN(ngram) ? 1 : ngram,
+                        delta: isNaN(delta) ? 2.0 : delta,
+                        temperature: isNaN(temperature) ? 0.8 : temperature
                     }
                 })
             });
@@ -262,7 +309,7 @@
                 const score = data.scores[i];
                 const pvalue = data.pvalues[i];
                 const scoreDisplay = (score !== null && !isNaN(score)) ? score.toFixed(3) : 'N/A';
-                const pvalueDisplay = (pvalue !== null && !isNaN(pvalue)) ? pvalue
+                const pvalueDisplay = (pvalue !== null && !isNaN(pvalue)) ? formatPValue(pvalue) : 'N/A';
 
                 return `<span class="token" style="background-color: ${data.colors[i]}">
                     ${token}
@@ -279,7 +326,7 @@
             finalScore.textContent = (data.final_score !== null && !isNaN(data.final_score)) ?
                 data.final_score.toFixed(2) : '0.00';
             pValue.textContent = (data.final_pvalue !== null && !isNaN(data.final_pvalue)) ?
-                data.final_pvalue
+                formatPValue(data.final_pvalue) : '0.500';
 
             // Clear any previous error
             const existingError = tokenDisplay.querySelector('.alert-danger');
@@ -332,6 +379,28 @@
             debounceTimeout = setTimeout(updateTokenization, 500);
         });
 
+        deltaInput.addEventListener('input', function() {
+            const value = this.value === '' ? '' : parseFloat(this.value);
+            if (isNaN(value) && this.value !== '') {
+                this.value = "2.0";
+            }
+            if (debounceTimeout) {
+                clearTimeout(debounceTimeout);
+            }
+            debounceTimeout = setTimeout(updateTokenization, 500);
+        });
+
+        temperatureInput.addEventListener('input', function() {
+            const value = this.value === '' ? '' : parseFloat(this.value);
+            if (isNaN(value) && this.value !== '') {
+                this.value = "0.8";
+            }
+            if (debounceTimeout) {
+                clearTimeout(debounceTimeout);
+            }
+            debounceTimeout = setTimeout(updateTokenization, 500);
+        });
+
         // Add keyboard shortcut for applying changes
         document.addEventListener('keydown', function(e) {
             if ((e.metaKey || e.ctrlKey) && e.key === 'Enter') {
@@ -369,6 +438,15 @@
             console.error('Error during initial tokenization:', error);
         });
     });
+
+    // Add this helper function for formatting p-values
+    function formatPValue(value) {
+        if (value >= 0.001) {
+            return value.toFixed(3);
+        } else {
+            return value.toExponential(2);
+        }
+    }
     </script>
 </body>
 </html>
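The new formatPValue helper switches from fixed-point to scientific notation below 0.001 so very small p-values stay readable. The same rule, mirrored in Python for reference (the repo implements it in JavaScript):

```python
# Display rule for p-values introduced above, written in Python for illustration only.
def format_pvalue(value: float) -> str:
    """Fixed-point with 3 decimals for p >= 0.001, scientific notation below that."""
    return f"{value:.3f}" if value >= 0.001 else f"{value:.2e}"

print(format_pvalue(0.04217))  # 0.042
print(format_pvalue(3.2e-07))  # 3.20e-07
```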
wm_interactive/web/app.py CHANGED

@@ -146,6 +146,7 @@ def create_app():
 
         prompt = template_prompt(data.get('prompt', ''))
         params = data.get('params', {})
+        temperature = float(params.get('temperature', 0.8))
 
         def generate_stream():
             try:
@@ -155,7 +156,8 @@ def create_app():
                     model=model,
                     tokenizer=tokenizer,
                     ngram=set_to_int(params.get('ngram', 1)),
-                    seed=set_to_int(params.get('seed', 0))
+                    seed=set_to_int(params.get('seed', 0)),
+                    delta=float(params.get('delta', 2.0)),
                 )
 
                 # Get special tokens to filter out
@@ -190,15 +192,16 @@ def create_app():
                 )
 
                 # Sample next token using the generator's sampling method
+                ngram_tokens = tokens[0, cur_pos-generator.ngram:cur_pos].tolist()
                 aux = {
-                    'ngram_tokens':
+                    'ngram_tokens': ngram_tokens,
                     'cur_pos': cur_pos,
                 }
                 next_token = generator.sample_next(
                     outputs.logits[:, -1, :],
                     aux,
-                    temperature=
-                    top_p=0.
+                    temperature=temperature,
+                    top_p=0.9
                 )
                 # Check for EOS token
                 if next_token == model.config.eos_token_id:
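On the server side, the new delta and temperature fields are read from the request params with the same defaults as the UI (2.0 and 0.8). A minimal Flask sketch of that parameter handling, using a hypothetical route name for illustration; the real endpoint builds the watermark generator and streams tokens over SSE:

```python
# Sketch of how the new request parameters are resolved (simplified; illustrative only).
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route("/generate_params_demo", methods=["POST"])  # hypothetical route for illustration
def generate_params_demo():
    data = request.get_json(force=True)
    params = data.get("params", {})
    # same defaults as the UI: seed 0, ngram 1, delta 2.0, temperature 0.8
    resolved = {
        "seed": int(params.get("seed", 0)),
        "ngram": int(params.get("ngram", 1)),
        "delta": float(params.get("delta", 2.0)),
        "temperature": float(params.get("temperature", 0.8)),
    }
    return jsonify(resolved)
```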