app.py
CHANGED
@@ -11,7 +11,6 @@ model = LlamaForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", t
 tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 attnlrp.register(model)

-
 def really_clean_tokens(tokens):
     tokens = clean_tokens(tokens)
     cleaned_tokens = []
@@ -33,7 +32,7 @@ def generate_and_visualize(prompt, num_tokens=10):
     all_relevances = []

     for _ in range(num_tokens):
-        output_logits = model(inputs_embeds=input_embeds.requires_grad_()
+        output_logits = model(inputs_embeds=input_embeds.requires_grad_()).logits
         max_logits, max_indices = torch.max(output_logits[0, -1, :], dim=-1)

         max_logits.backward(max_logits)
@@ -54,7 +53,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
     attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])

     ### FIND ZONES OF INTEREST
-    threshold_per_token = 0.
+    threshold_per_token = 0.25
     kernel_width = 6
     context_width = 20 # Number of tokens to include as context on each side
     kernel = np.ones((kernel_width, kernel_width))
@@ -66,48 +65,58 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
     significant_areas = rolled_sum > kernel_width**2 * threshold_per_token

     def find_largest_contiguous_patch(array):
-
-        best_width,
+        current_patch_start = None
+        best_width, best_patch_start = None, None
         current_width = 0
         for i in range(len(array)):
             if array[i]:
-                if
+                if current_patch_start is not None and current_patch_start + current_width == i:
                     current_width += 1
-                    current_patch_end = i
                 else:
-
+                    current_patch_start = i
                     current_width = 1
-                if
-
+                if current_patch_start and (best_width is None or current_width > best_width):
+                    best_patch_start = current_patch_start
                     best_width = current_width
             else:
                 current_width = 0
-        return best_width,
+        return best_width, best_patch_start

     output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
     for row in range(kernel_width, len(generated_tokens)):
-        best_width,
+        best_width, best_patch_start = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])

         if best_width is not None:
-
-            for i in range(len(output_with_notes)-2*kernel_width, len(output_with_notes)):
-                token, coords = output_with_notes[i]
-                if coords is not None:
-                    prev_width, prev_patch_end = coords
-                    if prev_patch_end > best_patch_end - best_width:
-                        # then notes are overlapping: thus we delete the first one and make the last wider if needed
-                        output_with_notes[i] = (token, None)
-                        if prev_patch_end - prev_width < best_patch_end - best_width:
-                            best_width = best_patch_end - prev_patch_end - prev_width
-            output_with_notes.append((generated_tokens[row], (best_width, best_patch_end)))
+            output_with_notes.append((generated_tokens[row], (best_width, best_patch_start)))
         else:
             output_with_notes.append((generated_tokens[row], None))

-
+
+    # Fuse the notes for consecutive output tokens if necessary
+    for i in range(len(output_with_notes)):
+        token, coords = output_with_notes[i]
         if coords is not None:
-            best_width,
-
-
+            best_width, best_patch_start = coords
+            note_width_generated = kernel_width
+            for next_id in output_with_notes[i+1, i+2*kernel_width]:
+                next_token, next_coords = output_with_notes[next_id]
+                if next_coords is not None:
+                    next_width, next_patch_start = next_coords
+                    if best_patch_start + best_width > next_patch_start:
+                        # then notes are overlapping: thus we delete the last one and make the first wider if needed
+                        output_with_notes[next_id] = (next_token, None)
+                        larger_end = max(best_patch_start + best_width, next_patch_start + next_width)
+                        best_width = larger_end - best_patch_start
+                        note_width_generated = kernel_width + (next_id-i)
+            output_with_notes[i] = (token, (best_width, best_patch_start), note_width_generated)
+        else:
+            output_with_notes[i] = (token, None, None)
+
+    for i, (token, coords, width) in enumerate(output_with_notes):
+        if coords is not None:
+            best_width, best_patch_start = coords
+            significant_start = max(0, best_patch_start)
+            significant_end = best_patch_start + kernel_width + best_width
             context_start = max(0, significant_start - context_width)
             context_end = min(len(input_tokens), significant_end + context_width)
             first_part = "".join(input_tokens[context_start:significant_start])
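A note on the fusion pass added in the hunk above: output_with_notes[i+1, i+2*kernel_width] indexes a Python list with a tuple, which raises a TypeError as soon as one of the generated tokens carries a note. The loop presumably means to walk the indices of the next few entries. Here is a self-contained sketch of that reading; the helper name fuse_overlapping_notes and the range()/min() bounds are assumptions, the rest mirrors the diff.

    # Sketch of the assumed intent of the fusion pass: scan the next few notes and
    # merge any whose input patch overlaps the current one. Entries come in as
    # (token, (width, start)) or (token, None) and leave as 3-tuples, as in the diff.
    def fuse_overlapping_notes(output_with_notes, kernel_width):
        for i in range(len(output_with_notes)):
            token, coords = output_with_notes[i]
            if coords is None:
                output_with_notes[i] = (token, None, None)
                continue
            best_width, best_patch_start = coords
            note_width_generated = kernel_width
            for next_id in range(i + 1, min(i + 2 * kernel_width, len(output_with_notes))):
                next_token, next_coords = output_with_notes[next_id]
                if next_coords is None:
                    continue
                next_width, next_patch_start = next_coords
                if best_patch_start + best_width > next_patch_start:
                    # Overlapping notes: drop the later one and widen the first if needed.
                    output_with_notes[next_id] = (next_token, None)
                    larger_end = max(best_patch_start + best_width, next_patch_start + next_width)
                    best_width = larger_end - best_patch_start
                    note_width_generated = kernel_width + (next_id - i)
            output_with_notes[i] = (token, (best_width, best_patch_start), note_width_generated)

Each kept note ends up as a (token, (width, start), note_width) triple, which is what the reworked create_html_with_hover in the next hunk consumes.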
@@ -115,22 +124,27 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
             final_part = "".join(input_tokens[significant_end:context_end])
             print("KK", first_part, significant_part, final_part)

-            output_with_notes[i] = (token, (first_part, significant_part, final_part))
+            output_with_notes[i] = (token, (first_part, significant_part, final_part), width)

     return output_with_notes

 def create_html_with_hover(output_with_notes):
     html = "<div id='output-container'>"
     note_number = 0
-
-
+    i = 0
+    while i < len(output_with_notes):
+        (token, notes, width) = output_with_notes[i]
+        if notes is None:
+            html += f'{token}'
+            i +=1
+        else:
+            text = "".join([element[0] for element in output_with_notes[i:i+width]])
             first_part, significant_part, final_part = notes
             formatted_note = f'{first_part}<strong>{significant_part}</strong>{final_part}'
             html += f'<span class="hoverable" data-note-id="note-{note_number}">{text}<sup>[{note_number+1}]</sup>'
             html += f'<span class="hover-note">{formatted_note}</span></span>'
             note_number += 1
-
-            html += f'{text}'
+            i+=width+1
     html += "</div>"
     return html

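For reference, the new find_largest_contiguous_patch helper can be exercised on its own. A minimal sketch with a made-up thresholded row; the boolean values are hypothetical test data, not output of the Space.

    import numpy as np

    # Copy of the helper added in the diff above, unchanged.
    def find_largest_contiguous_patch(array):
        current_patch_start = None
        best_width, best_patch_start = None, None
        current_width = 0
        for i in range(len(array)):
            if array[i]:
                if current_patch_start is not None and current_patch_start + current_width == i:
                    current_width += 1
                else:
                    current_patch_start = i
                    current_width = 1
                # Note: a run starting at index 0 never updates the best patch, because
                # 0 is falsy here; "current_patch_start is not None" may be the intended test.
                if current_patch_start and (best_width is None or current_width > best_width):
                    best_patch_start = current_patch_start
                    best_width = current_width
            else:
                current_width = 0
        return best_width, best_patch_start

    # One hypothetical row of the thresholded significance mask (significant_areas).
    row = np.array([False, True, True, True, False, True, True])
    print(find_largest_contiguous_patch(row))  # (3, 1): widest run is 3 tokens long, starting at index 1

In the diff this helper runs once per generated token on a single row of significant_areas, and the (width, start) pair it returns is what later becomes the highlighted span of that token's hover note.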