Spaces:

m-ric
/

rag_highlights

Runtime error

App Files Community

m-ric HF Staff committed on Sep 27, 2024

Commit

8d2fb93

1 Parent(s): 96879fc

Ok2

Browse files

Files changed (1) hide show

app.py +6 -9

app.py CHANGED Viewed

@@ -53,7 +53,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
     attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
     ### FIND ZONES OF INTEREST
-    threshold_per_token = 0.25
     kernel_width = 6
     context_width = 20  # Number of tokens to include as context on each side
     kernel = np.ones((kernel_width, kernel_width))
@@ -85,24 +85,22 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
     output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
     for row in range(kernel_width, len(generated_tokens)):
         best_width, best_patch_start = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
         if best_width is not None:
             output_with_notes.append((generated_tokens[row], (best_width, best_patch_start)))
         else:
             output_with_notes.append((generated_tokens[row], None))
     # Fuse the notes for consecutive output tokens if necessary
     for i in range(len(output_with_notes)):
         token, coords = output_with_notes[i]
         if coords is not None:
             best_width, best_patch_start = coords
             note_width_generated = kernel_width
-            for next_id in output_with_notes[i+1, i+2*kernel_width]:
                 next_token, next_coords = output_with_notes[next_id]
                 if next_coords is not None:
                     next_width, next_patch_start = next_coords
-                    if best_patch_start + best_width > next_patch_start:
                         # then notes are overlapping: thus we delete the last one and make the first wider if needed
                         output_with_notes[next_id] = (next_token, None)
                         larger_end = max(best_patch_start + best_width, next_patch_start + next_width)
@@ -112,6 +110,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
         else:
             output_with_notes[i] = (token, None, None)
     for i, (token, coords, width) in enumerate(output_with_notes):
         if coords is not None:
             best_width, best_patch_start = coords
@@ -122,8 +121,6 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
             first_part = "".join(input_tokens[context_start:significant_start])
             significant_part = "".join(input_tokens[significant_start:significant_end])
             final_part = "".join(input_tokens[significant_end:context_end])
-            print("KK", first_part, significant_part, final_part)
             output_with_notes[i] = (token, (first_part, significant_part, final_part), width)
     return output_with_notes
@@ -136,7 +133,7 @@ def create_html_with_hover(output_with_notes):
         (token, notes, width) = output_with_notes[i]
         if notes is None:
             html += f'{token}'
-            i +=1
         else:
             text = "".join([element[0] for element in output_with_notes[i:i+width]])
             first_part, significant_part, final_part = notes
@@ -144,7 +141,7 @@ def create_html_with_hover(output_with_notes):
             html += f'<span class="hoverable" data-note-id="note-{note_number}">{text}<sup>[{note_number+1}]</sup>'
             html += f'<span class="hover-note">{formatted_note}</span></span>'
             note_number += 1
-            i+=width+1
     html += "</div>"
     return html

     attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
     ### FIND ZONES OF INTEREST
+    threshold_per_token = 0.22
     kernel_width = 6
     context_width = 20  # Number of tokens to include as context on each side
     kernel = np.ones((kernel_width, kernel_width))
     output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
     for row in range(kernel_width, len(generated_tokens)):
         best_width, best_patch_start = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
         if best_width is not None:
             output_with_notes.append((generated_tokens[row], (best_width, best_patch_start)))
         else:
             output_with_notes.append((generated_tokens[row], None))
     # Fuse the notes for consecutive output tokens if necessary
     for i in range(len(output_with_notes)):
         token, coords = output_with_notes[i]
         if coords is not None:
             best_width, best_patch_start = coords
             note_width_generated = kernel_width
+            for next_id in range(i+1, min(i+2*kernel_width, len(output_with_notes))):
                 next_token, next_coords = output_with_notes[next_id]
                 if next_coords is not None:
                     next_width, next_patch_start = next_coords
+                    if best_patch_start + best_width >= next_patch_start:
                         # then notes are overlapping: thus we delete the last one and make the first wider if needed
                         output_with_notes[next_id] = (next_token, None)
                         larger_end = max(best_patch_start + best_width, next_patch_start + next_width)
         else:
             output_with_notes[i] = (token, None, None)
+    # Convert to text slices
     for i, (token, coords, width) in enumerate(output_with_notes):
         if coords is not None:
             best_width, best_patch_start = coords
             first_part = "".join(input_tokens[context_start:significant_start])
             significant_part = "".join(input_tokens[significant_start:significant_end])
             final_part = "".join(input_tokens[significant_end:context_end])
             output_with_notes[i] = (token, (first_part, significant_part, final_part), width)
     return output_with_notes
         (token, notes, width) = output_with_notes[i]
         if notes is None:
             html += f'{token}'
+            i += 1
         else:
             text = "".join([element[0] for element in output_with_notes[i:i+width]])
             first_part, significant_part, final_part = notes
             html += f'<span class="hoverable" data-note-id="note-{note_number}">{text}<sup>[{note_number+1}]</sup>'
             html += f'<span class="hover-note">{formatted_note}</span></span>'
             note_number += 1
+            i += width
     html += "</div>"
     return html