m-ric HF Staff committed on
Commit
8d2fb93
·
1 Parent(s): 96879fc
Files changed (1) hide show
  1. app.py +6 -9
app.py CHANGED
@@ -53,7 +53,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
53
  attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
54
 
55
  ### FIND ZONES OF INTEREST
56
- threshold_per_token = 0.25
57
  kernel_width = 6
58
  context_width = 20 # Number of tokens to include as context on each side
59
  kernel = np.ones((kernel_width, kernel_width))
@@ -85,24 +85,22 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
85
  output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
86
  for row in range(kernel_width, len(generated_tokens)):
87
  best_width, best_patch_start = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
88
-
89
  if best_width is not None:
90
  output_with_notes.append((generated_tokens[row], (best_width, best_patch_start)))
91
  else:
92
  output_with_notes.append((generated_tokens[row], None))
93
 
94
-
95
  # Fuse the notes for consecutive output tokens if necessary
96
  for i in range(len(output_with_notes)):
97
  token, coords = output_with_notes[i]
98
  if coords is not None:
99
  best_width, best_patch_start = coords
100
  note_width_generated = kernel_width
101
- for next_id in output_with_notes[i+1, i+2*kernel_width]:
102
  next_token, next_coords = output_with_notes[next_id]
103
  if next_coords is not None:
104
  next_width, next_patch_start = next_coords
105
- if best_patch_start + best_width > next_patch_start:
106
  # then notes are overlapping: thus we delete the last one and make the first wider if needed
107
  output_with_notes[next_id] = (next_token, None)
108
  larger_end = max(best_patch_start + best_width, next_patch_start + next_width)
@@ -112,6 +110,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
112
  else:
113
  output_with_notes[i] = (token, None, None)
114
 
 
115
  for i, (token, coords, width) in enumerate(output_with_notes):
116
  if coords is not None:
117
  best_width, best_patch_start = coords
@@ -122,8 +121,6 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
122
  first_part = "".join(input_tokens[context_start:significant_start])
123
  significant_part = "".join(input_tokens[significant_start:significant_end])
124
  final_part = "".join(input_tokens[significant_end:context_end])
125
- print("KK", first_part, significant_part, final_part)
126
-
127
  output_with_notes[i] = (token, (first_part, significant_part, final_part), width)
128
 
129
  return output_with_notes
@@ -136,7 +133,7 @@ def create_html_with_hover(output_with_notes):
136
  (token, notes, width) = output_with_notes[i]
137
  if notes is None:
138
  html += f'{token}'
139
- i +=1
140
  else:
141
  text = "".join([element[0] for element in output_with_notes[i:i+width]])
142
  first_part, significant_part, final_part = notes
@@ -144,7 +141,7 @@ def create_html_with_hover(output_with_notes):
144
  html += f'<span class="hoverable" data-note-id="note-{note_number}">{text}<sup>[{note_number+1}]</sup>'
145
  html += f'<span class="hover-note">{formatted_note}</span></span>'
146
  note_number += 1
147
- i+=width+1
148
  html += "</div>"
149
  return html
150
 
 
53
  attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
54
 
55
  ### FIND ZONES OF INTEREST
56
+ threshold_per_token = 0.22
57
  kernel_width = 6
58
  context_width = 20 # Number of tokens to include as context on each side
59
  kernel = np.ones((kernel_width, kernel_width))
 
85
  output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
86
  for row in range(kernel_width, len(generated_tokens)):
87
  best_width, best_patch_start = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
 
88
  if best_width is not None:
89
  output_with_notes.append((generated_tokens[row], (best_width, best_patch_start)))
90
  else:
91
  output_with_notes.append((generated_tokens[row], None))
92
 
 
93
  # Fuse the notes for consecutive output tokens if necessary
94
  for i in range(len(output_with_notes)):
95
  token, coords = output_with_notes[i]
96
  if coords is not None:
97
  best_width, best_patch_start = coords
98
  note_width_generated = kernel_width
99
+ for next_id in range(i+1, min(i+2*kernel_width, len(output_with_notes))):
100
  next_token, next_coords = output_with_notes[next_id]
101
  if next_coords is not None:
102
  next_width, next_patch_start = next_coords
103
+ if best_patch_start + best_width >= next_patch_start:
104
  # then notes are overlapping: thus we delete the last one and make the first wider if needed
105
  output_with_notes[next_id] = (next_token, None)
106
  larger_end = max(best_patch_start + best_width, next_patch_start + next_width)
 
110
  else:
111
  output_with_notes[i] = (token, None, None)
112
 
113
+ # Convert to text slices
114
  for i, (token, coords, width) in enumerate(output_with_notes):
115
  if coords is not None:
116
  best_width, best_patch_start = coords
 
121
  first_part = "".join(input_tokens[context_start:significant_start])
122
  significant_part = "".join(input_tokens[significant_start:significant_end])
123
  final_part = "".join(input_tokens[significant_end:context_end])
 
 
124
  output_with_notes[i] = (token, (first_part, significant_part, final_part), width)
125
 
126
  return output_with_notes
 
133
  (token, notes, width) = output_with_notes[i]
134
  if notes is None:
135
  html += f'{token}'
136
+ i += 1
137
  else:
138
  text = "".join([element[0] for element in output_with_notes[i:i+width]])
139
  first_part, significant_part, final_part = notes
 
141
  html += f'<span class="hoverable" data-note-id="note-{note_number}">{text}<sup>[{note_number+1}]</sup>'
142
  html += f'<span class="hover-note">{formatted_note}</span></span>'
143
  note_number += 1
144
+ i += width
145
  html += "</div>"
146
  return html
147