Spaces:
Runtime error
Runtime error
Ok2
Browse files
app.py
CHANGED
@@ -53,7 +53,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
|
|
53 |
attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
|
54 |
|
55 |
### FIND ZONES OF INTEREST
|
56 |
-
threshold_per_token = 0.
|
57 |
kernel_width = 6
|
58 |
context_width = 20 # Number of tokens to include as context on each side
|
59 |
kernel = np.ones((kernel_width, kernel_width))
|
@@ -85,24 +85,22 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
|
|
85 |
output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
|
86 |
for row in range(kernel_width, len(generated_tokens)):
|
87 |
best_width, best_patch_start = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
|
88 |
-
|
89 |
if best_width is not None:
|
90 |
output_with_notes.append((generated_tokens[row], (best_width, best_patch_start)))
|
91 |
else:
|
92 |
output_with_notes.append((generated_tokens[row], None))
|
93 |
|
94 |
-
|
95 |
# Fuse the notes for consecutive output tokens if necessary
|
96 |
for i in range(len(output_with_notes)):
|
97 |
token, coords = output_with_notes[i]
|
98 |
if coords is not None:
|
99 |
best_width, best_patch_start = coords
|
100 |
note_width_generated = kernel_width
|
101 |
-
for next_id in
|
102 |
next_token, next_coords = output_with_notes[next_id]
|
103 |
if next_coords is not None:
|
104 |
next_width, next_patch_start = next_coords
|
105 |
-
if best_patch_start + best_width
|
106 |
# then notes are overlapping: thus we delete the last one and make the first wider if needed
|
107 |
output_with_notes[next_id] = (next_token, None)
|
108 |
larger_end = max(best_patch_start + best_width, next_patch_start + next_width)
|
@@ -112,6 +110,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
|
|
112 |
else:
|
113 |
output_with_notes[i] = (token, None, None)
|
114 |
|
|
|
115 |
for i, (token, coords, width) in enumerate(output_with_notes):
|
116 |
if coords is not None:
|
117 |
best_width, best_patch_start = coords
|
@@ -122,8 +121,6 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
|
|
122 |
first_part = "".join(input_tokens[context_start:significant_start])
|
123 |
significant_part = "".join(input_tokens[significant_start:significant_end])
|
124 |
final_part = "".join(input_tokens[significant_end:context_end])
|
125 |
-
print("KK", first_part, significant_part, final_part)
|
126 |
-
|
127 |
output_with_notes[i] = (token, (first_part, significant_part, final_part), width)
|
128 |
|
129 |
return output_with_notes
|
@@ -136,7 +133,7 @@ def create_html_with_hover(output_with_notes):
|
|
136 |
(token, notes, width) = output_with_notes[i]
|
137 |
if notes is None:
|
138 |
html += f'{token}'
|
139 |
-
i +=1
|
140 |
else:
|
141 |
text = "".join([element[0] for element in output_with_notes[i:i+width]])
|
142 |
first_part, significant_part, final_part = notes
|
@@ -144,7 +141,7 @@ def create_html_with_hover(output_with_notes):
|
|
144 |
html += f'<span class="hoverable" data-note-id="note-{note_number}">{text}<sup>[{note_number+1}]</sup>'
|
145 |
html += f'<span class="hover-note">{formatted_note}</span></span>'
|
146 |
note_number += 1
|
147 |
-
i+=width
|
148 |
html += "</div>"
|
149 |
return html
|
150 |
|
|
|
53 |
attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
|
54 |
|
55 |
### FIND ZONES OF INTEREST
|
56 |
+
threshold_per_token = 0.22
|
57 |
kernel_width = 6
|
58 |
context_width = 20 # Number of tokens to include as context on each side
|
59 |
kernel = np.ones((kernel_width, kernel_width))
|
|
|
85 |
output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
|
86 |
for row in range(kernel_width, len(generated_tokens)):
|
87 |
best_width, best_patch_start = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
|
|
|
88 |
if best_width is not None:
|
89 |
output_with_notes.append((generated_tokens[row], (best_width, best_patch_start)))
|
90 |
else:
|
91 |
output_with_notes.append((generated_tokens[row], None))
|
92 |
|
|
|
93 |
# Fuse the notes for consecutive output tokens if necessary
|
94 |
for i in range(len(output_with_notes)):
|
95 |
token, coords = output_with_notes[i]
|
96 |
if coords is not None:
|
97 |
best_width, best_patch_start = coords
|
98 |
note_width_generated = kernel_width
|
99 |
+
for next_id in range(i+1, min(i+2*kernel_width, len(output_with_notes))):
|
100 |
next_token, next_coords = output_with_notes[next_id]
|
101 |
if next_coords is not None:
|
102 |
next_width, next_patch_start = next_coords
|
103 |
+
if best_patch_start + best_width >= next_patch_start:
|
104 |
# then notes are overlapping: thus we delete the last one and make the first wider if needed
|
105 |
output_with_notes[next_id] = (next_token, None)
|
106 |
larger_end = max(best_patch_start + best_width, next_patch_start + next_width)
|
|
|
110 |
else:
|
111 |
output_with_notes[i] = (token, None, None)
|
112 |
|
113 |
+
# Convert to text slices
|
114 |
for i, (token, coords, width) in enumerate(output_with_notes):
|
115 |
if coords is not None:
|
116 |
best_width, best_patch_start = coords
|
|
|
121 |
first_part = "".join(input_tokens[context_start:significant_start])
|
122 |
significant_part = "".join(input_tokens[significant_start:significant_end])
|
123 |
final_part = "".join(input_tokens[significant_end:context_end])
|
|
|
|
|
124 |
output_with_notes[i] = (token, (first_part, significant_part, final_part), width)
|
125 |
|
126 |
return output_with_notes
|
|
|
133 |
(token, notes, width) = output_with_notes[i]
|
134 |
if notes is None:
|
135 |
html += f'{token}'
|
136 |
+
i += 1
|
137 |
else:
|
138 |
text = "".join([element[0] for element in output_with_notes[i:i+width]])
|
139 |
first_part, significant_part, final_part = notes
|
|
|
141 |
html += f'<span class="hoverable" data-note-id="note-{note_number}">{text}<sup>[{note_number+1}]</sup>'
|
142 |
html += f'<span class="hover-note">{formatted_note}</span></span>'
|
143 |
note_number += 1
|
144 |
+
i += width
|
145 |
html += "</div>"
|
146 |
return html
|
147 |
|