Spaces:
Runtime error
Runtime error
Works well!
Browse files
app.py
CHANGED
@@ -50,30 +50,29 @@ def generate_and_visualize(prompt, num_tokens=10):
|
|
50 |
generated_tokens = really_clean_tokens(tokenizer.convert_ids_to_tokens(generated_tokens_ids))
|
51 |
|
52 |
return input_tokens, all_relevances, generated_tokens
|
53 |
-
|
54 |
def process_relevances(input_tokens, all_relevances, generated_tokens):
|
55 |
attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
|
56 |
|
57 |
### FIND ZONES OF INTEREST
|
58 |
threshold_per_token = 0.3
|
59 |
kernel_width = 6
|
|
|
60 |
kernel = np.ones((kernel_width, kernel_width))
|
61 |
|
62 |
# Compute the rolling sum using 2D convolution
|
63 |
rolled_sum = convolve2d(attention_matrix, kernel, mode='valid')
|
64 |
|
65 |
-
# Find where the rolled sum is greater than
|
66 |
significant_areas = rolled_sum > kernel_width**2 * threshold_per_token
|
67 |
-
print("SUMM:", significant_areas.sum())
|
68 |
|
69 |
def find_largest_contiguous_patch(array):
|
70 |
-
current_patch_end=None
|
71 |
best_width, best_patch_end = None, None
|
72 |
-
current_width=0
|
73 |
for i in range(len(array)):
|
74 |
if array[i]:
|
75 |
if current_patch_end is not None and current_patch_end == i-1:
|
76 |
-
current_width +=1
|
77 |
current_patch_end = i
|
78 |
else:
|
79 |
current_patch_end = i
|
@@ -86,13 +85,13 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
|
|
86 |
return best_width, best_patch_end
|
87 |
|
88 |
output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
|
89 |
-
print(len(generated_tokens), kernel_width, significant_areas.shape)
|
90 |
for row in range(kernel_width, len(generated_tokens)):
|
91 |
-
print(row-kernel_width+1)
|
92 |
best_width, best_patch_end = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
|
93 |
|
94 |
if best_width is not None:
|
95 |
-
for
|
|
|
|
|
96 |
if coords is not None:
|
97 |
prev_width, prev_patch_end = coords
|
98 |
if prev_patch_end > best_patch_end - best_width: # then notes are overlapping, thus we delete the first one.
|
@@ -104,20 +103,31 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
|
|
104 |
for i, (token, coords) in enumerate(output_with_notes):
|
105 |
if coords is not None:
|
106 |
best_width, best_patch_end = coords
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
-
# Fuse the notes for consecutive output tokens if necessary
|
110 |
-
print(output_with_notes)
|
111 |
return output_with_notes
|
112 |
|
113 |
def create_html_with_hover(output_with_notes):
|
114 |
html = "<div id='output-container'>"
|
115 |
for i, (text, notes) in enumerate(output_with_notes):
|
116 |
if notes:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
html += f'<span class="hoverable" data-note-id="note-{i}">{text}<sup>[{i+1}]</sup>'
|
118 |
-
html += f'<span class="hover-note">{
|
119 |
else:
|
120 |
-
html += f'{text}
|
121 |
html += "</div>"
|
122 |
return html
|
123 |
|
@@ -140,12 +150,14 @@ css = """
|
|
140 |
bottom: 100%;
|
141 |
left: 50%;
|
142 |
transform: translateX(-50%);
|
143 |
-
white-space:
|
|
|
|
|
|
|
144 |
z-index: 1;
|
145 |
}
|
146 |
.hoverable:hover .hover-note { display: block; }
|
147 |
"""
|
148 |
-
|
149 |
examples = [
|
150 |
[
|
151 |
"""Context: Mount Everest attracts many climbers, including highly experienced mountaineers. There are two main climbing routes, one approaching the summit from the southeast in Nepal (known as the standard route) and the other from the north in Tibet. While not posing substantial technical climbing challenges on the standard route, Everest presents dangers such as altitude sickness, weather, and wind, as well as hazards from avalanches and the Khumbu Icefall. As of November 2022, 310 people have died on Everest. Over 200 bodies remain on the mountain and have not been removed due to the dangerous conditions. The first recorded efforts to reach Everest's summit were made by British mountaineers. As Nepal did not allow foreigners to enter the country at the time, the British made several attempts on the north ridge route from the Tibetan side. After the first reconnaissance expedition by the British in 1921 reached 7,000 m (22,970 ft) on the North Col, the 1922 expedition pushed the north ridge route up to 8,320 m (27,300 ft), marking the first time a human had climbed above 8,000 m (26,247 ft). The 1924 expedition resulted in one of the greatest mysteries on Everest to this day: George Mallory and Andrew Irvine made a final summit attempt on 8 June but never returned, sparking debate as to whether they were the first to reach the top. Tenzing Norgay and Edmund Hillary made the first documented ascent of Everest in 1953, using the southeast ridge route. Norgay had reached 8,595 m (28,199 ft) the previous year as a member of the 1952 Swiss expedition. The Chinese mountaineering team of Wang Fuzhou, Gonpo, and Qu Yinhua made the first reported ascent of the peak from the north ridge on 25 May 1960.
|
|
|
50 |
generated_tokens = really_clean_tokens(tokenizer.convert_ids_to_tokens(generated_tokens_ids))
|
51 |
|
52 |
return input_tokens, all_relevances, generated_tokens
|
|
|
53 |
def process_relevances(input_tokens, all_relevances, generated_tokens):
|
54 |
attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
|
55 |
|
56 |
### FIND ZONES OF INTEREST
|
57 |
threshold_per_token = 0.3
|
58 |
kernel_width = 6
|
59 |
+
context_width = 20 # Number of tokens to include as context on each side
|
60 |
kernel = np.ones((kernel_width, kernel_width))
|
61 |
|
62 |
# Compute the rolling sum using 2D convolution
|
63 |
rolled_sum = convolve2d(attention_matrix, kernel, mode='valid')
|
64 |
|
65 |
+
# Find where the rolled sum is greater than the threshold
|
66 |
significant_areas = rolled_sum > kernel_width**2 * threshold_per_token
|
|
|
67 |
|
68 |
def find_largest_contiguous_patch(array):
|
69 |
+
current_patch_end = None
|
70 |
best_width, best_patch_end = None, None
|
71 |
+
current_width = 0
|
72 |
for i in range(len(array)):
|
73 |
if array[i]:
|
74 |
if current_patch_end is not None and current_patch_end == i-1:
|
75 |
+
current_width += 1
|
76 |
current_patch_end = i
|
77 |
else:
|
78 |
current_patch_end = i
|
|
|
85 |
return best_width, best_patch_end
|
86 |
|
87 |
output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
|
|
|
88 |
for row in range(kernel_width, len(generated_tokens)):
|
|
|
89 |
best_width, best_patch_end = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
|
90 |
|
91 |
if best_width is not None:
|
92 |
+
# Fuse the notes for consecutive output tokens if necessary
|
93 |
+
for i in range(len(output_with_notes)-2*kernel_width, len(output_with_notes)):
|
94 |
+
token, coords = output_with_notes[i]
|
95 |
if coords is not None:
|
96 |
prev_width, prev_patch_end = coords
|
97 |
if prev_patch_end > best_patch_end - best_width: # then notes are overlapping, thus we delete the first one.
|
|
|
103 |
for i, (token, coords) in enumerate(output_with_notes):
|
104 |
if coords is not None:
|
105 |
best_width, best_patch_end = coords
|
106 |
+
start = max(0, best_patch_end - best_width - context_width)
|
107 |
+
end = min(len(input_tokens), best_patch_end + kernel_width + context_width)
|
108 |
+
context = input_tokens[start:end]
|
109 |
+
significant_start = max(0, best_patch_end - best_width - start)
|
110 |
+
significant_end = significant_start + best_width + kernel_width
|
111 |
+
output_with_notes[i] = (token, (context, significant_start, significant_end))
|
112 |
|
|
|
|
|
113 |
return output_with_notes
|
114 |
|
115 |
def create_html_with_hover(output_with_notes):
    """Render generated tokens as HTML, attaching a hover note where present.

    Args:
        output_with_notes: iterable of ``(text, notes)`` pairs. ``notes`` is
            either falsy (the token is emitted as-is, with no annotation) or a
            ``(context, start, end)`` triple, where ``context`` is a sequence
            of tokens and ``start``/``end`` delimit the half-open index range
            of ``context`` rendered in bold.

    Returns:
        A single HTML string wrapped in ``<div id='output-container'>``. Each
        annotated token becomes a ``span.hoverable`` with a 1-based footnote
        marker and a nested ``span.hover-note`` holding the context tokens
        joined by single spaces.
    """
    # Imported locally (and under a distinct name) so the block does not rely
    # on a module-level import and does not collide with a variable named
    # ``html``.
    from html import escape

    def _safe(value):
        # Token text may contain '<', '>' or '&'; escape it so it is displayed
        # literally instead of being parsed as markup. Quotes are left alone
        # because the text is only ever placed in element content.
        return escape(str(value), quote=False)

    parts = ["<div id='output-container'>"]
    for i, (text, notes) in enumerate(output_with_notes):
        if not notes:
            parts.append(_safe(text))
            continue

        context, start, end = notes
        formatted_note = " ".join(
            f'<strong>{_safe(token)}</strong>' if start <= j < end else _safe(token)
            for j, token in enumerate(context)
        )
        parts.append(
            f'<span class="hoverable" data-note-id="note-{i}">{_safe(text)}<sup>[{i+1}]</sup>'
        )
        parts.append(f'<span class="hover-note">{formatted_note}</span></span>')

    parts.append("</div>")
    return "".join(parts)
|
133 |
|
|
|
150 |
bottom: 100%;
|
151 |
left: 50%;
|
152 |
transform: translateX(-50%);
|
153 |
+
white-space: normal;
|
154 |
+
background-color: rgba(240, 240, 240, 1);
|
155 |
+
max-width: 600px;
|
156 |
+
word-wrap: break-word;
|
157 |
z-index: 1;
|
158 |
}
|
159 |
.hoverable:hover .hover-note { display: block; }
|
160 |
"""
|
|
|
161 |
examples = [
|
162 |
[
|
163 |
"""Context: Mount Everest attracts many climbers, including highly experienced mountaineers. There are two main climbing routes, one approaching the summit from the southeast in Nepal (known as the standard route) and the other from the north in Tibet. While not posing substantial technical climbing challenges on the standard route, Everest presents dangers such as altitude sickness, weather, and wind, as well as hazards from avalanches and the Khumbu Icefall. As of November 2022, 310 people have died on Everest. Over 200 bodies remain on the mountain and have not been removed due to the dangerous conditions. The first recorded efforts to reach Everest's summit were made by British mountaineers. As Nepal did not allow foreigners to enter the country at the time, the British made several attempts on the north ridge route from the Tibetan side. After the first reconnaissance expedition by the British in 1921 reached 7,000 m (22,970 ft) on the North Col, the 1922 expedition pushed the north ridge route up to 8,320 m (27,300 ft), marking the first time a human had climbed above 8,000 m (26,247 ft). The 1924 expedition resulted in one of the greatest mysteries on Everest to this day: George Mallory and Andrew Irvine made a final summit attempt on 8 June but never returned, sparking debate as to whether they were the first to reach the top. Tenzing Norgay and Edmund Hillary made the first documented ascent of Everest in 1953, using the southeast ridge route. Norgay had reached 8,595 m (28,199 ft) the previous year as a member of the 1952 Swiss expedition. The Chinese mountaineering team of Wang Fuzhou, Gonpo, and Qu Yinhua made the first reported ascent of the peak from the north ridge on 25 May 1960.
|