m-ric HF Staff committed on
Commit
719af86
·
1 Parent(s): e90d7e4

Works well!

Browse files
Files changed (1) hide show
  1. app.py +28 -16
app.py CHANGED
@@ -50,30 +50,29 @@ def generate_and_visualize(prompt, num_tokens=10):
50
  generated_tokens = really_clean_tokens(tokenizer.convert_ids_to_tokens(generated_tokens_ids))
51
 
52
  return input_tokens, all_relevances, generated_tokens
53
-
54
  def process_relevances(input_tokens, all_relevances, generated_tokens):
55
  attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
56
 
57
  ### FIND ZONES OF INTEREST
58
  threshold_per_token = 0.3
59
  kernel_width = 6
 
60
  kernel = np.ones((kernel_width, kernel_width))
61
 
62
  # Compute the rolling sum using 2D convolution
63
  rolled_sum = convolve2d(attention_matrix, kernel, mode='valid')
64
 
65
- # Find where the rolled sum is greater than 1.0
66
  significant_areas = rolled_sum > kernel_width**2 * threshold_per_token
67
- print("SUMM:", significant_areas.sum())
68
 
69
  def find_largest_contiguous_patch(array):
70
- current_patch_end=None
71
  best_width, best_patch_end = None, None
72
- current_width=0
73
  for i in range(len(array)):
74
  if array[i]:
75
  if current_patch_end is not None and current_patch_end == i-1:
76
- current_width +=1
77
  current_patch_end = i
78
  else:
79
  current_patch_end = i
@@ -86,13 +85,13 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
86
  return best_width, best_patch_end
87
 
88
  output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
89
- print(len(generated_tokens), kernel_width, significant_areas.shape)
90
  for row in range(kernel_width, len(generated_tokens)):
91
- print(row-kernel_width+1)
92
  best_width, best_patch_end = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
93
 
94
  if best_width is not None:
95
- for i, (token, coords) in enumerate(output_with_notes[-2*kernel_width:]):
 
 
96
  if coords is not None:
97
  prev_width, prev_patch_end = coords
98
  if prev_patch_end > best_patch_end - best_width: # then notes are overlapping, thus we delete the first one.
@@ -104,20 +103,31 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
104
  for i, (token, coords) in enumerate(output_with_notes):
105
  if coords is not None:
106
  best_width, best_patch_end = coords
107
- output_with_notes[i] = (token, input_tokens[best_patch_end-best_width: best_patch_end+kernel_width-1])
 
 
 
 
 
108
 
109
- # Fuse the notes for consecutive output tokens if necessary
110
- print(output_with_notes)
111
  return output_with_notes
112
 
113
  def create_html_with_hover(output_with_notes):
114
  html = "<div id='output-container'>"
115
  for i, (text, notes) in enumerate(output_with_notes):
116
  if notes:
 
 
 
 
 
 
 
 
117
  html += f'<span class="hoverable" data-note-id="note-{i}">{text}<sup>[{i+1}]</sup>'
118
- html += f'<span class="hover-note">{", ".join(notes)}</span></span> '
119
  else:
120
- html += f'{text} '
121
  html += "</div>"
122
  return html
123
 
@@ -140,12 +150,14 @@ css = """
140
  bottom: 100%;
141
  left: 50%;
142
  transform: translateX(-50%);
143
- white-space: nowrap;
 
 
 
144
  z-index: 1;
145
  }
146
  .hoverable:hover .hover-note { display: block; }
147
  """
148
-
149
  examples = [
150
  [
151
  """Context: Mount Everest attracts many climbers, including highly experienced mountaineers. There are two main climbing routes, one approaching the summit from the southeast in Nepal (known as the standard route) and the other from the north in Tibet. While not posing substantial technical climbing challenges on the standard route, Everest presents dangers such as altitude sickness, weather, and wind, as well as hazards from avalanches and the Khumbu Icefall. As of November 2022, 310 people have died on Everest. Over 200 bodies remain on the mountain and have not been removed due to the dangerous conditions. The first recorded efforts to reach Everest's summit were made by British mountaineers. As Nepal did not allow foreigners to enter the country at the time, the British made several attempts on the north ridge route from the Tibetan side. After the first reconnaissance expedition by the British in 1921 reached 7,000 m (22,970 ft) on the North Col, the 1922 expedition pushed the north ridge route up to 8,320 m (27,300 ft), marking the first time a human had climbed above 8,000 m (26,247 ft). The 1924 expedition resulted in one of the greatest mysteries on Everest to this day: George Mallory and Andrew Irvine made a final summit attempt on 8 June but never returned, sparking debate as to whether they were the first to reach the top. Tenzing Norgay and Edmund Hillary made the first documented ascent of Everest in 1953, using the southeast ridge route. Norgay had reached 8,595 m (28,199 ft) the previous year as a member of the 1952 Swiss expedition. The Chinese mountaineering team of Wang Fuzhou, Gonpo, and Qu Yinhua made the first reported ascent of the peak from the north ridge on 25 May 1960.
 
50
  generated_tokens = really_clean_tokens(tokenizer.convert_ids_to_tokens(generated_tokens_ids))
51
 
52
  return input_tokens, all_relevances, generated_tokens
 
53
  def process_relevances(input_tokens, all_relevances, generated_tokens):
54
  attention_matrix = np.array([el[:len(all_relevances[0])] for el in all_relevances])
55
 
56
  ### FIND ZONES OF INTEREST
57
  threshold_per_token = 0.3
58
  kernel_width = 6
59
+ context_width = 20 # Number of tokens to include as context on each side
60
  kernel = np.ones((kernel_width, kernel_width))
61
 
62
  # Compute the rolling sum using 2D convolution
63
  rolled_sum = convolve2d(attention_matrix, kernel, mode='valid')
64
 
65
+ # Find where the rolled sum is greater than the threshold
66
  significant_areas = rolled_sum > kernel_width**2 * threshold_per_token
 
67
 
68
  def find_largest_contiguous_patch(array):
69
+ current_patch_end = None
70
  best_width, best_patch_end = None, None
71
+ current_width = 0
72
  for i in range(len(array)):
73
  if array[i]:
74
  if current_patch_end is not None and current_patch_end == i-1:
75
+ current_width += 1
76
  current_patch_end = i
77
  else:
78
  current_patch_end = i
 
85
  return best_width, best_patch_end
86
 
87
  output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
 
88
  for row in range(kernel_width, len(generated_tokens)):
 
89
  best_width, best_patch_end = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
90
 
91
  if best_width is not None:
92
+ # Fuse the notes for consecutive output tokens if necessary
93
+ for i in range(len(output_with_notes)-2*kernel_width, len(output_with_notes)):
94
+ token, coords = output_with_notes[i]
95
  if coords is not None:
96
  prev_width, prev_patch_end = coords
97
  if prev_patch_end > best_patch_end - best_width: # then notes are overlapping, thus we delete the first one.
 
103
  for i, (token, coords) in enumerate(output_with_notes):
104
  if coords is not None:
105
  best_width, best_patch_end = coords
106
+ start = max(0, best_patch_end - best_width - context_width)
107
+ end = min(len(input_tokens), best_patch_end + kernel_width + context_width)
108
+ context = input_tokens[start:end]
109
+ significant_start = max(0, best_patch_end - best_width - start)
110
+ significant_end = significant_start + best_width + kernel_width
111
+ output_with_notes[i] = (token, (context, significant_start, significant_end))
112
 
 
 
113
  return output_with_notes
114
 
115
  def create_html_with_hover(output_with_notes):
116
  html = "<div id='output-container'>"
117
  for i, (text, notes) in enumerate(output_with_notes):
118
  if notes:
119
+ context, start, end = notes
120
+ formatted_context = []
121
+ for j, token in enumerate(context):
122
+ if start <= j < end:
123
+ formatted_context.append(f'<strong>{token}</strong>')
124
+ else:
125
+ formatted_context.append(token)
126
+ formatted_note = " ".join(formatted_context)
127
  html += f'<span class="hoverable" data-note-id="note-{i}">{text}<sup>[{i+1}]</sup>'
128
+ html += f'<span class="hover-note">{formatted_note}</span></span>'
129
  else:
130
+ html += f'{text}'
131
  html += "</div>"
132
  return html
133
 
 
150
  bottom: 100%;
151
  left: 50%;
152
  transform: translateX(-50%);
153
+ white-space: normal;
154
+ background-color: rgba(240, 240, 240, 1);
155
+ max-width: 600px;
156
+ word-wrap: break-word;
157
  z-index: 1;
158
  }
159
  .hoverable:hover .hover-note { display: block; }
160
  """
 
161
  examples = [
162
  [
163
  """Context: Mount Everest attracts many climbers, including highly experienced mountaineers. There are two main climbing routes, one approaching the summit from the southeast in Nepal (known as the standard route) and the other from the north in Tibet. While not posing substantial technical climbing challenges on the standard route, Everest presents dangers such as altitude sickness, weather, and wind, as well as hazards from avalanches and the Khumbu Icefall. As of November 2022, 310 people have died on Everest. Over 200 bodies remain on the mountain and have not been removed due to the dangerous conditions. The first recorded efforts to reach Everest's summit were made by British mountaineers. As Nepal did not allow foreigners to enter the country at the time, the British made several attempts on the north ridge route from the Tibetan side. After the first reconnaissance expedition by the British in 1921 reached 7,000 m (22,970 ft) on the North Col, the 1922 expedition pushed the north ridge route up to 8,320 m (27,300 ft), marking the first time a human had climbed above 8,000 m (26,247 ft). The 1924 expedition resulted in one of the greatest mysteries on Everest to this day: George Mallory and Andrew Irvine made a final summit attempt on 8 June but never returned, sparking debate as to whether they were the first to reach the top. Tenzing Norgay and Edmund Hillary made the first documented ascent of Everest in 1953, using the southeast ridge route. Norgay had reached 8,595 m (28,199 ft) the previous year as a member of the 1952 Swiss expedition. The Chinese mountaineering team of Wang Fuzhou, Gonpo, and Qu Yinhua made the first reported ascent of the peak from the north ridge on 25 May 1960.