m-ric HF Staff commited on
Commit
e90d7e4
·
1 Parent(s): a02bd43

Improve notes

Browse files
Files changed (1) hide show
  1. app.py +34 -12
app.py CHANGED
@@ -13,9 +13,15 @@ attnlrp.register(model)
13
 
14
 
15
  def really_clean_tokens(tokens):
16
- tokens = clean_tokens(tokens)
17
- tokens = [token.replace("_", " ").replace("▁", " ").replace("<s>", "") for token in tokens]
18
- return tokens
 
 
 
 
 
 
19
 
20
  @spaces.GPU
21
  def generate_and_visualize(prompt, num_tokens=10):
@@ -50,7 +56,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
50
 
51
  ### FIND ZONES OF INTEREST
52
  threshold_per_token = 0.3
53
- kernel_width = 5
54
  kernel = np.ones((kernel_width, kernel_width))
55
 
56
  # Compute the rolling sum using 2D convolution
@@ -72,19 +78,35 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
72
  else:
73
  current_patch_end = i
74
  current_width = 1
75
- if current_patch_end and best_width is None or current_width > best_width:
76
- best_end = current_patch_end
77
  best_width = current_width
78
  else:
79
  current_width = 0
80
  return best_width, best_patch_end
81
 
82
- output_with_notes = [generated_tokens[:kernel_width]]
83
- for row in range(significant_areas.shape[0]):
84
- best_width, best_patch_end = find_largest_contiguous_patch(significant_areas[row])
 
 
 
85
  if best_width is not None:
86
- output_with_notes.append((generated_tokens[row + kernel_width], input_tokens[best_patch_end-best_width, best_width+kernel_width]))
87
- print("OUTPUT WIHT NOTES/::")
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  print(output_with_notes)
89
  return output_with_notes
90
 
@@ -161,7 +183,7 @@ with gr.Blocks(css=css) as demo:
161
  gr.Markdown("# Token Generation with Hover Notes")
162
 
163
  input_text = gr.Textbox(label="Enter your prompt:", lines=10, value=examples[0][0])
164
- num_tokens = gr.Slider(minimum=1, maximum=50, value=30, step=1, label="Number of tokens to generate")
165
  generate_button = gr.Button("Generate")
166
 
167
  output_html = gr.HTML(label="Generated Output")
 
13
 
14
 
15
  def really_clean_tokens(tokens):
16
+ cleaned_tokens = []
17
+ for token in tokens:
18
+ token = token.replace("_", " ").replace("▁", " ").replace("<s>", "").strip()
19
+ if token.startswith("<0x") and token.endswith(">"):
20
+ # Convert hex to character
21
+ char_code = int(token[3:-1], 16)
22
+ token = chr(char_code)
23
+ cleaned_tokens.append(token)
24
+ return cleaned_tokens
25
 
26
  @spaces.GPU
27
  def generate_and_visualize(prompt, num_tokens=10):
 
56
 
57
  ### FIND ZONES OF INTEREST
58
  threshold_per_token = 0.3
59
+ kernel_width = 6
60
  kernel = np.ones((kernel_width, kernel_width))
61
 
62
  # Compute the rolling sum using 2D convolution
 
78
  else:
79
  current_patch_end = i
80
  current_width = 1
81
+ if current_patch_end and (best_width is None or current_width > best_width):
82
+ best_patch_end = current_patch_end
83
  best_width = current_width
84
  else:
85
  current_width = 0
86
  return best_width, best_patch_end
87
 
88
+ output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
89
+ print(len(generated_tokens), kernel_width, significant_areas.shape)
90
+ for row in range(kernel_width, len(generated_tokens)):
91
+ print(row-kernel_width+1)
92
+ best_width, best_patch_end = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
93
+
94
  if best_width is not None:
95
+ for i, (token, coords) in enumerate(output_with_notes[-2*kernel_width:]):
96
+ if coords is not None:
97
+ prev_width, prev_patch_end = coords
98
+ if prev_patch_end > best_patch_end - best_width: # then notes are overlapping, thus we delete the first one.
99
+ output_with_notes[i] = (token, None)
100
+ output_with_notes.append((generated_tokens[row], (best_width, best_patch_end)))
101
+ else:
102
+ output_with_notes.append((generated_tokens[row], None))
103
+
104
+ for i, (token, coords) in enumerate(output_with_notes):
105
+ if coords is not None:
106
+ best_width, best_patch_end = coords
107
+ output_with_notes[i] = (token, input_tokens[best_patch_end-best_width: best_patch_end+kernel_width-1])
108
+
109
+ # Fuse the notes for consecutive output tokens if necessary
110
  print(output_with_notes)
111
  return output_with_notes
112
 
 
183
  gr.Markdown("# Token Generation with Hover Notes")
184
 
185
  input_text = gr.Textbox(label="Enter your prompt:", lines=10, value=examples[0][0])
186
+ num_tokens = gr.Slider(minimum=1, maximum=50, value=20, step=1, label="Number of tokens to generate")
187
  generate_button = gr.Button("Generate")
188
 
189
  output_html = gr.HTML(label="Generated Output")