Spaces:

m-ric
/

rag_highlights

Runtime error

App Files Files Community

m-ric HF Staff committed on Sep 27, 2024

Commit

e90d7e4

1 Parent(s): a02bd43

Improve notes

Browse files

Files changed (1) hide show

app.py +34 -12

app.py CHANGED Viewed

@@ -13,9 +13,15 @@ attnlrp.register(model)
 def really_clean_tokens(tokens):
-    tokens = clean_tokens(tokens)
-    tokens = [token.replace("_", " ").replace("▁", " ").replace("<s>", "") for token in tokens]
-    return tokens
 @spaces.GPU
 def generate_and_visualize(prompt, num_tokens=10):
@@ -50,7 +56,7 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
     ### FIND ZONES OF INTEREST
     threshold_per_token = 0.3
-    kernel_width = 5
     kernel = np.ones((kernel_width, kernel_width))
     # Compute the rolling sum using 2D convolution
@@ -72,19 +78,35 @@ def process_relevances(input_tokens, all_relevances, generated_tokens):
                 else:
                     current_patch_end = i
                     current_width = 1
-                if current_patch_end and best_width is None or current_width > best_width:
-                    best_end = current_patch_end
                     best_width = current_width
             else:
                 current_width = 0
         return best_width, best_patch_end
-    output_with_notes = [generated_tokens[:kernel_width]]
-    for row in range(significant_areas.shape[0]):
-        best_width, best_patch_end = find_largest_contiguous_patch(significant_areas[row])
         if best_width is not None:
-            output_with_notes.append((generated_tokens[row + kernel_width], input_tokens[best_patch_end-best_width, best_width+kernel_width]))
-    print("OUTPUT WIHT NOTES/::")
     print(output_with_notes)
     return output_with_notes
@@ -161,7 +183,7 @@ with gr.Blocks(css=css) as demo:
     gr.Markdown("# Token Generation with Hover Notes")
     input_text = gr.Textbox(label="Enter your prompt:", lines=10, value=examples[0][0])
-    num_tokens = gr.Slider(minimum=1, maximum=50, value=30, step=1, label="Number of tokens to generate")
     generate_button = gr.Button("Generate")
     output_html = gr.HTML(label="Generated Output")

 def really_clean_tokens(tokens):
     """Return display-ready copies of tokenizer tokens.

     For each token, "_" and "▁" are replaced by spaces, "<s>" is removed,
     and surrounding whitespace is stripped. If the result has the form
     "<0xNN>", it is replaced by the character whose code point is the
     hexadecimal value NN.

     Args:
         tokens: Iterable of token strings.

     Returns:
         A new list holding one cleaned string per input token, in order.
         Tokens that look like "<0x...>" but do not contain a valid,
         in-range hexadecimal code point are returned unchanged (after the
         replacements and stripping above) instead of raising.
     """
     cleaned_tokens = []
     for token in tokens:
         # Presumably "▁" is the tokenizer's word-boundary marker and "<s>"
         # its beginning-of-sequence token -- TODO confirm against the model.
         token = token.replace("_", " ").replace("▁", " ").replace("<s>", "").strip()
         if token.startswith("<0x") and token.endswith(">"):
             # NOTE(review): each "<0xNN>" token is converted on its own with
             # chr(), so a multi-token UTF-8 byte sequence is not reassembled
             # into a single character.
             try:
                 token = chr(int(token[3:-1], 16))
             except (ValueError, OverflowError):
                 # Malformed hex or a value outside the Unicode range: keep
                 # the token text rather than aborting the whole clean-up.
                 pass
         cleaned_tokens.append(token)
     return cleaned_tokens
 @spaces.GPU
 def generate_and_visualize(prompt, num_tokens=10):
     ### FIND ZONES OF INTEREST
     threshold_per_token = 0.3
+    kernel_width = 6
     kernel = np.ones((kernel_width, kernel_width))
     # Compute the rolling sum using 2D convolution
                 else:
                     current_patch_end = i
                     current_width = 1
+                if current_patch_end and (best_width is None or current_width > best_width):
+                    best_patch_end = current_patch_end
                     best_width = current_width
             else:
                 current_width = 0
         return best_width, best_patch_end
+    output_with_notes = [(el, None) for el in generated_tokens[:kernel_width]]
+    print(len(generated_tokens), kernel_width, significant_areas.shape)
+    for row in range(kernel_width, len(generated_tokens)):
+        print(row-kernel_width+1)
+        best_width, best_patch_end = find_largest_contiguous_patch(significant_areas[row-kernel_width+1])
         if best_width is not None:
+            for i, (token, coords) in enumerate(output_with_notes[-2*kernel_width:]):
+                if coords is not None:
+                    prev_width, prev_patch_end = coords
+                    if prev_patch_end > best_patch_end - best_width: # then notes are overlapping, thus we delete the first one.
+                        output_with_notes[i] = (token, None)
+            output_with_notes.append((generated_tokens[row], (best_width, best_patch_end)))
+        else:
+            output_with_notes.append((generated_tokens[row], None))
+    for i, (token, coords) in enumerate(output_with_notes):
+        if coords is not None:
+            best_width, best_patch_end = coords
+            output_with_notes[i] = (token, input_tokens[best_patch_end-best_width: best_patch_end+kernel_width-1])
+    # Fuse the notes for consecutive output tokens if necessary
     print(output_with_notes)
     return output_with_notes
     gr.Markdown("# Token Generation with Hover Notes")
     input_text = gr.Textbox(label="Enter your prompt:", lines=10, value=examples[0][0])
+    num_tokens = gr.Slider(minimum=1, maximum=50, value=20, step=1, label="Number of tokens to generate")
     generate_button = gr.Button("Generate")
     output_html = gr.HTML(label="Generated Output")