decoding_visualizer

Running on T4

App Files Files Community

m-ric committed on Mar 25, 2024

Commit

50809fa

verified ·

1 Parent(s): 5e72e33

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -32

app.py CHANGED Viewed

@@ -30,14 +30,12 @@ STYLE = """
 .prose table {
     margin-bottom: 0!important;
 }
 .prose td, th {
     padding-left: 2px;
     padding-right: 2px;
     padding-top: 0;
     padding-bottom: 0;
 }
 .tree {
 	padding: 0px;
 	margin: 0!important;
@@ -48,13 +46,11 @@ STYLE = """
 	text-align: center;
     display:inline-block;
 }
 #root {
     display: inline-grid!important;
     width:auto!important;
     min-width: 220px;
 }
 .tree ul {
     padding-left: 20px;
     position: relative;
@@ -75,7 +71,6 @@ STYLE = """
     justify-content: start;
     align-items: center;
 }
 .tree li::before, .tree li::after {
     content: "";
     position: absolute;
@@ -96,7 +91,6 @@ STYLE = """
 .tree li:only-child::after, li:only-child::before {
     display: none;
 }
 .tree li:first-child::before, .tree li:last-child::after {
     border: 0 none;
 }
@@ -111,7 +105,6 @@ STYLE = """
 	-webkit-border-radius: 5px 0 0 0;
 	-moz-border-radius: 5px 0 0 0;
 }
 .tree ul ul::before {
     content: "";
     position: absolute;
@@ -124,7 +117,6 @@ STYLE = """
 .tree ul:has(> li:only-child)::before {
     width:40px;
 }
 a:before {
     border-right: 1px solid var(--body-text-color);
     border-bottom: 1px solid var(--body-text-color);
@@ -138,8 +130,6 @@ a:before {
     margin-left: 6px;
     transform: rotate(315deg);
 }
 .tree li a {
 	border: 1px solid var(--body-text-color);
 	padding: 5px;
@@ -155,7 +145,6 @@ a:before {
 .tree li a span {
 	padding: 5px;
 	font-size: 12px;
-	text-transform: uppercase;
 	letter-spacing: 1px;
 	font-weight: 500;
 }
@@ -166,7 +155,7 @@ a:before {
 .tree li a:hover+ul li::after, .tree li a:hover+ul li::before, .tree li a:hover+ul::before, .tree li a:hover+ul ul::before {
 	border-color: #7c2d12;
 }
-.chosen {
     background-color: #ea580c;
     width:auto!important;
 }
@@ -206,33 +195,37 @@ def generate_markdown_table(
 # Pre-change version of generate_nodes (the line marked "-" below is removed by this commit).
 # Renders one tree node and, recursively, all of its children as a nested <li> element.
 #   token_ix: token id; decoded with the module-level `tokenizer` to label the node.
 #   node:     object exposing `.table` and `.children` (a mapping token id -> child node).
 #   step:     only forwarded (incremented by one) to the recursive calls; not read otherwise.
 # Returns the HTML fragment as a string.
 def generate_nodes(token_ix, node, step):
     """Recursively generate HTML for the tree nodes."""
     token = tokenizer.decode([token_ix])
     # The anchor gets the CSS class 'chosen' when the node has no table, otherwise an empty class.
     # `clean` is defined elsewhere; presumably it sanitizes the token text for display (confirm).
-    html_content = f" <li> <a href='#' class='{('chosen' if node.table is None else '')}'> <span> <b>{token_ix}:<br>{clean(token)}</b> </span> "
     # The node's table string, when present, is embedded inside the anchor.
     if node.table is not None:
         html_content += node.table
     html_content += "</a>"
     # Children are emitted as a nested <ul>, one recursive call per child.
     if len(node.children.keys()) > 0:
         html_content += "<ul> "
         # NOTE: this loop rebinds `token_ix`, shadowing the parameter; it is not read again afterwards.
         for token_ix, subnode in node.children.items():
             html_content += generate_nodes(token_ix, subnode, step=step + 1)
         html_content += "</ul>"
     html_content += "</li>"
     return html_content
 def generate_html(start_sentence, original_tree):
     html_output = f"""<div class="custom-container">
 				<div class="tree">
-                <ul>
-                <li> <a href='#' id='root'> <span> <b>{start_sentence}</b> </span> {original_tree.table} </a>"""
-    if len(original_tree.children.keys()) > 0:
-        html_output += "<ul> "
-        for token_ix, subnode in original_tree.children.items():
-            html_output += generate_nodes(token_ix, subnode, step=1)
-        html_output += "</ul>"
     html_output += """
-        </ul>
         </div>
     </body>
     """
@@ -246,11 +239,14 @@ from dataclasses import dataclass
 # Pre-change version of the tree node used by the beam-search visualizer.
 @dataclass
 class BeamNode:
     # Running score of the sequence ending at this node (the root is created with 0).
     cumulative_score: float
     # Set by generate_beams to (input_length + step + 1) ** length_penalty;
     # presumably used to normalize the scores of this node's children (confirm).
     children_score_divider: float
     # String embedded in the node's HTML by generate_nodes when not None.
     # NOTE(review): nodes are created with table=None although the annotation is `str`.
     table: str
     # Text accumulated so far: the parent's sentence plus this node's token text.
     current_sentence: str
     # Child nodes keyed by token index.
     children: Dict[int, "BeamNode"]
 def generate_beams(start_sentence, scores, sequences, length_penalty):
@@ -258,13 +254,19 @@ def generate_beams(start_sentence, scores, sequences, length_penalty):
     input_length = len(tokenizer([start_sentence], return_tensors="pt"))
     original_tree = BeamNode(
         cumulative_score=0,
         table=None,
         current_sentence=start_sentence,
         children={},
         children_score_divider=((input_length + 1) ** length_penalty),
     )
     n_beams = len(scores[0])
     beam_trees = [original_tree] * n_beams
     for step, step_scores in enumerate(scores):
         (
             top_token_indexes,
@@ -273,8 +275,13 @@ def generate_beams(start_sentence, scores, sequences, length_penalty):
             current_completions,
             top_tokens,
         ) = ([], [], [], [], [])
-        for beam_ix in range(n_beams):
             current_beam = beam_trees[beam_ix]
             # Get top cumulative scores for the current beam
             current_top_token_indexes = list(
                 np.array(scores[step][beam_ix].argsort()[-n_beams:])[::-1]
@@ -337,14 +344,31 @@ def generate_beams(start_sentence, scores, sequences, length_penalty):
                 + scores[step][source_beam_ix][current_token_choice_ix].numpy()
             )
             beam_trees[source_beam_ix].children[current_token_choice_ix] = BeamNode(
                 table=None,
                 children={},
                 current_sentence=beam_trees[source_beam_ix].current_sentence
                 + current_token_choice,
                 cumulative_score=cumulative_score,
                 children_score_divider=((input_length + step + 1) ** length_penalty),
             )
         # Reassign all beams at once
         beam_trees = [
             beam_trees[int(top_df_selected.iloc[beam_ix]["beam_index"])]
@@ -355,6 +379,7 @@ def generate_beams(start_sentence, scores, sequences, length_penalty):
         for beam_ix in range(n_beams):
             current_token_choice_ix = top_df_selected.iloc[beam_ix]["token_index"]
             beam_trees[beam_ix] = beam_trees[beam_ix].children[current_token_choice_ix]
     return original_tree
@@ -373,9 +398,10 @@ def get_beam_search_html(input_text, number_steps, number_beams, length_penalty)
         do_sample=False,
     )
     markdown = "Output sequences:"
     decoded_sequences = tokenizer.batch_decode(outputs.sequences)
     for i, sequence in enumerate(decoded_sequences):
-        markdown += f"\n- {clean(sequence.replace('<s> ', ''))} (score {outputs.sequences_scores[i]:.2f})"
     original_tree = generate_beams(
         input_text,
@@ -393,7 +419,8 @@ with gr.Blocks(
     ),
     css=STYLE,
 ) as demo:
-    gr.Markdown("""# Beam search visualizer
 Play with the parameters below to understand how beam search decoding works!
@@ -402,15 +429,29 @@ Play with the parameters below to understand how beam search decoding works!
 - **Number of steps**: the number of tokens to generate
 - **Number of beams**: the number of beams to use
 - **Length penalty**: the length penalty to apply to outputs. `length_penalty` > 0.0 promotes longer sequences, while `length_penalty` < 0.0 encourages shorter sequences.
-""")
-    text = gr.Textbox(label="Sentence to decode from", value="Conclusion: thanks a lot. This article was originally published on")
     with gr.Row():
-        steps = gr.Slider(label="Number of steps", minimum=1, maximum=8, step=1, value=4)
-        beams = gr.Slider(label="Number of beams", minimum=2, maximum=4, step=1, value=3)
-        length_penalty = gr.Slider(label="Length penalty", minimum=-4, maximum=4, step=0.5, value=1)
     button = gr.Button()
     out_html = gr.Markdown()
     out_markdown = gr.Markdown()
-    button.click(get_beam_search_html, inputs=[text, steps, beams, length_penalty], outputs=[out_html, out_markdown])
 demo.launch()

 .prose table {
     margin-bottom: 0!important;
 }
 .prose td, th {
     padding-left: 2px;
     padding-right: 2px;
     padding-top: 0;
     padding-bottom: 0;
 }
 .tree {
 	padding: 0px;
 	margin: 0!important;
 	text-align: center;
     display:inline-block;
 }
 #root {
     display: inline-grid!important;
     width:auto!important;
     min-width: 220px;
 }
 .tree ul {
     padding-left: 20px;
     position: relative;
     justify-content: start;
     align-items: center;
 }
 .tree li::before, .tree li::after {
     content: "";
     position: absolute;
 .tree li:only-child::after, li:only-child::before {
     display: none;
 }
 .tree li:first-child::before, .tree li:last-child::after {
     border: 0 none;
 }
 	-webkit-border-radius: 5px 0 0 0;
 	-moz-border-radius: 5px 0 0 0;
 }
 .tree ul ul::before {
     content: "";
     position: absolute;
 .tree ul:has(> li:only-child)::before {
     width:40px;
 }
 a:before {
     border-right: 1px solid var(--body-text-color);
     border-bottom: 1px solid var(--body-text-color);
     margin-left: 6px;
     transform: rotate(315deg);
 }
 .tree li a {
 	border: 1px solid var(--body-text-color);
 	padding: 5px;
 .tree li a span {
 	padding: 5px;
 	font-size: 12px;
 	letter-spacing: 1px;
 	font-weight: 500;
 }
 .tree li a:hover+ul li::after, .tree li a:hover+ul li::before, .tree li a:hover+ul::before, .tree li a:hover+ul ul::before {
 	border-color: #7c2d12;
 }
+.end-of-text, .chosen {
     background-color: #ea580c;
     width:auto!important;
 }
 # Renders one tree node and, recursively, all of its children as a nested <li> element.
 #   token_ix: token id; decoded with the module-level `tokenizer` to label the node.
 #   node:     object exposing `.is_final`, `.total_score`, `.table` and `.children`
 #             (a mapping token id -> child node).
 #   step:     only forwarded (incremented by one) to the recursive calls; not read otherwise.
 # Returns the HTML fragment as a string.
 def generate_nodes(token_ix, node, step):
     """Recursively generate HTML for the tree nodes."""
     token = tokenizer.decode([token_ix])
     # Final nodes are rendered as leaves with the 'end-of-text' CSS class and their total score;
     # their table and children are not rendered. The ':.2f' format requires total_score to be a number.
     # `clean` is defined elsewhere; presumably it sanitizes the token text for display (confirm).
+    if node.is_final:
+        return f"<li> <a href='#' class='end-of-text'> <span> <b>{token_ix}:<br>{clean(token)}</b> <br> Total score: {node.total_score:.2f} </span> </a> </li>"
+    html_content = (
+        f"<li> <a href='#'> <span> <b>{token_ix}:<br>{clean(token)}</b> </span>"
+    )
     # The node's table string, when present, is embedded inside the anchor.
     if node.table is not None:
         html_content += node.table
     html_content += "</a>"
     # Children are emitted as a nested <ul>, one recursive call per child.
     if len(node.children.keys()) > 0:
         html_content += "<ul> "
         # NOTE: this loop rebinds `token_ix`, shadowing the parameter; it is not read again afterwards.
         for token_ix, subnode in node.children.items():
             html_content += generate_nodes(token_ix, subnode, step=step + 1)
         html_content += "</ul>"
     html_content += "</li>"
     return html_content
 def generate_html(start_sentence, original_tree):
     html_output = f"""<div class="custom-container">
 				<div class="tree">
+                <ul> <li> <a href='#' id='root'> <span> <b>{start_sentence}</b> </span> {original_tree.table} </a>"""
+    html_output += "<ul> "
+    for token_ix, subnode in original_tree.children.items():
+        html_output += generate_nodes(token_ix, subnode, step=1)
+    html_output += "</ul>"
     html_output += """
+        </li> </ul>
         </div>
     </body>
     """
 # Tree node used by the beam-search visualizer; one node per generated token, plus a root.
 @dataclass
 class BeamNode:
     # Token index this node was created for.
     # NOTE(review): the root node is created with current_token_ix=None although the annotation is `int`.
+    current_token_ix: int
     # Running score of the sequence ending at this node (the root is created with 0).
     cumulative_score: float
     # Set by generate_beams to (input_length + step + 1) ** length_penalty;
     # presumably used to normalize the scores of this node's children (confirm).
     children_score_divider: float
     # String embedded in the node's HTML by generate_nodes when not None.
     # NOTE(review): nodes are created with table=None although the annotation is `str`.
     table: str
     # Text accumulated so far: the parent's sentence plus this node's token text.
     current_sentence: str
     # Child nodes keyed by token index.
     children: Dict[int, "BeamNode"]
     # Set by generate_beams to cumulative_score / ((input_length + step - 1) ** length_penalty);
     # the root is created with total_score=None although the annotation is `float`.
     # NOTE(review): the divisor uses `step - 1` while children_score_divider uses `step + 1` - confirm intended.
+    total_score: float
     # True when the node was created at the last step or for the tokenizer's EOS token;
     # generate_nodes renders such nodes as leaves and generate_beams skips expanding them.
+    is_final: bool
 def generate_beams(start_sentence, scores, sequences, length_penalty):
     input_length = len(tokenizer([start_sentence], return_tensors="pt"))
     original_tree = BeamNode(
         cumulative_score=0,
+        current_token_ix=None,
         table=None,
         current_sentence=start_sentence,
         children={},
         children_score_divider=((input_length + 1) ** length_penalty),
+        total_score=None,
+        is_final=False,
     )
     n_beams = len(scores[0])
     beam_trees = [original_tree] * n_beams
+    candidate_nodes = []
     for step, step_scores in enumerate(scores):
         (
             top_token_indexes,
             current_completions,
             top_tokens,
         ) = ([], [], [], [], [])
+        for beam_ix in range(n_beams):  # Get possible descendants for each beam
             current_beam = beam_trees[beam_ix]
+            # skip if the beam is already final
+            if current_beam.is_final:
+                continue
             # Get top cumulative scores for the current beam
             current_top_token_indexes = list(
                 np.array(scores[step][beam_ix].argsort()[-n_beams:])[::-1]
                 + scores[step][source_beam_ix][current_token_choice_ix].numpy()
             )
             beam_trees[source_beam_ix].children[current_token_choice_ix] = BeamNode(
+                current_token_ix=current_token_choice_ix,
                 table=None,
                 children={},
                 current_sentence=beam_trees[source_beam_ix].current_sentence
                 + current_token_choice,
                 cumulative_score=cumulative_score,
+                total_score=cumulative_score
+                / ((input_length + step - 1) ** length_penalty),
                 children_score_divider=((input_length + step + 1) ** length_penalty),
+                is_final=(
+                    step == len(scores) - 1
+                    or current_token_choice_ix == tokenizer.eos_token_id
+                ),
             )
+            # Check this child should be selected as a top beam.
+            # Is it a final step or an EOS token?
+            if (
+                step == len(scores) - 1
+                or current_token_choice_ix == tokenizer.eos_token_id
+            ):
+                candidate_nodes.append(
+                    beam_trees[source_beam_ix].children[current_token_choice_ix]
+                )
         # Reassign all beams at once
         beam_trees = [
             beam_trees[int(top_df_selected.iloc[beam_ix]["beam_index"])]
         for beam_ix in range(n_beams):
             current_token_choice_ix = top_df_selected.iloc[beam_ix]["token_index"]
             beam_trees[beam_ix] = beam_trees[beam_ix].children[current_token_choice_ix]
+    print("Final nodes", candidate_nodes)
     return original_tree
         do_sample=False,
     )
     markdown = "Output sequences:"
+    # Sequences are padded anyway so you can batch decode them
     decoded_sequences = tokenizer.batch_decode(outputs.sequences)
     for i, sequence in enumerate(decoded_sequences):
+        markdown += f"\n- '{clean(sequence.replace('<s> ', ''))}' (score {outputs.sequences_scores[i]:.2f})"
     original_tree = generate_beams(
         input_text,
     ),
     css=STYLE,
 ) as demo:
+    gr.Markdown(
+        """# Beam search visualizer
 Play with the parameters below to understand how beam search decoding works!
 - **Number of steps**: the number of tokens to generate
 - **Number of beams**: the number of beams to use
 - **Length penalty**: the length penalty to apply to outputs. `length_penalty` > 0.0 promotes longer sequences, while `length_penalty` < 0.0 encourages shorter sequences.
+"""
+    )
+    text = gr.Textbox(
+        label="Sentence to decode from",
+        value="Conclusion: thanks a lot. This article was originally published on",
+    )
     with gr.Row():
+        steps = gr.Slider(
+            label="Number of steps", minimum=1, maximum=8, step=1, value=4
+        )
+        beams = gr.Slider(
+            label="Number of beams", minimum=2, maximum=4, step=1, value=3
+        )
+        length_penalty = gr.Slider(
+            label="Length penalty", minimum=-4, maximum=4, step=0.5, value=1
+        )
     button = gr.Button()
     out_html = gr.Markdown()
     out_markdown = gr.Markdown()
+    button.click(
+        get_beam_search_html,
+        inputs=[text, steps, beams, length_penalty],
+        outputs=[out_html, out_markdown],
+    )
 demo.launch()