Spaces:

adamcasson
/

transformer-flops-calculator

Running

App Files Files Community

adamcasson committed on Apr 15, 2023

Commit

64e5a86

1 Parent(s): caf7cfa

add app

Browse files

Files changed (1) hide show

app.py +91 -0

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import gradio as gr
def deepmind_flops(n_layer, d_model, d_ff, d_attn, n_ctx, n_vocab, n_heads):
    """Itemise the FLOPs of one pass over a single sequence, term by term.

    Args:
        n_layer: Number of transformer layers.
        d_model: Model (residual stream) width.
        d_ff: Feed-forward hidden width.
        d_attn: Width of a single attention head.
        n_ctx: Sequence length in tokens.
        n_vocab: Vocabulary size.
        n_heads: Attention heads per layer.

    Returns:
        An 8-tuple, in this order: embeddings, attention QKV projection,
        attention logits, attention softmax, attention reduction,
        attention output projection, feed-forward, final logits.
        The six middle entries are already multiplied by ``n_layer``;
        the first and last entries are counted once.
    """
    # NOTE(review): the function name suggests this follows a DeepMind
    # FLOPs accounting (presumably the Chinchilla paper appendix) -- confirm.
    attn_width = d_attn * n_heads  # combined width of all heads
    ff_weights = d_model * d_ff  # size of one feed-forward weight matrix

    # Costs incurred once per layer, in the order they are returned.
    per_layer = (
        2 * n_ctx * 3 * d_model * attn_width,  # QKV projection
        2 * n_ctx * n_ctx * attn_width,  # attention logits
        3 * n_heads * n_ctx * n_ctx,  # softmax
        2 * n_ctx * n_ctx * attn_width,  # attention reduction
        2 * n_ctx * attn_width * d_model,  # output projection
        2 * n_ctx * (ff_weights + ff_weights),  # two feed-forward matmuls
    )

    # Costs incurred once per model.
    embedding_cost = 2 * n_ctx * n_vocab * d_model
    logit_cost = 2 * n_ctx * d_model * n_vocab

    return (
        embedding_cost,
        *(term * n_layer for term in per_layer),
        logit_cost,
    )
def calculator(n_layer, d_model, n_heads, n_vocab, n_ctx, ff_ratio, incl_embed):
    """Compute FLOPs per sequence and per token from the UI inputs.

    Args:
        n_layer: Number of transformer layers.
        d_model: Model width; must be divisible by ``n_heads``.
        n_heads: Attention heads per layer; must be positive.
        n_vocab: Vocabulary size.
        n_ctx: Sequence length in tokens; must be positive.
        ff_ratio: Feed-forward width as a multiple of ``d_model``.
        incl_embed: When true, the embedding and final-logits terms are
            included in the total; otherwise only the per-layer terms are.

    Returns:
        A ``(flops_per_sequence, flops_per_token)`` pair, where the second
        value is the first divided by ``n_ctx``.

    Raises:
        gr.Error: If a numeric input is missing, if ``n_heads`` or ``n_ctx``
            is not positive, or if ``d_model`` is not divisible by
            ``n_heads``.
    """
    numeric_inputs = {
        "n_layer": n_layer,
        "d_model": d_model,
        "n_heads": n_heads,
        "n_vocab": n_vocab,
        "n_ctx": n_ctx,
        "ff_ratio": ff_ratio,
    }
    # A field may arrive as None; report that to the user instead of
    # letting the arithmetic below fail with a TypeError.
    missing = [name for name, value in numeric_inputs.items() if value is None]
    if missing:
        raise gr.Error(f"Missing value for: {', '.join(missing)}")

    # Both values are used as divisors below, so they must be validated
    # before any arithmetic to avoid a raw ZeroDivisionError.
    if n_heads <= 0:
        raise gr.Error("n_heads must be a positive number")
    if n_ctx <= 0:
        raise gr.Error("n_ctx must be a positive number")
    if d_model % n_heads != 0:
        raise gr.Error("d_model must be divisible by n_heads")

    d_attn = d_model // n_heads
    d_ff = d_model * ff_ratio
    flops_terms = deepmind_flops(
        n_layer, d_model, d_ff, d_attn, n_ctx, n_vocab, n_heads
    )

    # The first and last terms are the embedding and final-logits costs;
    # slicing them off leaves only the per-layer terms.
    counted_terms = flops_terms if incl_embed else flops_terms[1:-1]
    flops_per_sequence = sum(counted_terms)
    return flops_per_sequence, flops_per_sequence / n_ctx
# Build the calculator page: an input column and an output column side by
# side, followed by a table of clickable example configurations.
with gr.Blocks() as iface:
    with gr.Row():
        # Left column: model hyperparameters and the submit button.
        with gr.Column():
            n_layer = gr.Number(label="Number of layers (n_layer)")
            d_model = gr.Number(label="Model dimensions (d_model)")
            n_heads = gr.Number(label="Number of attention heads per layer (n_heads)")
            n_vocab = gr.Number(label="Vocabulary size (n_vocab)")
            n_ctx = gr.Number(label="Sequence length")
            ff_ratio = gr.Number(label="Feedforward ratio", value=4)
            incl_embed = gr.Checkbox(
                label="Include embedding and logits FLOPs", value=True
            )
            btn = gr.Button(variant="primary", value="Submit")
        # Right column: the two computed results.
        with gr.Column():
            flops_per_sequence = gr.Number(label="FLOPs per sequence")
            flops_per_token = gr.Number(label="FLOPs per token")

    # Component order must match calculator's parameter and return order.
    calc_inputs = [n_layer, d_model, n_heads, n_vocab, n_ctx, ff_ratio, incl_embed]
    calc_outputs = [flops_per_sequence, flops_per_token]

    btn.click(fn=calculator, inputs=calc_inputs, outputs=calc_outputs)

    gr.Markdown("### GPT-3 model family examples")
    gr.Markdown(
        "In order are the 125M, 350M, 1.3B, 2.7B, 6.7B, 13B, 30B, 66B, and 175B parameter variants."
    )

    # Only (n_layer, d_model, n_heads) vary between examples; vocabulary
    # size, sequence length, feed-forward ratio and the embedding flag are
    # the same in every row.
    # NOTE(review): the 30B and 66B shapes do not seem to appear in the GPT-3
    # paper's model table (they look like OPT sizes) -- confirm the heading.
    model_shapes = [
        (12, 768, 12),
        (24, 1024, 16),
        (24, 2048, 32),
        (32, 2560, 32),
        (32, 4096, 32),
        (40, 5120, 40),
        (48, 7168, 56),
        (64, 9216, 72),
        (96, 12288, 96),
    ]
    example_rows = [
        [layers, width, heads, 50257, 4096, 4, True]
        for layers, width, heads in model_shapes
    ]
    gr.Examples(
        examples=example_rows,
        inputs=calc_inputs,
        outputs=calc_outputs,
        fn=calculator,
        cache_examples=False,
    )

iface.launch()