Spaces:

harmdevries
/

transformer_inference

Runtime error

harmdevries committed on Oct 22, 2022

Commit

28b4830

1 Parent(s): 8cbefab

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,17 +1,17 @@
 import streamlit as st
-def number_field(label, **args):
   c1, c2 = st.columns([2, 4])
   c1.write(label)
-  return c2.number_input('', **args)
 def print_kernel_execution(c1, c2, comp_flop, mem_bytes):
   arith_int = comp_flop/mem_bytes
   exec_time = (comp_flop/TFLOPS + mem_bytes/GB_S)*1000
-  comp_flop = round(mha_flop/1e9, 2)
-  mem_bytes = round(mha_bytes/1e6, 2)
   c1.write("GFLOP:")
   c2.write(str(comp_flop))
@@ -66,17 +66,17 @@ mqa_bytes = 2*bs*h*(d/h) + 2*bs*n*(d/h) + 2*bs*h*n
 c1, c2 = st.columns([2, 3])
 att1_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
-st.header('Attention scores: ')
 st.write("Calculation depends on sequence length. We show numbers for maximum sequence length n.")
 st.subheader("Multi-Head Attention")
-mha_flop = 2*bs*h*(d/h)*n
-mha_bytes = 2*bs*h*(d/h) + 2*bs*h*n*(d/h) + 2*bs*h*n
 c1, c2 = st.columns([2, 3])
 att_mha_time = print_kernel_execution(c1, c2, mha_flop, mha_bytes)
 st.subheader("Multi-Query Attention")
-mqa_flop = 2*bs*h*(d/h)*n
-mqa_bytes = 2*bs*h*(d/h) + 2*bs*n*(d/h) + 2*bs*h*n
 c1, c2 = st.columns([2, 3])
 att_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)

 import streamlit as st
+def number_field(label, **kwargs):
   c1, c2 = st.columns([2, 4])
   c1.write(label)
+  return c2.number_input('', **kwargs)
 def print_kernel_execution(c1, c2, comp_flop, mem_bytes):
   arith_int = comp_flop/mem_bytes
   exec_time = (comp_flop/TFLOPS + mem_bytes/GB_S)*1000
+  comp_flop = round(comp_flop/1e9, 2)
+  mem_bytes = round(mem_bytes/1e6, 2)
   c1.write("GFLOP:")
   c2.write(str(comp_flop))
 c1, c2 = st.columns([2, 3])
 att1_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)
+st.header('Attention scores: attention-value gemm')
 st.write("Calculation depends on sequence length. We show numbers for maximum sequence length n.")
 st.subheader("Multi-Head Attention")
+mha_flop = 2*bs*h*n*(d/h)
+mha_bytes = 2*bs*h*n + 2*bs*h*n*(d/h) + 2*bs*h*(d/h)
 c1, c2 = st.columns([2, 3])
 att_mha_time = print_kernel_execution(c1, c2, mha_flop, mha_bytes)
 st.subheader("Multi-Query Attention")
+mqa_flop = 2*bs*h*n*(d/h)
+mqa_bytes = 2*bs*h*n + 2*bs*n*(d/h) + 2*bs*h*(d/h)
 c1, c2 = st.columns([2, 3])
 att_mqa_time = print_kernel_execution(c1, c2, mqa_flop, mqa_bytes)