xxyux committed (verified)
Commit 719f946 · Parent: ac04d4c

Update app.py

Files changed (1)
app.py +3 -3
app.py CHANGED
@@ -44,9 +44,9 @@ def Compute_Parameters_attention(hidden_size, kv_hidden_size, is_bias, act_func,
     # attention:
     # layernorm: h/2h
     if act_func == "LLaMA":
-        num_parameters_mlp = hidden_size  # RMSNorm
+        num_parameters_attention = hidden_size  # RMSNorm
     else:
-        num_parameters_mlp = 2 * hidden_size  # LayerNorm
+        num_parameters_attention = 2 * hidden_size  # LayerNorm
     # QKV weight: 3h*h/tp, bias: 3h/tp
     # output linear weight: h*h/tp, bias: h
     num_parameters_attention_Q_weight = hidden_size * hidden_size / tp
@@ -85,7 +85,7 @@ def Compute_Parameters(seq_length, vocab_size, layer_num, hidden_size, ffn_size,
     kv_hidden_size = hidden_size / head_num * group_query_num
 
     # input part
-    num_parameters_input = Compute_Parameters_input(seq_length, hidden_size, vocab_size, tp)
+    num_parameters_input = Compute_Parameters_input(seq_length, hidden_size, vocab_size, act_func, tp)
 
     # middle layers part
     num_parameters_attention = Compute_Parameters_attention(hidden_size, kv_hidden_size, is_bias, act_func, tp)
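
The comments in the first hunk spell out the per-layer attention parameter count: the pre-attention norm contributes h (RMSNorm, used by LLaMA-style models) or 2h (LayerNorm), and the QKV and output projections are sharded across tensor-parallel ranks (tp). A minimal sketch of the full helper under those formulas follows; only the diffed lines are from the source, so the grouped-query/bias handling and the return value are assumptions:

def Compute_Parameters_attention(hidden_size, kv_hidden_size, is_bias, act_func, tp):
    # layernorm: RMSNorm stores one weight vector (h); LayerNorm stores
    # a weight and a bias (2h)
    if act_func == "LLaMA":
        num_parameters_attention = hidden_size  # RMSNorm
    else:
        num_parameters_attention = 2 * hidden_size  # LayerNorm
    # Q weight: h*h/tp; K and V weights: h*kv_h/tp each, where kv_hidden_size
    # shrinks under grouped-query attention; output linear weight: h*h/tp
    num_parameters_attention += hidden_size * hidden_size / tp
    num_parameters_attention += 2 * hidden_size * kv_hidden_size / tp
    num_parameters_attention += hidden_size * hidden_size / tp
    if is_bias:
        # assumed: QKV biases are sharded (h/tp + 2*kv_h/tp); the output
        # bias is replicated on every rank (h)
        num_parameters_attention += (hidden_size + 2 * kv_hidden_size) / tp
        num_parameters_attention += hidden_size
    return num_parameters_attention

With the rename, the accumulator started by the norm branch is the same variable the projection terms add into, which the old num_parameters_mlp name obscured.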
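The second hunk only changes the call site, threading act_func into Compute_Parameters_input to match its signature. The diff does not show that function's body, so the sketch below is a hypothetical reading of why the argument is needed: LLaMA-style models use rotary position embeddings with no learned parameters, while others keep a learned seq_length*hidden_size position table.

def Compute_Parameters_input(seq_length, hidden_size, vocab_size, act_func, tp):
    # word embedding table, sharded along the vocab dimension: v*h/tp
    num_parameters_word_embedding = vocab_size * hidden_size / tp
    # hypothetical: rotary embeddings (LLaMA) add no learned parameters;
    # a learned position table adds s*h
    if act_func == "LLaMA":
        num_parameters_position_embedding = 0
    else:
        num_parameters_position_embedding = seq_length * hidden_size
    return num_parameters_word_embedding + num_parameters_position_embedding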