chivier committed on
Commit
1934588
·
1 Parent(s): f71ffb9

sync from github

Browse files
Files changed (1) hide show
  1. src/backend/hflm_with_measurement.py +27 -15
src/backend/hflm_with_measurement.py CHANGED
@@ -24,7 +24,7 @@ from transformers.models.auto.modeling_auto import (
24
  MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
25
  )
26
  from transformers import TextStreamer
27
-
28
  from lm_eval import utils
29
  from lm_eval.api.instance import Instance
30
  from lm_eval.api.model import TemplateLM
@@ -333,21 +333,26 @@ class HFLMWithMeasurement(HFLM):
333
  linear_count = 0
334
  element_wise_mul = 0
335
  for name, module in self.model.named_modules():
336
- if ('layers.0.' in name or 'decoder.0.' in name) and ('attn' not in name):
337
- if 'experts.0.' in name:
 
 
338
  if isinstance(module, torch.nn.Linear):
339
  # print(name, module)
340
  linear_count += 1
341
- elif 'experts' not in name:
342
- if "gate" not in name or "gate_proj" in name:
343
- if "gate_proj" in name:
344
- element_wise_mul = 1
345
- if isinstance(module, torch.nn.Linear):
346
- # print(name, module)
347
- linear_count += 1
 
 
348
  else:
349
  continue
350
  print(f"linear_count: {linear_count}")
 
351
 
352
  stopping_criteria = stop_sequences_criteria(
353
  self.tokenizer, stop, context.shape[1], context.shape[0]
@@ -373,13 +378,17 @@ class HFLMWithMeasurement(HFLM):
373
  model_info = API.model_info(repo_id=self.pretrained, revision=self.revision)
374
  model_size_param = get_model_size(model_info=model_info, precision=self.precision)
375
 
376
- n_layers = model_config.num_hidden_layers if hasattr(model_config, "num_hidden_layers") else model_config.num_layers
 
 
377
  d_model = model_config.hidden_size if hasattr(model_config, "hidden_size") else model_config.d_model
378
 
379
  if hasattr(model_config, "num_experts_per_tok"):
380
  n_experts_per_tok = model_config.num_experts_per_tok
381
  elif hasattr(model_config, "num_selected_experts"):
382
  n_experts_per_tok = model_config.num_selected_experts
 
 
383
  else:
384
  n_experts_per_tok = 1
385
 
@@ -389,16 +398,19 @@ class HFLMWithMeasurement(HFLM):
389
  d_ff = model_config.intermediate_size
390
  elif hasattr(model_config, "d_ff"):
391
  d_ff = model_config.d_ff
 
 
 
 
392
  else:
393
- if hasattr(model_config, "ff_ratio"):
394
- d_ff = d_model * model_config.ff_ratio
395
- else:
396
- raise ValueError("Unknown FFN dimension")
397
 
398
  if hasattr(model_config, "num_local_experts"):
399
  num_experts = model_config.num_local_experts
400
  elif hasattr(model_config, "num_experts"):
401
  num_experts = model_config.num_experts
 
 
402
  else:
403
  num_experts = 1
404
 
 
24
  MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
25
  )
26
  from transformers import TextStreamer
27
+ from transformers.models.dbrx.modeling_dbrx import DbrxExpertGLU
28
  from lm_eval import utils
29
  from lm_eval.api.instance import Instance
30
  from lm_eval.api.model import TemplateLM
 
333
  linear_count = 0
334
  element_wise_mul = 0
335
  for name, module in self.model.named_modules():
336
+ if ('layers.0.' in name or "transformer.blocks.0" in name) and ('attn' not in name):
337
+ if 'experts.0.' in name or "ffn.experts" in name:
338
+ if "linear_v" in name:
339
+ element_wise_mul = 1
340
  if isinstance(module, torch.nn.Linear):
341
  # print(name, module)
342
  linear_count += 1
343
+ elif isinstance(module, DbrxExpertGLU):
344
+ linear_count = 3
345
+ # elif 'experts' not in name:
346
+ # if ("gate" not in name and "router" not in name) or "gate_proj" in name:
347
+ # if "gate_proj" in name:
348
+ # element_wise_mul = 1
349
+ # if isinstance(module, torch.nn.Linear):
350
+ # # print(name, module)
351
+ # linear_count += 1
352
  else:
353
  continue
354
  print(f"linear_count: {linear_count}")
355
+ print(f"element_wise_mul: {element_wise_mul}")
356
 
357
  stopping_criteria = stop_sequences_criteria(
358
  self.tokenizer, stop, context.shape[1], context.shape[0]
 
378
  model_info = API.model_info(repo_id=self.pretrained, revision=self.revision)
379
  model_size_param = get_model_size(model_info=model_info, precision=self.precision)
380
 
381
+ n_layers = model_config.num_hidden_layers if hasattr(model_config, "num_hidden_layers") else \
382
+ (model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layers)
383
+
384
  d_model = model_config.hidden_size if hasattr(model_config, "hidden_size") else model_config.d_model
385
 
386
  if hasattr(model_config, "num_experts_per_tok"):
387
  n_experts_per_tok = model_config.num_experts_per_tok
388
  elif hasattr(model_config, "num_selected_experts"):
389
  n_experts_per_tok = model_config.num_selected_experts
390
+ elif hasattr(model_config, "ffn_config"):
391
+ n_experts_per_tok = model_config.ffn_config.moe_top_k
392
  else:
393
  n_experts_per_tok = 1
394
 
 
398
  d_ff = model_config.intermediate_size
399
  elif hasattr(model_config, "d_ff"):
400
  d_ff = model_config.d_ff
401
+ elif hasattr(model_config, "ff_ratio"):
402
+ d_ff = d_model * model_config.ff_ratio
403
+ elif hasattr(model_config, "ffn_config"):
404
+ d_ff = model_config.ffn_config.ffn_hidden_size
405
  else:
406
+ raise ValueError("Unknown FFN dimension")
 
 
 
407
 
408
  if hasattr(model_config, "num_local_experts"):
409
  num_experts = model_config.num_local_experts
410
  elif hasattr(model_config, "num_experts"):
411
  num_experts = model_config.num_experts
412
+ elif hasattr(model_config, "ffn_config"):
413
+ num_experts = model_config.ffn_config.moe_num_experts
414
  else:
415
  num_experts = 1
416