Rafal committed
Commit f250d4a · 1 Parent(s): a6d6e4e

Added testing of FALCON

Files changed (2):
  1. app.py +1 -1
  2. mgr_bias_scoring.py +19 -5
app.py CHANGED
@@ -872,7 +872,7 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
     gen_title = gr.Markdown("### Select Tested Model", visible=True)
 
     # Tested Model Selection - "openlm-research/open_llama_7b"
-    tested_model_name = gr.Dropdown( ["bert-base-uncased","bert-large-uncased","gpt2","gpt2-medium","gpt2-large","emilyalsentzer/Bio_ClinicalBERT","microsoft/biogpt","openlm-research/open_llama_3b", "openlm-research/open_llama_7b"], value="bert-base-uncased",
+    tested_model_name = gr.Dropdown( ["bert-base-uncased","bert-large-uncased","gpt2","gpt2-medium","gpt2-large","emilyalsentzer/Bio_ClinicalBERT","microsoft/biogpt","openlm-research/open_llama_3b", "openlm-research/open_llama_7b", "tiiuae/falcon-7b"], value="bert-base-uncased",
         multiselect=None,
         interactive=True,
         label="Tested Language Model",
mgr_bias_scoring.py CHANGED
@@ -20,6 +20,8 @@ from transformers import GPT2LMHeadModel, GPT2Tokenizer
 from transformers import BioGptForCausalLM, BioGptTokenizer
 # LLAMA
 from transformers import LlamaTokenizer, LlamaForCausalLM
+# FALCON
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
 import mgr_sentences as smgr
 import mgr_biases as bmgr
@@ -71,7 +73,18 @@ def _getModel(model_name, device):
             offload_folder="offload",
             offload_state_dict = True,
             device_map='auto')
-        #model.tie_weights()
+    elif "falcon" in model_name.lower():
+        print(f"Getting FALCON model: {model_name}")
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(model_name,
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True, ##
+            #use_safetensors=True, ##
+            offload_folder="offload",
+            offload_state_dict = True,
+            device_map='auto')
+        #model.tie_weights()
     if model == None:
         print("Model is empty!!!")
     else:
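Unlike the BERT/GPT-2/LLaMA branches, Falcon is loaded through the generic Auto classes; at the time of this commit the Falcon checkpoints shipped their own modeling code in the model repo, which is why trust_remote_code=True is required. A standalone sketch of the same loading path, runnable outside the app given enough memory for the 7B checkpoint:

    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    model_name = "tiiuae/falcon-7b"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,  # roughly halves memory vs. float32
        trust_remote_code=True,      # Falcon's modeling code lives in the repo
        low_cpu_mem_usage=True,
        device_map="auto",           # let accelerate place (and offload) layers
    )

    # Smoke test: the bias scoring only needs next-token probabilities,
    # but a short generation confirms the causal-LM head is wired up.
    inputs = tokenizer("The capital of France is", return_tensors="pt").to(model.device)
    out = model.generate(**inputs, max_new_tokens=5)
    print(tokenizer.decode(out[0], skip_special_tokens=True))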
@@ -634,7 +647,7 @@ def testModelProbability(model_name, model, tokenizer, device):
         print(f"Testing on GPT-2 family model: {model_name}")
         #print(getGPT2Prob(model, tokenizer, "African American man was [MASK]", ["poor","rich"], device, verbose=True))
     elif 'llama' in model_name:
-        print(f"Testing on LLAMA family model: {model_name}")
+        print(f"Testing on LLAMA or FALCON family model: {model_name}")
         #print(getGPT2Prob(model, tokenizer, "African American man was [MASK]", ["poor","rich"], device, verbose=True))
 
 # bias test on one row of a dataframe -> row is one sentence template with target terms
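One inconsistency survives this hunk: the branch condition still matches only 'llama', so a Falcon model name never reaches the updated message. Mirroring the condition this commit adds to testBiasOnPairs (below) would close the gap; a one-line sketch, not part of the commit:

    elif 'llama' in model_name.lower() or 'falcon' in model_name.lower():
        print(f"Testing on LLAMA or FALCON family model: {model_name}")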
@@ -645,7 +658,8 @@ def checkBias(row, biasProbFunc, model, tokenizer, device, progress, df_len):
     if progress != None:
         progress(row.name/df_len, desc=f"{row['template']}")
 
-    test_res = [1,0] # fail-safe
+    test_res = [0,1]
+    random.shuffle(test_res) # fail-safe
     try:
         test_res, sentences = biasProbFunc(model, tokenizer, row['template'].replace("[T]","[MASK]"), grp_terms, device)
     except ValueError as err:
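The old fail-safe assigned the same constant [1,0] to every row whose probability test raised an error, so errored rows always fell on the same side of the comparison; shuffling turns each errored row into a coin flip, keeping failures from systematically pushing the aggregate score in one direction. (This relies on random being imported at module level, which is not visible in this hunk.) A tiny illustration of the difference:

    import random

    def shuffled_failsafe():
        test_res = [0, 1]
        random.shuffle(test_res)  # errored rows now split ~50/50
        return test_res

    random.seed(0)
    mean_first = sum(shuffled_failsafe()[0] for _ in range(10_000)) / 10_000
    print(mean_first)  # ~0.5; the constant [1,0] fail-safe would always give 1.0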
@@ -740,8 +754,8 @@ def testBiasOnPairs(gen_pairs_df, bias_spec, model_name, model, tokenizer, devic
         gen_pairs_df[['stereotyped','top_term','bottom_term','top_logit','bottom_logit']] = gen_pairs_df.progress_apply(
             biasTestFunc, biasProbFunc=modelGPT2TestFunc, model=model, tokenizer=tokenizer, device=device, progress=progress, df_len=gen_pairs_df.shape[0], axis=1)
 
-    elif 'llama' in model_name.lower():
-        print(f"Testing on LLAMA family model: {model_name}")
+    elif 'llama' in model_name.lower() or 'falcon' in model_name.lower():
+        print(f"Testing on LLAMA or FALCON family model: {model_name}")
         gen_pairs_df[['stereotyped','top_term','bottom_term','top_logit','bottom_logit']] = gen_pairs_df.progress_apply(
             biasTestFunc, biasProbFunc=modelGPT2TestFunc, model=model, tokenizer=tokenizer, device=device, progress=progress, df_len=gen_pairs_df.shape[0], axis=1)
 