Spaces:
Sleeping
Sleeping
Rafal
committed on
Commit
·
f250d4a
1
Parent(s):
a6d6e4e
Added testing FALCON
Browse files- app.py +1 -1
- mgr_bias_scoring.py +19 -5
app.py
CHANGED
@@ -872,7 +872,7 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
|
|
872 |
gen_title = gr.Markdown("### Select Tested Model", visible=True)
|
873 |
|
874 |
# Tested Model Selection - "openlm-research/open_llama_7b"
|
875 |
-
tested_model_name = gr.Dropdown( ["bert-base-uncased","bert-large-uncased","gpt2","gpt2-medium","gpt2-large","emilyalsentzer/Bio_ClinicalBERT","microsoft/biogpt","openlm-research/open_llama_3b", "openlm-research/open_llama_7b"], value="bert-base-uncased",
|
876 |
multiselect=None,
|
877 |
interactive=True,
|
878 |
label="Tested Language Model",
|
|
|
872 |
gen_title = gr.Markdown("### Select Tested Model", visible=True)
|
873 |
|
874 |
# Tested Model Selection - "openlm-research/open_llama_7b"
|
875 |
+
tested_model_name = gr.Dropdown( ["bert-base-uncased","bert-large-uncased","gpt2","gpt2-medium","gpt2-large","emilyalsentzer/Bio_ClinicalBERT","microsoft/biogpt","openlm-research/open_llama_3b", "openlm-research/open_llama_7b", "tiiuae/falcon-7b"], value="bert-base-uncased",
|
876 |
multiselect=None,
|
877 |
interactive=True,
|
878 |
label="Tested Language Model",
|
mgr_bias_scoring.py
CHANGED
@@ -20,6 +20,8 @@ from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
|
20 |
from transformers import BioGptForCausalLM, BioGptTokenizer
|
21 |
# LLAMA
|
22 |
from transformers import LlamaTokenizer, LlamaForCausalLM
|
|
|
|
|
23 |
|
24 |
import mgr_sentences as smgr
|
25 |
import mgr_biases as bmgr
|
@@ -71,7 +73,18 @@ def _getModel(model_name, device):
|
|
71 |
offload_folder="offload",
|
72 |
offload_state_dict = True,
|
73 |
device_map='auto')
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
if model == None:
|
76 |
print("Model is empty!!!")
|
77 |
else:
|
@@ -634,7 +647,7 @@ def testModelProbability(model_name, model, tokenizer, device):
|
|
634 |
print(f"Testing on GPT-2 family model: {model_name}")
|
635 |
#print(getGPT2Prob(model, tokenizer, "African American man was [MASK]", ["poor","rich"], device, verbose=True))
|
636 |
elif 'llama' in model_name:
|
637 |
-
print(f"Testing on LLAMA family model: {model_name}")
|
638 |
#print(getGPT2Prob(model, tokenizer, "African American man was [MASK]", ["poor","rich"], device, verbose=True))
|
639 |
|
640 |
# bias test on one row of a dataframe -> row is one sentence template with target terms
|
@@ -645,7 +658,8 @@ def checkBias(row, biasProbFunc, model, tokenizer, device, progress, df_len):
|
|
645 |
if progress != None:
|
646 |
progress(row.name/df_len, desc=f"{row['template']}")
|
647 |
|
648 |
-
test_res = [1
|
|
|
649 |
try:
|
650 |
test_res, sentences = biasProbFunc(model, tokenizer, row['template'].replace("[T]","[MASK]"), grp_terms, device)
|
651 |
except ValueError as err:
|
@@ -740,8 +754,8 @@ def testBiasOnPairs(gen_pairs_df, bias_spec, model_name, model, tokenizer, devic
|
|
740 |
gen_pairs_df[['stereotyped','top_term','bottom_term','top_logit','bottom_logit']] = gen_pairs_df.progress_apply(
|
741 |
biasTestFunc, biasProbFunc=modelGPT2TestFunc, model=model, tokenizer=tokenizer, device=device, progress=progress, df_len=gen_pairs_df.shape[0], axis=1)
|
742 |
|
743 |
-
elif 'llama' in model_name.lower():
|
744 |
-
print(f"Testing on LLAMA family model: {model_name}")
|
745 |
gen_pairs_df[['stereotyped','top_term','bottom_term','top_logit','bottom_logit']] = gen_pairs_df.progress_apply(
|
746 |
biasTestFunc, biasProbFunc=modelGPT2TestFunc, model=model, tokenizer=tokenizer, device=device, progress=progress, df_len=gen_pairs_df.shape[0], axis=1)
|
747 |
|
|
|
20 |
from transformers import BioGptForCausalLM, BioGptTokenizer
|
21 |
# LLAMA
|
22 |
from transformers import LlamaTokenizer, LlamaForCausalLM
|
23 |
+
# FALCON
|
24 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
25 |
|
26 |
import mgr_sentences as smgr
|
27 |
import mgr_biases as bmgr
|
|
|
73 |
offload_folder="offload",
|
74 |
offload_state_dict = True,
|
75 |
device_map='auto')
|
76 |
+
elif "falcon" in model_name.lower():
|
77 |
+
print(f"Getting FALCON model: {model_name}")
|
78 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
79 |
+
model = AutoModelForCausalLM.from_pretrained(model_name,
|
80 |
+
torch_dtype=torch.bfloat16,
|
81 |
+
trust_remote_code=True,
|
82 |
+
low_cpu_mem_usage=True, ##
|
83 |
+
#use_safetensors=True, ##
|
84 |
+
offload_folder="offload",
|
85 |
+
offload_state_dict = True,
|
86 |
+
device_map='auto')
|
87 |
+
#model.tie_weights()
|
88 |
if model == None:
|
89 |
print("Model is empty!!!")
|
90 |
else:
|
|
|
647 |
print(f"Testing on GPT-2 family model: {model_name}")
|
648 |
#print(getGPT2Prob(model, tokenizer, "African American man was [MASK]", ["poor","rich"], device, verbose=True))
|
649 |
elif 'llama' in model_name:
|
650 |
+
print(f"Testing on LLAMA or FALCON family model: {model_name}")
|
651 |
#print(getGPT2Prob(model, tokenizer, "African American man was [MASK]", ["poor","rich"], device, verbose=True))
|
652 |
|
653 |
# bias test on one row of a dataframe -> row is one sentence template with target terms
|
|
|
658 |
if progress != None:
|
659 |
progress(row.name/df_len, desc=f"{row['template']}")
|
660 |
|
661 |
+
test_res = [0,1]
|
662 |
+
random.shuffle(test_res) # fail-safe
|
663 |
try:
|
664 |
test_res, sentences = biasProbFunc(model, tokenizer, row['template'].replace("[T]","[MASK]"), grp_terms, device)
|
665 |
except ValueError as err:
|
|
|
754 |
gen_pairs_df[['stereotyped','top_term','bottom_term','top_logit','bottom_logit']] = gen_pairs_df.progress_apply(
|
755 |
biasTestFunc, biasProbFunc=modelGPT2TestFunc, model=model, tokenizer=tokenizer, device=device, progress=progress, df_len=gen_pairs_df.shape[0], axis=1)
|
756 |
|
757 |
+
elif 'llama' in model_name.lower() or 'falcon' in model_name.lower():
|
758 |
+
print(f"Testing on LLAMA or FALCON family model: {model_name}")
|
759 |
gen_pairs_df[['stereotyped','top_term','bottom_term','top_logit','bottom_logit']] = gen_pairs_df.progress_apply(
|
760 |
biasTestFunc, biasProbFunc=modelGPT2TestFunc, model=model, tokenizer=tokenizer, device=device, progress=progress, df_len=gen_pairs_df.shape[0], axis=1)
|
761 |
|