Spaces:

AhmedSSabir
/

Demo-for-Gender-Score

Sleeping

App Files Files Community

AhmedSSabir committed on Apr 19, 2024

Commit

78464e7

verified ·

1 Parent(s): c505acd

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -25

app.py CHANGED Viewed

@@ -59,35 +59,63 @@ tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
 #tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
-def cloze_prob(text):
-	whole_text_encoding = tokenizer.encode(text)
-	text_list = text.split()
-	stem = ' '.join(text_list[:-1])
-	stem_encoding = tokenizer.encode(stem)
-	cw_encoding = whole_text_encoding[len(stem_encoding):]
-	tokens_tensor = torch.tensor([whole_text_encoding])
-	with torch.no_grad():
-		outputs = model(tokens_tensor)
-		predictions = outputs[0]
-	logprobs = []
-	start = -1-len(cw_encoding)
-	for j in range(start,-1,1):
-			raw_output = []
-			for i in predictions[-1][j]:
-					raw_output.append(i.item())
-			logprobs.append(np.log(softmax(raw_output)))
-	conditional_probs = []
-	for cw,prob in zip(cw_encoding,logprobs):
-			conditional_probs.append(prob[cw])
-	return np.exp(np.sum(conditional_probs))
@@ -117,8 +145,14 @@ def Visual_re_ranker(caption_man, caption_woman, context_label, context_prob):
     sim_w = get_sim(sim_w)
-    LM_man = cloze_prob(caption_man)
-    LM_woman = cloze_prob(caption_woman)
     #LM  = scorer.sentence_score(caption, reduce="mean")
     score_man     = pow(float(LM_man),pow((1-float(sim_m))/(1+ float(sim_m)),1-float(context_prob)))
     score_woman   = pow(float(LM_woman),pow((1-float(sim_w))/(1+ float(sim_w)),1-float(context_prob)))

 #tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')  # tokenizer loaded from the pretrained 'gpt2' checkpoint
+model = GPT2LMHeadModel.from_pretrained('gpt2')  # LM-head model from the same checkpoint; scored against in sentence_prob_mean
+def sentence_prob_mean(text):
+    # Tokenize the input text and add special tokens
+    input_ids = tokenizer.encode(text, return_tensors='pt')
+    # Obtain model outputs
+    with torch.no_grad():
+        outputs = model(input_ids, labels=input_ids)
+        logits = outputs.logits  # logits are the model outputs before applying softmax
+    # Shift logits and labels so that tokens are aligned:
+    shift_logits = logits[..., :-1, :].contiguous()
+    shift_labels = input_ids[..., 1:].contiguous()
+    # Calculate the softmax probabilities
+    probs = softmax(shift_logits, dim=-1)
+    # Gather the probabilities of the actual token IDs
+    gathered_probs = torch.gather(probs, 2, shift_labels.unsqueeze(-1)).squeeze(-1)
+    # Compute the mean probability across the tokens
+    mean_prob = torch.mean(gathered_probs).item()
+# def cloze_prob(text):
+# 	whole_text_encoding = tokenizer.encode(text)
+# 	text_list = text.split()
+# 	stem = ' '.join(text_list[:-1])
+# 	stem_encoding = tokenizer.encode(stem)
+# 	cw_encoding = whole_text_encoding[len(stem_encoding):]
+# 	tokens_tensor = torch.tensor([whole_text_encoding])
+# 	with torch.no_grad():
+# 		outputs = model(tokens_tensor)
+# 		predictions = outputs[0]
+# 	logprobs = []
+# 	start = -1-len(cw_encoding)
+# 	for j in range(start,-1,1):
+# 			raw_output = []
+# 			for i in predictions[-1][j]:
+# 					raw_output.append(i.item())
+# 			logprobs.append(np.log(softmax(raw_output)))
+# 	conditional_probs = []
+# 	for cw,prob in zip(cw_encoding,logprobs):
+# 			conditional_probs.append(prob[cw])
+# 	return np.exp(np.sum(conditional_probs))
     sim_w = get_sim(sim_w)
+    LM_man =  sentence_prob_mean(caption_man)
+    LM_woman = sentence_prob_mean(caption_woman)
+    # LM_man = cloze_prob(caption_man)
+    # LM_woman = cloze_prob(caption_woman)
+   )
     #LM  = scorer.sentence_score(caption, reduce="mean")
     score_man     = pow(float(LM_man),pow((1-float(sim_m))/(1+ float(sim_m)),1-float(context_prob)))
     score_woman   = pow(float(LM_woman),pow((1-float(sim_w))/(1+ float(sim_w)),1-float(context_prob)))