Demo-for-Gender-Score-LLAMA-3.2

Sleeping

App Files Files Community

AhmedSSabir committed on Apr 19, 2024

Commit

9be3938

verified ·

1 Parent(s): 02397eb

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -26

app.py CHANGED Viewed

@@ -48,20 +48,22 @@ def get_sim(x):
 # Load pre-trained model
 #model = GPT2LMHeadModel.from_pretrained('distilgpt2', output_hidden_states = True, output_attentions = True)
-model = GPT2LMHeadModel.from_pretrained('gpt2', output_hidden_states = True, output_attentions = True)
 #model  =  gr.Interface.load('huggingface/distilgpt2', output_hidden_states = True, output_attentions = True)
 #model.eval()
 #tokenizer =  gr.Interface.load('huggingface/distilgpt2')
 #tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
-tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
 #tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
-tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
-model = GPT2LMHeadModel.from_pretrained('distilgpt2')
 def sentence_prob_mean(text):
     # Tokenize the input text and add special tokens
@@ -85,37 +87,38 @@ def sentence_prob_mean(text):
     # Compute the mean probability across the tokens
     mean_prob = torch.mean(gathered_probs).item()
-def cloze_prob(text):
-	whole_text_encoding = tokenizer.encode(text)
-	text_list = text.split()
-	stem = ' '.join(text_list[:-1])
-	stem_encoding = tokenizer.encode(stem)
-	cw_encoding = whole_text_encoding[len(stem_encoding):]
-	tokens_tensor = torch.tensor([whole_text_encoding])
-	with torch.no_grad():
-		outputs = model(tokens_tensor)
-		predictions = outputs[0]
-	logprobs = []
-	start = -1-len(cw_encoding)
-	for j in range(start,-1,1):
-			raw_output = []
-			for i in predictions[-1][j]:
-					raw_output.append(i.item())
-			logprobs.append(np.log(softmax(raw_output)))
-	conditional_probs = []
-	for cw,prob in zip(cw_encoding,logprobs):
-			conditional_probs.append(prob[cw])
-	return np.exp(np.sum(conditional_probs))

 # Load pre-trained model
 #model = GPT2LMHeadModel.from_pretrained('distilgpt2', output_hidden_states = True, output_attentions = True)
+#model = GPT2LMHeadModel.from_pretrained('gpt2', output_hidden_states = True, output_attentions = True)
 #model  =  gr.Interface.load('huggingface/distilgpt2', output_hidden_states = True, output_attentions = True)
 #model.eval()
 #tokenizer =  gr.Interface.load('huggingface/distilgpt2')
 #tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
 #tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
+#tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+model = GPT2LMHeadModel.from_pretrained('gpt2')
 def sentence_prob_mean(text):
     # Tokenize the input text and add special tokens
     # Compute the mean probability across the tokens
     mean_prob = torch.mean(gathered_probs).item()
+    return mean_prob
+# def cloze_prob(text):
+# 	whole_text_encoding = tokenizer.encode(text)
+# 	text_list = text.split()
+# 	stem = ' '.join(text_list[:-1])
+# 	stem_encoding = tokenizer.encode(stem)
+# 	cw_encoding = whole_text_encoding[len(stem_encoding):]
+# 	tokens_tensor = torch.tensor([whole_text_encoding])
+# 	with torch.no_grad():
+# 		outputs = model(tokens_tensor)
+# 		predictions = outputs[0]
+# 	logprobs = []
+# 	start = -1-len(cw_encoding)
+# 	for j in range(start,-1,1):
+# 			raw_output = []
+# 			for i in predictions[-1][j]:
+# 					raw_output.append(i.item())
+# 			logprobs.append(np.log(softmax(raw_output)))
+# 	conditional_probs = []
+# 	for cw,prob in zip(cw_encoding,logprobs):
+# 			conditional_probs.append(prob[cw])
+# 	return np.exp(np.sum(conditional_probs))