AhmedSSabir committed on
Commit
b0fa336
·
verified ·
1 Parent(s): d3000a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -39
app.py CHANGED
@@ -53,45 +53,6 @@ import re
53
  tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
54
  model = GPT2LMHeadModel.from_pretrained('gpt2')
55
 
56
- # def cloze_prob(text):
57
-
58
- # whole_text_encoding = tokenizer.encode(text)
59
- # # Parse out the stem of the whole sentence (i.e., the part leading up to but not including the critical word)
60
- # text_list = text.split()
61
- # stem = ' '.join(text_list[:-1])
62
- # stem_encoding = tokenizer.encode(stem)
63
- # # cw_encoding is just the difference between whole_text_encoding and stem_encoding
64
- # # note: this might not correspond exactly to the word itself
65
- # cw_encoding = whole_text_encoding[len(stem_encoding):]
66
- # # Run the entire sentence through the model. Then go "back in time" to look at what the model predicted for each token, starting at the stem.
67
- # # Put the whole text encoding into a tensor, and get the model's comprehensive output
68
- # tokens_tensor = torch.tensor([whole_text_encoding])
69
-
70
- # with torch.no_grad():
71
- # outputs = model(tokens_tensor)
72
- # predictions = outputs[0]
73
-
74
- # logprobs = []
75
- # # start at the stem and get downstream probabilities incrementally from the model(see above)
76
- # start = -1-len(cw_encoding)
77
- # for j in range(start,-1,1):
78
- # raw_output = []
79
- # for i in predictions[-1][j]:
80
- # raw_output.append(i.item())
81
-
82
- # logprobs.append(np.log(softmax(raw_output)))
83
-
84
- # # if the critical word is three tokens long, the raw_probabilities should look something like this:
85
- # # [ [0.412, 0.001, ... ] ,[0.213, 0.004, ...], [0.002,0.001, 0.93 ...]]
86
- # # Then for the i'th token we want to find its associated probability
87
- # # this is just: raw_probabilities[i][token_index]
88
- # conditional_probs = []
89
- # for cw,prob in zip(cw_encoding,logprobs):
90
- # conditional_probs.append(prob[cw])
91
- # # now that you have all the relevant probabilities, return their product.
92
- # # This is the probability of the critical word given the context before it.
93
-
94
- # return np.exp(np.sum(conditional_probs))
95
 
96
  def sentence_prob_mean(text):
97
  # Tokenize the input text and add special tokens
 
53
  tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
54
  model = GPT2LMHeadModel.from_pretrained('gpt2')
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  def sentence_prob_mean(text):
58
  # Tokenize the input text and add special tokens