Omartificial-Intelligence-Space committed on
Commit
e58d876
·
verified ·
1 Parent(s): 91e6eee

update submit

Browse files
Files changed (1) hide show
  1. src/submission/submit.py +43 -35
src/submission/submit.py CHANGED
@@ -22,42 +22,50 @@ REQUESTED_MODELS = None
22
  USERS_TO_SUBMISSION_DATES = None
23
 
24
def get_top_prediction(text, tokenizer, model):
    """Return the option letter ('A'-'D') with the highest next-token logit.

    Runs the model on ``text`` and compares the logits assigned to the first
    token of each option letter at the last sequence position.

    Args:
        text: Prompt string to score.
        tokenizer: HuggingFace-style tokenizer (callable, and exposing
            ``encode``).
        model: Causal LM whose output has a ``logits`` tensor of shape
            [batch, seq_len, vocab_size].

    Returns:
        The top option letter, or ``None`` when no option could be scored.
    """
    inputs = tokenizer(text, return_tensors='pt')
    # Keep model and inputs on the same device.
    if torch.cuda.is_available():
        model = model.cuda()
        inputs = {k: v.cuda() for k, v in inputs.items()}
    else:
        model = model.cpu()

    with torch.no_grad():
        outputs = model(**inputs)
        # Guard: indexing position -1 on an empty sequence would raise.
        if outputs.logits.size(1) == 0:
            print("No logits were produced by the model.")
            return None
        # Next-token logits after the prompt — shape [vocab_size].
        logits = outputs.logits[0, -1, :]

    options = ['A', 'B', 'C', 'D']
    option_logits = []
    for option in options:
        # Encode the option without special tokens; use its first sub-token.
        option_ids = tokenizer.encode(option, add_special_tokens=False)
        if not option_ids:
            print(f"Option '{option}' could not be tokenized.")
            continue
        option_id = option_ids[0]
        vocab_size = logits.size(0)
        if option_id >= vocab_size:
            print(f"Option ID {option_id} is out of bounds for vocabulary size {vocab_size}")
            continue
        option_logits.append((logits[option_id].item(), option))

    if not option_logits:
        print("No valid options found.")
        return None

    # The option with the highest logit wins.
    return max(option_logits, key=lambda x: x[0])[1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
 
63
  def evaluate_model_accuracy(model_name, num_examples):
 
22
  USERS_TO_SUBMISSION_DATES = None
23
 
24
def get_top_prediction(text, tokenizer, model):
    """Return the option letter ('A'-'D') with the highest next-token logit.

    Runs the model on ``text``, takes the logits at the last sequence
    position, and compares the logits of the first token of each option
    letter. Best-effort: any failure is reported and ``None`` is returned
    instead of propagating the exception.

    Args:
        text: Prompt string to score.
        tokenizer: HuggingFace-style tokenizer (callable, and exposing
            ``encode``).
        model: Causal LM whose output has a ``logits`` tensor of shape
            [batch, seq_len, vocab_size].

    Returns:
        The top option letter, or ``None`` when no option could be scored
        or an error occurred.
    """
    try:
        inputs = tokenizer(text, return_tensors='pt')
        # Keep model and inputs on the same device.
        if torch.cuda.is_available():
            model = model.cuda()
            inputs = {k: v.cuda() for k, v in inputs.items()}
        else:
            model = model.cpu()

        with torch.no_grad():
            outputs = model(**inputs)
            # Guard: indexing position -1 on an empty sequence would raise.
            if outputs.logits.size(1) == 0:
                print("No logits were produced by the model.")
                return None
            # Next-token logits after the prompt — shape [vocab_size].
            logits = outputs.logits[0, -1, :]

        options = ['A', 'B', 'C', 'D']
        option_logits = []
        for option in options:
            # Encode the option without special tokens; use its first sub-token.
            option_ids = tokenizer.encode(option, add_special_tokens=False)
            if not option_ids:
                print(f"Option '{option}' could not be tokenized.")
                continue
            option_id = option_ids[0]
            vocab_size = logits.size(0)
            if option_id >= vocab_size:
                print(f"Option ID {option_id} is out of bounds for vocabulary size {vocab_size}")
                continue
            option_logits.append((logits[option_id].item(), option))

        if not option_logits:
            print("No valid options found.")
            return None

        # The option with the highest logit wins.
        return max(option_logits, key=lambda x: x[0])[1]
    except Exception as e:
        # Local import so the handler cannot itself raise NameError when
        # `traceback` is not imported at module level.
        import traceback
        print(f"Error in get_top_prediction: {e}\n{traceback.format_exc()}")
        return None
69
 
70
 
71
  def evaluate_model_accuracy(model_name, num_examples):