LukasHug committed on
Commit
a0f954f
·
1 Parent(s): 7fd2051

Reduce timeout back to 5 seconds

Browse files
VerifiableRewardsForScalableLogicalReasoning.py CHANGED
@@ -135,7 +135,8 @@ def _evaluate_with_prolog(prediction, validation_program, eval_config, timeout=5
135
  # extract predicate from rule_to_evaluate
136
  rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred, allow_multiple_rules)
137
  if positive_pred not in rule_to_evaluate:
138
- logger.warning(f"Rule does not contain positive predicate '{positive_pred}'")
 
139
  return {
140
  "is_correct": False,
141
  "partial_score": 0.0,
@@ -212,7 +213,8 @@ check_all :- forall((pos({vars});neg({vars})), check({vars})).
212
  }
213
 
214
  except subprocess.TimeoutExpired:
215
- logger.warning(f"Evaluation timed out after {timeout} seconds for rule: '{rule_to_evaluate.replace('\n', ' ')}'")
 
216
  return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
217
  "error": f"Evaluation timed out after {timeout} seconds"}
218
  except Exception as e:
@@ -323,7 +325,7 @@ class VerifiableRewardsForScalableLogicalReasoning(evaluate.Metric):
323
  raise ValueError(
324
  f"Number of predictions ({len(predictions)}) and references {len(references)}) don't match")
325
 
326
- TIMEOUT = 15 if len(predictions) > 500 else 5
327
  # Prepare evaluation inputs
328
  eval_inputs = []
329
  for i, (prediction, reference) in enumerate(zip(predictions, references)):
 
135
  # extract predicate from rule_to_evaluate
136
  rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred, allow_multiple_rules)
137
  if positive_pred not in rule_to_evaluate:
138
+ p = prediction.replace('\n', ' ')
139
+ logger.warning(f"Rule does not contain predicate '{positive_pred}': {p}")
140
  return {
141
  "is_correct": False,
142
  "partial_score": 0.0,
 
213
  }
214
 
215
  except subprocess.TimeoutExpired:
216
+ r = rule_to_evaluate.replace('\n', ' ')
217
+ logger.warning(f"Evaluation timed out after {timeout} seconds for rule: '{r}'")
218
  return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
219
  "error": f"Evaluation timed out after {timeout} seconds"}
220
  except Exception as e:
 
325
  raise ValueError(
326
  f"Number of predictions ({len(predictions)}) and references {len(references)}) don't match")
327
 
328
+ TIMEOUT = 5 if len(predictions) > 500 else 5
329
  # Prepare evaluation inputs
330
  eval_inputs = []
331
  for i, (prediction, reference) in enumerate(zip(predictions, references)):