LukasHug committed on
Commit
499cdbd
·
1 Parent(s): a0f954f

timer back to 10 sec, remove logging messages

Browse files
VerifiableRewardsForScalableLogicalReasoning.py CHANGED
@@ -136,12 +136,11 @@ def _evaluate_with_prolog(prediction, validation_program, eval_config, timeout=5
136
  rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred, allow_multiple_rules)
137
  if positive_pred not in rule_to_evaluate:
138
  p = prediction.replace('\n', ' ')
139
- logger.warning(f"Rule does not contain predicate '{positive_pred}': {p}")
140
  return {
141
  "is_correct": False,
142
  "partial_score": 0.0,
143
  "syntax_valid": False,
144
- "error": f"Invalid Syntax: Logic Rule not found for symbol '{positive_pred}'"
145
  }
146
 
147
  pos_examples = re.findall(rf'{positive_pred}\(([^)]+)\)', validation_program)
@@ -214,13 +213,11 @@ check_all :- forall((pos({vars});neg({vars})), check({vars})).
214
 
215
  except subprocess.TimeoutExpired:
216
  r = rule_to_evaluate.replace('\n', ' ')
217
- logger.warning(f"Evaluation timed out after {timeout} seconds for rule: '{r}'")
218
  return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
219
- "error": f"Evaluation timed out after {timeout} seconds"}
220
  except Exception as e:
221
- logger.warning(f"Error evaluating rule '{rule_to_evaluate}' returns: '{result.stdout.strip() if result else 'No error message'}' with error: {e}")
222
  return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
223
- "error": f"Syntactically invalid rule '{rule_to_evaluate}'"}
224
  finally:
225
  if os.path.exists(temp_file):
226
  os.remove(temp_file)
@@ -325,7 +322,7 @@ class VerifiableRewardsForScalableLogicalReasoning(evaluate.Metric):
325
  raise ValueError(
326
  f"Number of predictions ({len(predictions)}) and references {len(references)}) don't match")
327
 
328
- TIMEOUT = 5 if len(predictions) > 500 else 5
329
  # Prepare evaluation inputs
330
  eval_inputs = []
331
  for i, (prediction, reference) in enumerate(zip(predictions, references)):
 
136
  rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred, allow_multiple_rules)
137
  if positive_pred not in rule_to_evaluate:
138
  p = prediction.replace('\n', ' ')
 
139
  return {
140
  "is_correct": False,
141
  "partial_score": 0.0,
142
  "syntax_valid": False,
143
+ "error": f"Invalid Syntax: Logic Rule not found for symbol '{positive_pred}': {p}"
144
  }
145
 
146
  pos_examples = re.findall(rf'{positive_pred}\(([^)]+)\)', validation_program)
 
213
 
214
  except subprocess.TimeoutExpired:
215
  r = rule_to_evaluate.replace('\n', ' ')
 
216
  return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
217
+ "error": "Evaluation timed out after {timeout} seconds for rule: '{r}'"}
218
  except Exception as e:
 
219
  return {"is_correct": False, "partial_score": 0.0, "syntax_valid": False,
220
+ "error": f"Error evaluating rule '{rule_to_evaluate}' returns: '{result.stdout.strip() if result else 'No error message'}' with error: {e}"}
221
  finally:
222
  if os.path.exists(temp_file):
223
  os.remove(temp_file)
 
322
  raise ValueError(
323
  f"Number of predictions ({len(predictions)}) and references {len(references)}) don't match")
324
 
325
+ TIMEOUT = 10 if len(predictions) > 500 else 5
326
  # Prepare evaluation inputs
327
  eval_inputs = []
328
  for i, (prediction, reference) in enumerate(zip(predictions, references)):