LukasHug committed on
Commit
78670b1
·
1 Parent(s): 499cdbd

fix single eval

Browse files
VerifiableRewardsForScalableLogicalReasoning.py CHANGED
@@ -246,6 +246,7 @@ def extract_ilp_from_text_v2(text, target_predicate=None, allow_multiple_rules=F
246
  # Pre-process: collapse code blocks to single lines
247
  text = re.sub(r'\n\s*', ' ', text) # crude: flatten all to one line
248
  # Rule pattern, across newlines
 
249
  rule_pattern = re.compile(rf'({target_predicate}\([^()]*\)\s*:-.*?\.)')
250
  rules = list(rule_pattern.findall(text))
251
  if len(rules) > 1 and not allow_multiple_rules:
@@ -353,8 +354,8 @@ class VerifiableRewardsForScalableLogicalReasoning(evaluate.Metric):
353
  else:
354
  # Evaluate in the main thread (no multiprocessing)
355
  results = []
356
- for prediction, validation_program, eval_config in tqdm(eval_inputs, total=len(predictions), desc="Evaluating rules"):
357
- results.append(_evaluate_with_prolog(prediction, validation_program, eval_config))
358
 
359
  # Calculate metrics
360
  partial_scores = [result["partial_score"] for result in results]
 
246
  # Pre-process: collapse code blocks to single lines
247
  text = re.sub(r'\n\s*', ' ', text) # crude: flatten all to one line
248
  # Rule pattern, across newlines
249
+ # rule_pattern = re.compile(rf'({target_predicate}\([^()]*\)\s*:-.*?\.)')
250
  rule_pattern = re.compile(rf'({target_predicate}\([^()]*\)\s*:-.*?\.)')
251
  rules = list(rule_pattern.findall(text))
252
  if len(rules) > 1 and not allow_multiple_rules:
 
354
  else:
355
  # Evaluate in the main thread (no multiprocessing)
356
  results = []
357
+ for prediction, validation_program, eval_config, t in tqdm(eval_inputs, total=len(predictions), desc="Evaluating rules"):
358
+ results.append(_evaluate_with_prolog(prediction, validation_program, eval_config, timeout=t))
359
 
360
  # Calculate metrics
361
  partial_scores = [result["partial_score"] for result in results]