Fix single-process eval: unpack the per-input timeout and pass it to _evaluate_with_prolog
Browse files
VerifiableRewardsForScalableLogicalReasoning.py
CHANGED
@@ -246,6 +246,7 @@ def extract_ilp_from_text_v2(text, target_predicate=None, allow_multiple_rules=F
|
|
246 |
# Pre-process: collapse code blocks to single lines
|
247 |
text = re.sub(r'\n\s*', ' ', text) # crude: flatten all to one line
|
248 |
# Rule pattern, across newlines
|
|
|
249 |
rule_pattern = re.compile(rf'({target_predicate}\([^()]*\)\s*:-.*?\.)')
|
250 |
rules = list(rule_pattern.findall(text))
|
251 |
if len(rules) > 1 and not allow_multiple_rules:
|
@@ -353,8 +354,8 @@ class VerifiableRewardsForScalableLogicalReasoning(evaluate.Metric):
|
|
353 |
else:
|
354 |
# Evaluate in the main thread (no multiprocessing)
|
355 |
results = []
|
356 |
-
for prediction, validation_program, eval_config in tqdm(eval_inputs, total=len(predictions), desc="Evaluating rules"):
|
357 |
-
results.append(_evaluate_with_prolog(prediction, validation_program, eval_config))
|
358 |
|
359 |
# Calculate metrics
|
360 |
partial_scores = [result["partial_score"] for result in results]
|
|
|
246 |
# Pre-process: collapse code blocks to single lines
|
247 |
text = re.sub(r'\n\s*', ' ', text) # crude: flatten all to one line
|
248 |
# Rule pattern, across newlines
|
249 |
+
# rule_pattern = re.compile(rf'({target_predicate}\([^()]*\)\s*:-.*?\.)')
|
250 |
rule_pattern = re.compile(rf'({target_predicate}\([^()]*\)\s*:-.*?\.)')
|
251 |
rules = list(rule_pattern.findall(text))
|
252 |
if len(rules) > 1 and not allow_multiple_rules:
|
|
|
354 |
else:
|
355 |
# Evaluate in the main thread (no multiprocessing)
|
356 |
results = []
|
357 |
+
for prediction, validation_program, eval_config, t in tqdm(eval_inputs, total=len(predictions), desc="Evaluating rules"):
|
358 |
+
results.append(_evaluate_with_prolog(prediction, validation_program, eval_config, timeout=t))
|
359 |
|
360 |
# Calculate metrics
|
361 |
partial_scores = [result["partial_score"] for result in results]
|