allow multiple rules
Browse files
VerifiableRewardsForScalableLogicalReasoning.py
CHANGED
@@ -91,7 +91,7 @@ Args:
|
|
91 |
references (`list` of `dict`): Each reference should contain:
|
92 |
- 'validation_program' (`str`): Background knowledge in Prolog syntax
|
93 |
- 'evaluation_config' (`dict`, optional): Configuration of predicates to use for evaluation.
|
94 |
-
Define: positive_predicate, and negative_predicate, the positive one should match the head of the rule to evaluate.
|
95 |
Returns:
|
96 |
accuracy (`float`): The proportion of predictions that correctly solve all examples. Value is between 0 and 1.
|
97 |
partial_score (`float`): Average proportion of correctly classified examples across all predictions. Value is between 0 and 1.
|
@@ -130,9 +130,10 @@ def _evaluate_with_prolog(prediction, validation_program, eval_config, timeout=5
|
|
130 |
# Extract configuration
|
131 |
positive_pred = eval_config.get("positive_predicate", "eastbound")
|
132 |
negative_pred = eval_config.get("negative_predicate", "westbound")
|
|
|
133 |
|
134 |
# extract predicate from rule_to_evaluate
|
135 |
-
rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred)
|
136 |
if positive_pred not in rule_to_evaluate:
|
137 |
logger.warning(f"Rule '{rule_to_evaluate}' does not contain positive predicate '{positive_pred}'")
|
138 |
return {
|
@@ -241,16 +242,16 @@ def extract_ilp_from_text(text):
|
|
241 |
return p_code
|
242 |
|
243 |
|
244 |
-
def extract_ilp_from_text_v2(text, target_predicate=None):
|
245 |
text = re.sub(r'%.*?(?=\n|$)', '', text) # remove comments
|
246 |
# Pre-process: collapse code blocks to single lines
|
247 |
text = re.sub(r'\n\s*', ' ', text) # crude: flatten all to one line
|
248 |
# Rule pattern, across newlines
|
249 |
rule_pattern = re.compile(rf'({target_predicate}\([^()]*\)\s*:-.*?\.)')
|
250 |
rules = list(rule_pattern.findall(text))
|
251 |
-
if len(rules) > 1:
|
252 |
-
logger.warning(f"Found multiple rules in text
|
253 |
-
rules = rules[:1]
|
254 |
# Remove rules that are also captured as facts
|
255 |
p_code = ''
|
256 |
for rule in rules:
|
|
|
91 |
references (`list` of `dict`): Each reference should contain:
|
92 |
- 'validation_program' (`str`): Background knowledge in Prolog syntax
|
93 |
- 'evaluation_config' (`dict`, optional): Configuration of predicates to use for evaluation.
|
94 |
+
Define: positive_predicate, and negative_predicate, the positive one should match the head of the rule to evaluate.
|
95 |
Returns:
|
96 |
accuracy (`float`): The proportion of predictions that correctly solve all examples. Value is between 0 and 1.
|
97 |
partial_score (`float`): Average proportion of correctly classified examples across all predictions. Value is between 0 and 1.
|
|
|
130 |
# Extract configuration
|
131 |
positive_pred = eval_config.get("positive_predicate", "eastbound")
|
132 |
negative_pred = eval_config.get("negative_predicate", "westbound")
|
133 |
+
allow_multiple_rules = eval_config.get("allow_multiple_rules", True)
|
134 |
|
135 |
# extract predicate from rule_to_evaluate
|
136 |
+
rule_to_evaluate = extract_ilp_from_text_v2(prediction, positive_pred, allow_multiple_rules)
|
137 |
if positive_pred not in rule_to_evaluate:
|
138 |
logger.warning(f"Rule '{rule_to_evaluate}' does not contain positive predicate '{positive_pred}'")
|
139 |
return {
|
|
|
242 |
return p_code
|
243 |
|
244 |
|
245 |
+
def extract_ilp_from_text_v2(text, target_predicate=None, allow_multiple_rules=False):
|
246 |
text = re.sub(r'%.*?(?=\n|$)', '', text) # remove comments
|
247 |
# Pre-process: collapse code blocks to single lines
|
248 |
text = re.sub(r'\n\s*', ' ', text) # crude: flatten all to one line
|
249 |
# Rule pattern, across newlines
|
250 |
rule_pattern = re.compile(rf'({target_predicate}\([^()]*\)\s*:-.*?\.)')
|
251 |
rules = list(rule_pattern.findall(text))
|
252 |
+
if len(rules) > 1 and not allow_multiple_rules:
|
253 |
+
logger.warning(f"Found multiple rules in text, but allow_multiple_rules is set to False. Using only the first match.")
|
254 |
+
rules = rules[:1]
|
255 |
# Remove rules that are also captured as facts
|
256 |
p_code = ''
|
257 |
for rule in rules:
|