use multiprocessing only for 500 or more samples
VerifiableRewardsForScalableLogicalReasoning.py
CHANGED
@@ -314,18 +314,21 @@ class VerifiableRewardsForScalableLogicalReasoning(evaluate.Metric):
 
             eval_inputs.append((prediction, validation_program, eval_config))
 
-        # [12 deleted lines; their content did not survive extraction, only this leading "#" remains]
+        # if more than 1k predictions, we use multiprocessing to speed up the evaluation
+        if len(eval_inputs) > 1000:
+            # Process evaluations in parallel
+            num_cpus = max(1, mp.cpu_count() - 1)  # Leave one CPU free
+            with mp.Pool(processes=num_cpus) as pool:
+                results = list(tqdm(
+                    pool.starmap(_evaluate_with_prolog, eval_inputs),
+                    total=len(eval_inputs),
+                    desc="Evaluating rules (parallel)"
+                ))
+        else:
+            # Evaluate in the main thread (no multiprocessing)
+            results = []
+            for prediction, validation_program, eval_config in tqdm(eval_inputs, total=len(predictions), desc="Evaluating rules"):
+                results.append(_evaluate_with_prolog(prediction, validation_program, eval_config))
 
         # Calculate metrics
         partial_scores = [result["partial_score"] for result in results]
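The change gates multiprocessing behind a batch-size check: below the cutoff, spawning worker processes (and pickling every input) costs more than it saves, so small batches run in the main thread. Two caveats are visible in the committed code: it gates at 1,000 inputs even though the commit title says 500, and wrapping pool.starmap in tqdm gives no live progress, because starmap only returns once every result is ready (the sequential branch also passes total=len(predictions), which presumably equals len(eval_inputs)). Below is a minimal, self-contained sketch of the same pattern; evaluate_one, _evaluate_star, and THRESHOLD are hypothetical stand-ins for the repository's _evaluate_with_prolog and its cutoff, and pool.imap is swapped in for starmap so the progress bar actually advances as workers finish.

    # Sketch of the "parallelize only above a threshold" pattern from the commit.
    # evaluate_one / _evaluate_star / THRESHOLD are illustrative names, not
    # identifiers from VerifiableRewardsForScalableLogicalReasoning.py.
    import multiprocessing as mp
    from tqdm import tqdm

    THRESHOLD = 1000  # cutoff below which pool startup costs outweigh the speedup

    def evaluate_one(prediction, validation_program, eval_config):
        # placeholder for the real per-sample Prolog evaluation
        return {"partial_score": float(prediction == validation_program)}

    def _evaluate_star(args):
        # Pool.imap passes a single argument, so unpack the tuple ourselves
        return evaluate_one(*args)

    def evaluate_all(eval_inputs):
        if len(eval_inputs) > THRESHOLD:
            num_cpus = max(1, mp.cpu_count() - 1)  # leave one CPU free
            with mp.Pool(processes=num_cpus) as pool:
                # imap yields results as they complete, so tqdm shows live
                # progress; starmap would block until the whole list is ready
                return list(tqdm(pool.imap(_evaluate_star, eval_inputs),
                                 total=len(eval_inputs),
                                 desc="Evaluating rules (parallel)"))
        # small batches: plain loop, no process startup or pickling overhead
        return [evaluate_one(*args) for args in tqdm(eval_inputs, desc="Evaluating rules")]

    if __name__ == "__main__":
        batch = [("p(X)", "p(X)", {}), ("q(X)", "p(X)", {})]
        print([r["partial_score"] for r in evaluate_all(batch)])

On CPython, mp.Pool spawns fresh interpreter processes and re-pickles every input on each call, so a cutoff in the few-hundreds to low-thousands range is a common rule of thumb; the right value depends on how expensive each individual evaluation is.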