LukasHug committed
Commit c8de9ce · Parent(s): 0f1c352

use multiprocessing only for 500 or more samples

VerifiableRewardsForScalableLogicalReasoning.py CHANGED
@@ -314,18 +314,21 @@ class VerifiableRewardsForScalableLogicalReasoning(evaluate.Metric):
 
             eval_inputs.append((prediction, validation_program, eval_config))
 
-        # Process evaluations in parallel
-        num_cpus = max(1, mp.cpu_count() - 1)  # Leave one CPU free
-        with mp.Pool(processes=num_cpus) as pool:
-            results = list(tqdm(
-                pool.starmap(_evaluate_with_prolog, eval_inputs),
-                total=len(eval_inputs),
-                desc="Evaluating rules (parallel)"
-            ))
-        # no multiprocessing in the main thread, so we can use tqdm directly
-        # results = []
-        # for prediction, validation_program, eval_config in tqdm(eval_inputs, total=len(predictions), desc="Evaluating rules"):
-        #     results.append(_evaluate_with_prolog(prediction, validation_program, eval_config))
+        # if more than 1k predictions, we use multiprocessing to speed up the evaluation
+        if len(eval_inputs) > 1000:
+            # Process evaluations in parallel
+            num_cpus = max(1, mp.cpu_count() - 1)  # Leave one CPU free
+            with mp.Pool(processes=num_cpus) as pool:
+                results = list(tqdm(
+                    pool.starmap(_evaluate_with_prolog, eval_inputs),
+                    total=len(eval_inputs),
+                    desc="Evaluating rules (parallel)"
+                ))
+        else:
+            # Evaluate in the main thread (no multiprocessing)
+            results = []
+            for prediction, validation_program, eval_config in tqdm(eval_inputs, total=len(predictions), desc="Evaluating rules"):
+                results.append(_evaluate_with_prolog(prediction, validation_program, eval_config))
 
         # Calculate metrics
         partial_scores = [result["partial_score"] for result in results]
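
For readers skimming the diff: the change gates the multiprocessing pool behind an input-size threshold, so small batches are evaluated in a plain loop and avoid the fixed start-up cost of spawning worker processes. The snippet below is a minimal, self-contained sketch of that pattern, not the metric's actual implementation; the stub worker standing in for _evaluate_with_prolog and the evaluate_all helper with its threshold parameter (defaulting to 1000, as in the new code) are assumptions made for illustration.

import multiprocessing as mp
from tqdm import tqdm

def _evaluate_with_prolog(prediction, validation_program, eval_config):
    # Stand-in worker for illustration only; the real function runs a Prolog
    # validation program against the prediction and returns a result dict.
    return {"partial_score": 1.0 if prediction else 0.0}

def evaluate_all(eval_inputs, threshold=1000):
    # Hypothetical helper: plain loop for small batches, since starting a
    # process pool only pays off once there are enough samples to evaluate.
    if len(eval_inputs) <= threshold:
        return [_evaluate_with_prolog(*args)
                for args in tqdm(eval_inputs, desc="Evaluating rules")]
    num_cpus = max(1, mp.cpu_count() - 1)  # leave one CPU free
    with mp.Pool(processes=num_cpus) as pool:
        return list(tqdm(pool.starmap(_evaluate_with_prolog, eval_inputs),
                         total=len(eval_inputs),
                         desc="Evaluating rules (parallel)"))

if __name__ == "__main__":
    inputs = [("pred", "program", {}) for _ in range(10)]
    results = evaluate_all(inputs)
    print(sum(r["partial_score"] for r in results))

One property of the committed code that the sketch mirrors: pool.starmap blocks until every result is ready, so the tqdm bar in the parallel branch only advances once the whole batch has finished rather than per sample.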