Spaces:

kpfadnis
/

InspectorRAGet

Running

App Files Files

InspectorRAGet / src /utilities /significance.ts

kpfadnis

chore: Initial commit.

599f646 about 1 year ago

raw

history blame

2.87 kB

	/**
	*
	* Copyright 2023-2024 InspectorRAGet Team
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*
	**/

	/**
	 * Paired, two-sided Fisher's Randomization Test on the difference of means, as described in
	 * "A Comparison of Statistical Significance Tests for Information Retrieval Evaluation"
	 * by Mark D. Smucker, James Allan, and Ben Carterette. CIKM 2007.
	 *
	 * The same method is described in W. Morgan's slides
	 * ( https://cs.stanford.edu/people/wmorgan/sigtest.pdf ) under the name "randomization test".
	 * Morgan adds "+1" to both the numerator and the denominator of the p-value (the bullet
	 * starting with "Actually" on slide 10 and ending with "not that it matters for, say, R ≥ 19").
	 * We do the same here because Morgan's case for including it (to be "statistically valid")
	 * seems solid -- but as noted, it doesn't matter much.
	 *
	 * Philipp Koehn ( https://aclanthology.org/W04-3250/ ) describes a related procedure under the
	 * name "paired bootstrap resampling".
	 *
	 * NOTE: This implementation assumes the metric is computed as the mean of a list of scores,
	 * one per sample.
	 *
	 * EXTRA: Fisher's Randomization Test can also be run on other metrics, but then you need to
	 * apply the full metric computation to the x and y vectors below.
	 *
	 * @param distributionA Per-sample scores of the first system.
	 * @param distributionB Per-sample scores of the second system; `distributionB[i]` must be the
	 * score for the same sample as `distributionA[i]`.
	 * @param FISHER_RANDOMIZATION_TRIALS Number of random label-swapping trials to run.
	 * @returns A tuple `[pValue, meanA, meanB]`, where `pValue` is
	 * `(trials with |mean difference| >= observed + 1) / (FISHER_RANDOMIZATION_TRIALS + 1)`.
	 * @throws {TypeError} If either distribution is empty.
	 * @throws {RangeError} If the two distributions do not have the same length.
	 */
	export function calculateFisherRandomization(
	  distributionA: readonly number[],
	  distributionB: readonly number[],
	  FISHER_RANDOMIZATION_TRIALS: number = 100000,
	): [number, number, number] {
	  const length = distributionA.length;

	  // A mean over zero samples is undefined. TypeError is kept on purpose: it is what the
	  // previous implementation raised (via `reduce` on an empty array) for this input.
	  if (length === 0 || distributionB.length === 0) {
	    throw new TypeError(
	      'calculateFisherRandomization requires at least one score in each distribution.',
	    );
	  }

	  // The test is paired. With unequal lengths the trial sums would turn into NaN (or silently
	  // ignore trailing samples), which used to surface as a bogus, very small p-value.
	  if (distributionB.length !== length) {
	    throw new RangeError(
	      `calculateFisherRandomization requires paired distributions of equal length (got ${length} and ${distributionB.length}).`,
	    );
	  }

	  const meanA = distributionA.reduce((a, b) => a + b, 0) / length;
	  const meanB = distributionB.reduce((a, b) => a + b, 0) / length;
	  const actualDifference = Math.abs(meanA - meanB);

	  let trialDifferencesGreaterOrEqualToActualCount = 0;
	  for (let trialIdx = 0; trialIdx < FISHER_RANDOMIZATION_TRIALS; trialIdx++) {
	    let sumX = 0;
	    let sumY = 0;

	    // Under the null hypothesis the system labels are exchangeable, so each pair of scores is
	    // assigned to (x, y) or (y, x) with equal probability.
	    for (let i = 0; i < length; i++) {
	      if (Math.random() < 0.5) {
	        sumX += distributionA[i];
	        sumY += distributionB[i];
	      } else {
	        sumX += distributionB[i];
	        sumY += distributionA[i];
	      }
	    }

	    // Two-sided test: count trials whose absolute mean difference is at least the observed one.
	    if (Math.abs(sumX / length - sumY / length) >= actualDifference) {
	      trialDifferencesGreaterOrEqualToActualCount += 1;
	    }
	  }

	  // "+1" in numerator and denominator: the observed labelling counts as one more trial.
	  const pValue =
	    (trialDifferencesGreaterOrEqualToActualCount + 1) /
	    (FISHER_RANDOMIZATION_TRIALS + 1);

	  return [pValue, meanA, meanB];
	}