"""Example probability distributions over eleven answer tokens.

Defines the token labels (``options``), two precomputed scenario lists
(``dirac`` and ``gauss``) and a builder for symmetric two-peak scenarios
(``make_bimodal_scenarios``). Every scenario is a plain dict whose
``"values"`` list holds one probability per entry of ``options``.
"""

import numpy as np

# Token labels: the digits "0".."9" followed by a catch-all "Text" token.
options = [str(i) for i in range(10)] + ["Text"]

# (1) A one-hot distribution moving from token 0 to token 10 ("Text").
dirac = [
    {
        "name": f"Dirac: all mass on token {label}",
        # Sized from `options` so the vector cannot drift out of sync with it.
        "values": [1.0 if j == i else 0.0 for j in range(len(options))],
        "ground_truth": "4",
        "explanation": "A Dirac distribution: all probability on a single token.",
    }
    for i, label in enumerate(options)
]


# (2) A peak of mass 0.6 at the center; the remaining 0.4 is spread over the
#     other tokens with Gaussian weights.
def make_gauss_values(
    center: int,
    n: int = 11,
    sigma: float = 1.5,
    peak_mass: float = 0.6,
) -> list[float]:
    """Return a peaked distribution over ``n`` tokens as a list of floats.

    ``peak_mass`` is placed on index ``center``. The remaining
    ``1 - peak_mass`` is shared among the other indices in proportion to a
    Gaussian kernel of width ``sigma`` centred on ``center``.

    Args:
        center: Index that receives ``peak_mass``.
        n: Number of tokens (length of the returned list).
        sigma: Standard deviation of the Gaussian kernel, in index units.
        peak_mass: Probability assigned to ``center``.

    Returns:
        A list of ``n`` floats. It sums to 1 whenever at least one
        off-center weight is non-zero. If all of them are zero (``n == 1``,
        or ``sigma`` so small that the kernel underflows), the off-center
        entries are left at 0.0 instead of becoming NaN, and the list sums
        to ``peak_mass`` only.
    """
    xs = np.arange(n)
    # Unnormalized Gaussian weights around the center.
    kernel = np.exp(-0.5 * ((xs - center) / sigma) ** 2)

    # Zero out the center and re-normalize the *other* weights to sum to 1.
    others = kernel.copy()
    others[center] = 0.0
    total = others.sum()
    if total > 0:
        # Guarded: dividing by a zero sum would fill the result with NaNs.
        others /= total

    # Allocate peak_mass to the center and the remainder to the rest.
    vals = others * (1.0 - peak_mass)
    vals[center] = peak_mass
    return vals.tolist()


gauss = [
    {
        "name": f"Gaussian: center at token {label}",
        # Pass the length explicitly rather than relying on the default n.
        "values": make_gauss_values(c, n=len(options)),
        "ground_truth": "4",
        "explanation": "Gaussian-style: 0.6 mass at the highlighted token, 0.4 spread smoothly to its neighbors.",
    }
    for c, label in enumerate(options)
]


def make_bimodal_scenarios(gt_token: str, options: list[str]) -> list[dict]:
    """Build one two-peak scenario per offset around ``gt_token``.

    For each ``offset`` in ``range(len(options))`` the mass is split 50/50
    between the tokens at ``gt_idx - offset`` and ``gt_idx + offset``, both
    taken modulo ``len(options)`` so they wrap around the ends. When the two
    indices coincide (offset 0, or half-way round an even-length list) the
    single token receives all the mass. Offsets past the half-way point
    revisit earlier pairs with left and right swapped.

    Args:
        gt_token: Label of the ground-truth token; must occur in ``options``.
        options: Ordered token labels.

    Returns:
        A list of ``len(options)`` dicts with keys ``"name"``, ``"values"``
        and ``"explanation"``.

    Raises:
        ValueError: If ``gt_token`` is not in ``options``.
    """
    n = len(options)
    gt_idx = options.index(gt_token)

    scenarios = []
    for offset in range(n):
        left = (gt_idx - offset) % n
        right = (gt_idx + offset) % n

        # Build the 50/50 vector (or 1.0 on one token when both sides meet).
        vals = [0.0] * n
        if left == right:
            vals[left] = 1.0
        else:
            vals[left] = 0.5
            vals[right] = 0.5

        label = f"({options[left]}, {options[right]})"
        scenarios.append(
            {
                "name": label,
                "values": vals,
                # The same text is used for the single-token case as well.
                "explanation": "50/50 mass at these two tokens (wrapping).",
            }
        )
    return scenarios