# cbow_logic.py
"""Evaluate word-vector arithmetic expressions against a CBOW KeyedVectors model."""
import argparse
import os
import shlex
from typing import List, Tuple

import gensim
from gensim.models import KeyedVectors

# Placeholder entry returned (wrapped in a list) whenever an expression cannot
# be evaluated. Kept as an immutable tuple so callers can never mutate a
# shared result object.
_INPUT_ERROR: Tuple[str, float] = ("InputError", 0.0)


class MeaningCalculator:
    """Wrap a loaded KeyedVectors model and evaluate '+' / '-' term expressions."""

    def __init__(self, model_path: str = "/models/cbow/cbow_model.kv"):
        """Load the model stored at ``model_path``.

        Args:
            model_path: Filesystem path of the saved KeyedVectors file.

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
        """
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model not found at: {model_path}")
        # mmap='r' is forwarded to gensim's loader so the vectors are opened
        # memory-mapped instead of being read fully into RAM.
        self.model: KeyedVectors = gensim.models.KeyedVectors.load(model_path, mmap='r')

    @staticmethod
    def _split_terms(tokens: List[str]) -> Tuple[List[str], List[str]]:
        """Partition ``tokens`` into (positive, negative) terms by preceding operator."""
        positive: List[str] = []
        negative: List[str] = []
        current_op = "+"  # terms before any explicit operator count as positive
        for token in tokens:
            if token in ("+", "-"):
                current_op = token
            elif current_op == "+":
                positive.append(token)
            else:
                negative.append(token)
        return positive, negative

    def evaluate_expression(self, expression: str, topn: int = 10) -> List[Tuple[str, float]]:
        """Evaluate expressions like '"new york" - city + capital'.

        The expression is tokenized with ``shlex.split`` so quoted multi-word
        terms stay together. Each term is added to the positive or negative
        list according to the most recent ``+`` / ``-`` token before it.

        Args:
            expression: Terms separated by whitespace-delimited ``+`` / ``-``.
            topn: Number of results requested from the model.

        Returns:
            The ``(word, score)`` pairs returned by the model's
            ``most_similar``, or ``[("InputError", 0.0)]`` when the expression
            cannot be tokenized, contains no terms, or the model raises
            ``KeyError`` for a term.
        """
        try:
            tokens = shlex.split(expression)  # Handles quoted terms properly
        except ValueError:
            # shlex raises ValueError on malformed quoting (e.g. an unclosed quote).
            return [_INPUT_ERROR]

        positive, negative = self._split_terms(tokens)
        if not positive and not negative:
            # Nothing to look up (empty string or operators only).
            return [_INPUT_ERROR]

        try:
            return self.model.most_similar(positive=positive, negative=negative, topn=topn)
        except KeyError:
            return [_INPUT_ERROR]


def main() -> None:
    """Parse command-line arguments, evaluate the expression and print the results."""
    parser = argparse.ArgumentParser(description="Evaluate word vector expressions using CBOW.")
    parser.add_argument("expression", type=str, help="Expression like 'king - man + woman'")
    parser.add_argument("--model_path", type=str, default="./models/cbow_model.kv", help="Path to CBOW model")
    args = parser.parse_args()

    calc = MeaningCalculator(model_path=args.model_path)
    results = calc.evaluate_expression(args.expression)

    print(f"\nExpression: {args.expression}\nTop Results:")
    for word, score in results:
        print(f" {word:<15} {score:.4f}")


if __name__ == "__main__":
    main()