Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
# cbow_logic.py | |
import gensim | |
import os | |
import argparse | |
from typing import List, Tuple | |
import shlex | |
class MeaningCalculator:
    """Evaluate word-vector arithmetic expressions against a gensim model.

    An expression is a sequence of terms separated by ``+`` and ``-``
    operators, e.g. ``'"new york" - city + capital'``. Terms following a
    ``+`` (or appearing before any operator) are treated as positive
    contributions; terms following a ``-`` are treated as negative ones.
    """

    def __init__(self, model_path: str = "/models/cbow/cbow_model.kv"):
        """Load the keyed vectors stored at *model_path*.

        Args:
            model_path: Filesystem path of the saved ``KeyedVectors`` file.

        Raises:
            FileNotFoundError: If nothing exists at *model_path*.
        """
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model not found at: {model_path}")
        self.model = gensim.models.KeyedVectors.load(model_path, mmap='r')

    def evaluate_expression(self, expression: str, topn: int = 10) -> List[Tuple[str, float]]:
        """Return the *topn* nearest words for an arithmetic expression.

        Args:
            expression: Shell-style tokenised expression such as
                ``'"new york" - city + capital'``. Quoted phrases are kept
                together as a single term.
            topn: Number of results requested from the model.

        Returns:
            The ``(word, score)`` pairs produced by the model's
            ``most_similar`` call, or ``[("InputError", 0.0)]`` when the
            expression cannot be evaluated (unbalanced quotes, no terms at
            all, or a term the model raises ``KeyError`` for).
        """
        input_error: List[Tuple[str, float]] = [("InputError", 0.0)]

        try:
            tokens = shlex.split(expression)  # Handles quoted terms properly
        except ValueError:
            # shlex raises ValueError for malformed input such as an
            # unclosed quotation mark; report it like any other bad input.
            return input_error

        positive: List[str] = []
        negative: List[str] = []
        current_op = "+"  # Terms before the first operator count as positive.
        for token in tokens:
            if token in ("+", "-"):
                current_op = token
            elif current_op == "+":
                positive.append(token)
            else:
                negative.append(token)

        if not positive and not negative:
            # Nothing to query with (empty or operator-only expression).
            return input_error

        try:
            return self.model.most_similar(positive=positive, negative=negative, topn=topn)
        except KeyError:
            # A requested term is not known to the model.
            return input_error
from gensim.models import KeyedVectors | |
if __name__ == "__main__":
    # Command-line entry point: parse the expression and optional model
    # path, evaluate the expression, and print each result with its score.
    cli = argparse.ArgumentParser(
        description="Evaluate word vector expressions using CBOW."
    )
    cli.add_argument(
        "expression",
        type=str,
        help="Expression like 'king - man + woman'",
    )
    cli.add_argument(
        "--model_path",
        type=str,
        default="./models/cbow_model.kv",
        help="Path to CBOW model",
    )
    options = cli.parse_args()

    calculator = MeaningCalculator(model_path=options.model_path)
    neighbours = calculator.evaluate_expression(options.expression)

    print(f"\nExpression: {options.expression}\nTop Results:")
    for term, similarity in neighbours:
        print(f" {term:<15} {similarity:.4f}")