Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| # cbow_logic.py | |
| import gensim | |
| import os | |
| import argparse | |
| from typing import List, Tuple | |
| import shlex | |
class MeaningCalculator:
    """Evaluate "+"/"-" word-vector expressions against a loaded model.

    The model is read with ``gensim.models.KeyedVectors.load`` and queried
    through its ``most_similar`` method.
    """

    def __init__(self, model_path: str = "/models/cbow/cbow_model.kv"):
        """Load the keyed vectors stored at *model_path*.

        Args:
            model_path: Filesystem path of the saved KeyedVectors file.

        Raises:
            FileNotFoundError: If *model_path* does not exist.
        """
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model not found at: {model_path}")
        # mmap='r' asks gensim to memory-map the stored arrays read-only.
        self.model = gensim.models.KeyedVectors.load(model_path, mmap='r')

    def evaluate_expression(self, expression: str, topn: int = 10) -> List[Tuple[str, float]]:
        """Evaluate an expression such as ``'"new york" - city + capital'``.

        The expression is split with ``shlex.split``, so a quoted phrase is
        kept as a single term. ``+`` and ``-`` are only recognised as
        operators when they are separate whitespace-delimited tokens; every
        other token is a term that goes to the positive or negative list
        according to the most recent operator (``+`` initially).

        Args:
            expression: The expression to evaluate.
            topn: Number of results requested from ``most_similar``.

        Returns:
            The ``(word, score)`` pairs returned by ``most_similar``, or the
            sentinel ``[("InputError", 0.0)]`` when the expression cannot be
            evaluated: unbalanced quotes, no terms at all, or a ``KeyError``
            from the model (presumably an out-of-vocabulary term).
        """
        try:
            tokens = shlex.split(expression)  # Handles quoted terms properly
        except ValueError:
            # shlex raises ValueError on malformed input such as an
            # unterminated quote; report it like any other bad input.
            return [("InputError", 0.0)]

        positive: List[str] = []
        negative: List[str] = []
        current_op = "+"
        for token in tokens:
            if token in ("+", "-"):
                current_op = token
            elif current_op == "+":
                positive.append(token)
            else:
                negative.append(token)

        if not positive and not negative:
            # Nothing to query with; avoid handing the model empty input.
            return [("InputError", 0.0)]

        try:
            return self.model.most_similar(positive=positive, negative=negative, topn=topn)
        except KeyError:
            return [("InputError", 0.0)]
| from gensim.models import KeyedVectors | |
if __name__ == "__main__":
    # Command-line entry point: parse the expression and model path, run the
    # query through MeaningCalculator, and print one result per line.
    cli = argparse.ArgumentParser(
        description="Evaluate word vector expressions using CBOW.",
    )
    cli.add_argument(
        "expression",
        type=str,
        help="Expression like 'king - man + woman'",
    )
    cli.add_argument(
        "--model_path",
        type=str,
        default="./models/cbow_model.kv",
        help="Path to CBOW model",
    )
    args = cli.parse_args()

    # Build the calculator from the requested model and evaluate in one go.
    ranked = MeaningCalculator(model_path=args.model_path).evaluate_expression(args.expression)

    print(f"\nExpression: {args.expression}\nTop Results:")
    for word, score in ranked:
        print(f" {word:<15} {score:.4f}")