import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from typing import List, Any, Optional
from collections import defaultdict, deque


def predict_topic_nth_degree(
    new_article_title: str,
    new_article_embedding: List[float],
    edges: List[str],
    G: Any,
    max_depth: int = 1,
    is_weighted: bool = False,
    decay_factor: float = 1.0,
) -> Optional[pd.DataFrame]:
    """Score candidate topics for a new article from its graph neighbourhood.

    A breadth-first traversal starts from every entry of ``edges`` that is
    present in ``G`` (depth 1) and expands through ``G.neighbors`` up to
    ``max_depth``. Each visited node whose ``"label"`` attribute is truthy
    adds ``base_score * decay_factor ** (depth - 1)`` to that label's total.

    Args:
        new_article_title: Identifier of the new article. It is marked as
            visited up front so the traversal never scores it.
        new_article_embedding: Embedding of the new article; only used when
            ``is_weighted`` is true.
        edges: Node identifiers referenced by the new article. Entries that
            are not in ``G`` and repeated entries are ignored.
        G: Graph-like object supporting ``node in G``, ``G.nodes[node]``
            (a mapping of node attributes) and ``G.neighbors(node)``.
            Presumably a networkx graph -- confirm against the caller.
        max_depth: Deepest level that is expanded. Direct references are
            always scored, even when ``max_depth`` is below 1.
        is_weighted: When true, ``base_score`` is the cosine similarity
            between ``new_article_embedding`` and the node's ``"embedding"``
            attribute; otherwise it is ``1.0``.
        decay_factor: Multiplier applied once per level beyond the first.

    Returns:
        A DataFrame with columns ``"Class"`` and ``"Score"`` sorted by score
        in descending order, or ``None`` when no entry of ``edges`` is in
        ``G`` or no visited node carries a truthy label.

    Raises:
        KeyError: In weighted mode, when a labelled node has no
            ``"embedding"`` attribute.
    """
    # Seeding the visited set with the new article keeps it from being
    # scored if it already exists in the graph.
    visited = {new_article_title}
    queue = deque()

    for ref in edges:
        if ref in G and ref not in visited:
            visited.add(ref)
            queue.append((ref, 1))

    if not queue:
        return None

    topic_scores = defaultdict(float)

    while queue:
        current_node, current_depth = queue.popleft()
        node_data = G.nodes[current_node]
        topic = node_data.get("label")

        # Unlabelled nodes contribute no score but are still expanded below.
        if topic:
            if is_weighted:
                base_score = cosine_similarity(
                    [new_article_embedding], [node_data["embedding"]]
                )[0][0]
            else:
                base_score = 1.0
            topic_scores[topic] += base_score * (
                decay_factor ** (current_depth - 1)
            )

        if current_depth < max_depth:
            for neighbor in G.neighbors(current_node):
                if neighbor not in visited:
                    visited.add(neighbor)
                    queue.append((neighbor, current_depth + 1))

    if not topic_scores:
        return None

    result_df = pd.DataFrame(
        list(topic_scores.items()), columns=["Class", "Score"]
    )
    # A stable sort keeps the order of tied scores deterministic.
    return result_df.sort_values(by="Score", ascending=False, kind="mergesort")