Spaces:
Running
Running
| import pandas as pd | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from typing import List, Any, Optional | |
| from collections import defaultdict, deque | |
def predict_topic_nth_degree(
    new_article_title: str,
    new_article_embedding: List[float],
    edges: List[str],
    G: Any,
    max_depth: int = 1,
    is_weighted: bool = False,
    decay_factor: float = 1.0,
) -> Optional[pd.DataFrame]:
    """Score candidate topics for a new article from its graph neighborhood.

    Performs a breadth-first walk over ``G`` starting from the entries of
    ``edges`` that are present in ``G`` (these are depth 1) and continuing
    through ``G.neighbors(...)`` up to ``max_depth``. Every visited node whose
    ``"label"`` attribute is truthy adds a vote to that label's score.

    Args:
        new_article_title: Identifier of the new article. It is marked as
            visited up front so the walk never scores or expands it.
        new_article_embedding: Embedding of the new article; only read when
            ``is_weighted`` is true.
        edges: Node identifiers the new article refers to. Entries that are
            not in ``G`` and repeated entries are ignored.
        G: Graph-like object supporting ``node in G``, ``G.nodes[node]``
            (a mapping read for ``"label"`` and, when weighted,
            ``"embedding"``) and ``G.neighbors(node)``. Presumably a
            networkx graph -- confirm against the caller.
        max_depth: Largest depth that is still expanded from. Seed nodes are
            always scored, since they are enqueued at depth 1 regardless of
            this value.
        is_weighted: When true, each vote is the cosine similarity between
            ``new_article_embedding`` and the node's ``"embedding"``;
            otherwise each vote is ``1.0``.
        decay_factor: Per-hop multiplier; a node at depth ``d`` contributes
            ``vote * decay_factor ** (d - 1)``.

    Returns:
        A DataFrame with columns ``"Class"`` and ``"Score"`` sorted by
        ``"Score"`` in descending order (the index is not reset), or ``None``
        when no seed is found in ``G`` or no visited node carries a label.
    """
    # The new article itself must never be counted or traversed through.
    visited = {new_article_title}
    queue = deque()

    # Seed the walk with the direct references that actually exist in G.
    for ref in edges:
        if ref in G and ref not in visited:
            visited.add(ref)
            queue.append((ref, 1))

    if not queue:
        return None

    topic_scores = defaultdict(float)
    while queue:
        current_node, current_depth = queue.popleft()
        node_data = G.nodes[current_node]

        topic = node_data.get("label")
        # Nodes without a label (or with a falsy one) cast no vote, but they
        # are still expanded below so labeled nodes behind them are reachable.
        if topic:
            if is_weighted:
                neighbor_embedding = node_data["embedding"]
                base_score = cosine_similarity(
                    [new_article_embedding], [neighbor_embedding]
                )[0][0]
            else:
                base_score = 1.0
            # Depth-1 nodes get the full vote; each extra hop applies the
            # decay factor once more.
            topic_scores[topic] += base_score * (
                decay_factor ** (current_depth - 1)
            )

        if current_depth < max_depth:
            for neighbor in G.neighbors(current_node):
                if neighbor not in visited:
                    visited.add(neighbor)
                    queue.append((neighbor, current_depth + 1))

    if not topic_scores:
        return None

    result_df = pd.DataFrame(
        list(topic_scores.items()), columns=["Class", "Score"]
    ).sort_values(by="Score", ascending=False)
    return result_df