DataMiningProjectDemo / src /heuristic.py
Metin's picture
Clean up
fdfe8da
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from typing import List, Any, Optional
from collections import defaultdict, deque
def predict_topic_nth_degree(
    new_article_title: str,
    new_article_embedding: List[float],
    edges: List[str],
    G: Any,
    max_depth: int = 1,
    is_weighted: bool = False,
    decay_factor: float = 1.0,
) -> Optional[pd.DataFrame]:
    """Score candidate topics for a new article from its reference neighbourhood.

    Runs a breadth-first traversal that starts at the nodes named in ``edges``
    (depth 1) and follows ``G.neighbors`` up to ``max_depth``. Every visited
    node carrying a truthy ``"label"`` attribute votes for that label.

    Args:
        new_article_title: Identifier of the new article. It is marked as
            visited up front so the traversal never walks back into it.
        new_article_embedding: Embedding of the new article; only read when
            ``is_weighted`` is true.
        edges: Identifiers referenced by the new article. Entries that are
            not in ``G`` (and duplicates) are ignored.
        G: Graph-like object supporting ``ref in G``, ``G.nodes[node]``
            (returning a mapping of node attributes) and ``G.neighbors(node)``
            -- presumably a networkx graph; confirm against the caller.
        max_depth: Deepest level that is still scored. Depth-1 nodes are
            always scored; neighbours are expanded only while the current
            depth is below this value.
        is_weighted: When true, a vote is the cosine similarity between
            ``new_article_embedding`` and the node's ``"embedding"``
            attribute; otherwise each vote is ``1.0``.
        decay_factor: A vote at depth ``d`` is multiplied by
            ``decay_factor ** (d - 1)``.

    Returns:
        A DataFrame with columns ``"Class"`` and ``"Score"`` sorted by
        ``"Score"`` in descending order (ties keep the order in which the
        topics were first reached), or ``None`` when no entry of ``edges`` is
        in ``G`` or no labelled node was reached.

    Raises:
        KeyError: If ``is_weighted`` is true and a labelled node has no
            ``"embedding"`` attribute.
    """
    # Seed the frontier with the direct references that exist in the graph.
    visited = {new_article_title}
    queue = deque()
    for ref in edges:
        if ref in G and ref not in visited:
            visited.add(ref)
            queue.append((ref, 1))

    if not queue:
        return None

    topic_scores = defaultdict(float)
    while queue:
        current_node, current_depth = queue.popleft()
        node_data = G.nodes[current_node]

        # Falsy labels (missing, None, empty) are treated as "unlabelled":
        # such nodes cast no vote.
        topic = node_data.get("label")
        if topic:
            if is_weighted:
                base_score = cosine_similarity(
                    [new_article_embedding], [node_data["embedding"]]
                )[0][0]
            else:
                base_score = 1.0
            # Depth 1 gets the full vote; each further hop is damped once more.
            topic_scores[topic] += base_score * (
                decay_factor ** (current_depth - 1)
            )

        # NOTE(review): expansion is applied to every dequeued node, labelled
        # or not, so unlabelled nodes still act as bridges -- confirm this
        # matches the intended nesting.
        if current_depth < max_depth:
            for neighbor in G.neighbors(current_node):
                if neighbor not in visited:
                    visited.add(neighbor)
                    queue.append((neighbor, current_depth + 1))

    if not topic_scores:
        return None

    result_df = pd.DataFrame(
        list(topic_scores.items()), columns=["Class", "Score"]
    )
    # A stable sort keeps equally scored topics in first-reached order, so the
    # ranking is deterministic.
    return result_df.sort_values(by="Score", ascending=False, kind="stable")