github-actions[bot]
Auto-sync from demo at Thu Aug 28 10:06:44 UTC 2025
ba8b592
raw
history blame
1.6 kB
import trafilatura
from tqdm.asyncio import tqdm_asyncio as tqdm_async
from graphgen.models import BingSearch
from graphgen.utils import logger
async def _process_single_entity(
    entity_name: str, bing_search_client: BingSearch
) -> str | None:
    """
    Process single entity by searching Bing.

    The first search hit is downloaded and its main text is extracted with
    trafilatura; that text (stripped of surrounding whitespace) is the summary.

    :param entity_name: The name of the entity to search.
    :param bing_search_client: The Bing search client.
    :return: Summary of the entity, or None if the search returned nothing,
        the page could not be downloaded, or no text could be extracted.
    """
    search_results = bing_search_client.search(entity_name)
    if not search_results:
        return None

    # Get more details from the first search result
    first_result = search_results[0]
    url = first_result["url"]

    # fetch_url yields None when the download fails; bail out early rather
    # than handing None to the extractor.
    content = trafilatura.fetch_url(url)
    if content is None:
        logger.info("Entity %s: could not download %s", entity_name, url)
        return None

    # extract yields None when no main text is found, so it must be checked
    # before calling .strip() (which would otherwise raise AttributeError).
    extracted = trafilatura.extract(
        content, include_comments=False, include_links=False
    )
    if extracted is None:
        logger.info("Entity %s: no text extracted from %s", entity_name, url)
        return None

    summary = extracted.strip()
    if not summary:
        # Whitespace-only extraction carries no information; report "not found".
        return None

    logger.info(
        "Entity %s search result: %s",
        entity_name,
        summary,
    )
    return summary
async def search_bing(
    bing_search_client: BingSearch,
    entities: set[str],
) -> dict[str, str]:
    """
    Look up every entity on Bing, one after another, behind a progress bar.

    An entity whose lookup raises is logged and skipped; an entity whose
    lookup yields an empty/None summary is skipped silently.

    :param bing_search_client: The Bing search client passed to each lookup.
    :param entities: Names of the entities to search for.
    :return: Mapping of entity name to its non-empty summary.
    """
    progress = tqdm_async(entities, desc="Searching Bing", total=len(entities))
    found: dict[str, str] = {}

    async for name in progress:
        try:
            text = await _process_single_entity(name, bing_search_client)
        except Exception as exc:  # pylint: disable=broad-except
            # Best effort: one failing entity must not abort the whole batch.
            logger.error("Error processing entity %s: %s", name, str(exc))
            continue

        if text:
            found[name] = text

    return found