Spaces:
Running
Running
File size: 1,597 Bytes
fb9c306 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import trafilatura
from tqdm.asyncio import tqdm_asyncio as tqdm_async
from graphgen.models import BingSearch
from graphgen.utils import logger
async def _process_single_entity(
    entity_name: str, bing_search_client: BingSearch
) -> str | None:
    """
    Process single entity by searching Bing and extracting text from the top hit.

    :param entity_name: The name of the entity to search.
    :param bing_search_client: The Bing search client.
    :return: Stripped text extracted from the first result's page, or None
        when the search yields nothing, the page cannot be downloaded, or no
        text can be extracted from it.
    """
    # NOTE(review): search() and fetch_url() are called synchronously inside a
    # coroutine, so they presumably block the event loop while they run.
    search_results = bing_search_client.search(entity_name)
    if not search_results:
        return None

    # Get more details from the first search result
    first_result = search_results[0]
    content = trafilatura.fetch_url(first_result["url"])
    if not content:
        # A failed download gives no content; bail out instead of passing
        # an empty value on to the extractor.
        logger.warning("Entity %s: failed to download first search result", entity_name)
        return None

    summary = trafilatura.extract(content, include_comments=False, include_links=False)
    if summary is None:
        # No extractable text; calling .strip() on None would raise
        # AttributeError, so honour the "None if not found" contract here.
        logger.warning("Entity %s: no text extracted from first search result", entity_name)
        return None

    summary = summary.strip()
    logger.info(
        "Entity %s search result: %s",
        entity_name,
        summary,
    )
    return summary
async def search_bing(
    bing_search_client: BingSearch,
    entities: set[str],
) -> dict[str, str]:
    """
    Look up every entity through Bing and collect the resulting summaries.

    Entities are handled one after another behind a progress bar. An entity
    whose lookup raises is logged and skipped; an entity whose summary is
    empty or None is left out of the result.

    :param bing_search_client: The Bing search client used for each lookup.
    :param entities: Names of the entities to search for.
    :return: Mapping from entity name to its non-empty summary.
    """
    contexts = {}
    progress = tqdm_async(entities, desc="Searching Bing", total=len(entities))
    async for name in progress:
        try:
            text = await _process_single_entity(name, bing_search_client)
            if not text:
                continue
            contexts[name] = text
        except Exception as err:  # pylint: disable=broad-except
            # Best effort: one failing entity must not abort the whole run.
            logger.error("Error processing entity %s: %s", name, str(err))
    return contexts
|