Spaces:
Running
Running
File size: 2,601 Bytes
fb9c306 e25b548 fb9c306 e25b548 fb9c306 e25b548 5f219fc e25b548 fb9c306 e25b548 5f219fc e25b548 fb9c306 5f219fc fb9c306 e25b548 06c3276 e25b548 fb9c306 5f219fc fb9c306 e25b548 fb9c306 06c3276 fb9c306 e25b548 fb9c306 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
"""
To use Google Web Search API,
follow the instructions [here](https://developers.google.com/custom-search/v1/overview)
to get your Google Search API key.
To use Bing Web Search API,
follow the instructions [here](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api)
and obtain your Bing subscription key.
"""
from graphgen.utils import logger, run_concurrent
def _build_search_client(data_source: str, search_config: dict):
    """
    Instantiate the search client for a single data source.

    The model classes are imported lazily, so only the backends that are
    actually requested get imported.

    :param data_source: Name of the data source ("uniprot", "ncbi" or "rnacentral").
    :param search_config: Search configuration; the optional "<data_source>_params"
        entry is unpacked as keyword arguments into the client constructor.
    :return: A ``(client, progress_description)`` tuple, or ``None`` when the
        data source is not supported.
    """
    if data_source == "uniprot":
        from graphgen.models import UniProtSearch

        return (
            UniProtSearch(**search_config.get("uniprot_params", {})),
            "Searching UniProt database",
        )
    if data_source == "ncbi":
        from graphgen.models import NCBISearch

        return (
            NCBISearch(**search_config.get("ncbi_params", {})),
            "Searching NCBI database",
        )
    if data_source == "rnacentral":
        from graphgen.models import RNACentralSearch

        return (
            RNACentralSearch(**search_config.get("rnacentral_params", {})),
            "Searching RNAcentral database",
        )
    return None


async def search_all(
    seed_data: dict,
    search_config: dict,
) -> dict:
    """
    Perform searches across the configured data sources and aggregate the results.

    :param seed_data: A dictionary whose values are dictionaries; the "content"
        entry of each value (when present) is used as a search query.
    :param search_config: A dictionary specifying which data sources to use for
        searching ("data_sources") and, optionally, per-source constructor
        arguments under "uniprot_params", "ncbi_params" and "rnacentral_params".
    :return: A dictionary mapping each supported data source name to whatever
        ``run_concurrent`` returned for that source. Unsupported data sources
        are logged as errors and left out of the result.
    """
    results = {}
    data_sources = search_config.get("data_sources", [])
    if not data_sources:
        # Nothing to search; also avoids touching seed_data needlessly.
        return results

    # The queries do not depend on the data source, so build them once.
    # dict.fromkeys removes duplicates while keeping first-seen order, which
    # makes the query order deterministic across runs (unlike set()).
    queries = list(
        dict.fromkeys(
            item["content"] for item in seed_data.values() if "content" in item
        )
    )

    for data_source in data_sources:
        built = _build_search_client(data_source, search_config)
        if built is None:
            logger.error("Data source %s not supported.", data_source)
            continue

        client, description = built
        results[data_source] = await run_concurrent(
            client.search,
            queries,
            desc=description,
            unit="keyword",
        )
    return results
|