File size: 2,601 Bytes
fb9c306
 
 
e25b548
fb9c306
 
 
 
 
 
 
e25b548
fb9c306
 
 
e25b548
5f219fc
e25b548
fb9c306
e25b548
 
5f219fc
e25b548
fb9c306
 
 
5f219fc
fb9c306
e25b548
06c3276
 
 
 
e25b548
 
fb9c306
5f219fc
 
 
fb9c306
e25b548
 
 
 
 
fb9c306
06c3276
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb9c306
e25b548
fb9c306
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""
To use the Google Web Search API,
follow the instructions [here](https://developers.google.com/custom-search/v1/overview)
to obtain your Google Search API key.

To use the Bing Web Search API,
follow the instructions [here](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api)
to obtain your Bing subscription key.
"""


from graphgen.utils import logger, run_concurrent


async def search_all(
    seed_data: dict,
    search_config: dict,
) -> dict:
    """
    Search every configured data source with the seed contents and collect the results.

    The ``"content"`` value of each seed entry is used as a search keyword.
    Duplicate keywords are dropped (first occurrence wins) so each keyword is
    sent to a data source only once, in a reproducible order.

    :param seed_data: A dictionary whose values are mappings; every value that
        has a ``"content"`` key contributes that content as a search keyword.
    :param search_config: A dictionary with a ``"data_sources"`` list naming the
        sources to query (``"uniprot"``, ``"ncbi"``, ``"rnacentral"``) and
        optional ``"<source>_params"`` dictionaries that are passed as keyword
        arguments to the corresponding search client.
    :return: A dictionary mapping each supported data source name to the value
        returned by ``run_concurrent`` for that source. Unsupported data
        sources are logged as errors and omitted.
    """

    results = {}
    # ``or []`` tolerates an explicit ``data_sources: null`` in the config.
    data_sources = search_config.get("data_sources") or []
    if not data_sources:
        return results

    # The keyword list does not depend on the data source, so build it once.
    # dict.fromkeys removes duplicates while keeping first-seen order, which
    # (unlike set()) makes the keyword order deterministic across runs.
    contents = [d["content"] for d in seed_data.values() if "content" in d]
    data = list(dict.fromkeys(contents))

    for data_source in data_sources:
        # Client classes are imported lazily so that only the backends that
        # are actually requested get loaded.
        if data_source == "uniprot":
            from graphgen.models import UniProtSearch

            client = UniProtSearch(**(search_config.get("uniprot_params") or {}))
            desc = "Searching UniProt database"

        elif data_source == "ncbi":
            from graphgen.models import NCBISearch

            client = NCBISearch(**(search_config.get("ncbi_params") or {}))
            desc = "Searching NCBI database"

        elif data_source == "rnacentral":
            from graphgen.models import RNACentralSearch

            client = RNACentralSearch(
                **(search_config.get("rnacentral_params") or {})
            )
            desc = "Searching RNAcentral database"

        else:
            logger.error("Data source %s not supported.", data_source)
            continue

        results[data_source] = await run_concurrent(
            client.search,
            data,
            desc=desc,
            unit="keyword",
        )

    return results