Spaces:
Running
Running
github-actions[bot]
committed on
Commit
·
b7519b4
1
Parent(s):
f29e862
Auto-sync from demo at Wed Nov 26 03:46:06 UTC 2025
Browse files
graphgen/configs/search_config.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
pipeline:
|
| 2 |
- name: read
|
| 3 |
params:
|
| 4 |
-
input_file: resources/input_examples/search_demo.
|
| 5 |
|
| 6 |
- name: search
|
| 7 |
params:
|
|
|
|
| 1 |
pipeline:
|
| 2 |
- name: read
|
| 3 |
params:
|
| 4 |
+
input_file: resources/input_examples/search_demo.jsonl # input file path; supports json, jsonl, txt, and pdf. See resources/input_examples for examples
|
| 5 |
|
| 6 |
- name: search
|
| 7 |
params:
|
graphgen/models/searcher/db/uniprot_searcher.py
CHANGED
|
@@ -27,12 +27,16 @@ def _get_pool():
|
|
| 27 |
return ThreadPoolExecutor(max_workers=10)
|
| 28 |
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
class UniProtSearch(BaseSearcher):
|
| 31 |
"""
|
| 32 |
UniProt Search client for searching UniProt.
|
| 33 |
1) Get the protein by accession number.
|
| 34 |
2) Search with keywords or protein names (fuzzy searcher).
|
| 35 |
-
3) Search with FASTA sequence (BLAST searcher).
|
| 36 |
"""
|
| 37 |
|
| 38 |
def __init__(self, use_local_blast: bool = False, local_blast_db: str = "sp_db"):
|
|
@@ -230,22 +234,21 @@ class UniProtSearch(BaseSearcher):
|
|
| 230 |
if query.startswith(">") or re.fullmatch(
|
| 231 |
r"[ACDEFGHIKLMNPQRSTVWY\s]+", query, re.I
|
| 232 |
):
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
|
|
|
| 236 |
|
| 237 |
# check if accession number
|
| 238 |
elif re.fullmatch(r"[A-NR-Z0-9]{6,10}", query, re.I):
|
| 239 |
-
|
|
|
|
|
|
|
| 240 |
|
| 241 |
else:
|
| 242 |
# otherwise treat as keyword
|
| 243 |
-
|
| 244 |
|
| 245 |
-
result = await coro
|
| 246 |
if result:
|
| 247 |
result["_search_query"] = query
|
| 248 |
return result
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
# TODO: use local UniProt database for large-scale searches
|
|
|
|
| 27 |
return ThreadPoolExecutor(max_workers=10)
|
| 28 |
|
| 29 |
|
| 30 |
+
# ensure only one BLAST searcher at a time
|
| 31 |
+
_blast_lock = asyncio.Lock()
|
| 32 |
+
|
| 33 |
+
|
| 34 |
class UniProtSearch(BaseSearcher):
|
| 35 |
"""
|
| 36 |
UniProt Search client for searching UniProt.
|
| 37 |
1) Get the protein by accession number.
|
| 38 |
2) Search with keywords or protein names (fuzzy searcher).
|
| 39 |
+
3) Search with FASTA sequence (BLAST searcher). Note that NCBIWWW does not support async.
|
| 40 |
"""
|
| 41 |
|
| 42 |
def __init__(self, use_local_blast: bool = False, local_blast_db: str = "sp_db"):
|
|
|
|
| 234 |
if query.startswith(">") or re.fullmatch(
|
| 235 |
r"[ACDEFGHIKLMNPQRSTVWY\s]+", query, re.I
|
| 236 |
):
|
| 237 |
+
async with _blast_lock:
|
| 238 |
+
result = await loop.run_in_executor(
|
| 239 |
+
_get_pool(), self.get_by_fasta, query, threshold
|
| 240 |
+
)
|
| 241 |
|
| 242 |
# check if accession number
|
| 243 |
elif re.fullmatch(r"[A-NR-Z0-9]{6,10}", query, re.I):
|
| 244 |
+
result = await loop.run_in_executor(
|
| 245 |
+
_get_pool(), self.get_by_accession, query
|
| 246 |
+
)
|
| 247 |
|
| 248 |
else:
|
| 249 |
# otherwise treat as keyword
|
| 250 |
+
result = await loop.run_in_executor(_get_pool(), self.get_best_hit, query)
|
| 251 |
|
|
|
|
| 252 |
if result:
|
| 253 |
result["_search_query"] = query
|
| 254 |
return result
|
|
|
|
|
|
|
|
|