github-actions[bot] committed on
Commit
b7519b4
·
1 Parent(s): f29e862

Auto-sync from demo at Wed Nov 26 03:46:06 UTC 2025

Browse files
graphgen/configs/search_config.yaml CHANGED
@@ -1,7 +1,7 @@
1
  pipeline:
2
  - name: read
3
  params:
4
- input_file: resources/input_examples/search_demo.json # input file path, support json, jsonl, txt, pdf. See resources/input_examples for examples
5
 
6
  - name: search
7
  params:
 
1
  pipeline:
2
  - name: read
3
  params:
4
+ input_file: resources/input_examples/search_demo.jsonl # input file path, support json, jsonl, txt, pdf. See resources/input_examples for examples
5
 
6
  - name: search
7
  params:
graphgen/models/searcher/db/uniprot_searcher.py CHANGED
@@ -27,12 +27,16 @@ def _get_pool():
27
  return ThreadPoolExecutor(max_workers=10)
28
 
29
 
 
 
 
 
30
  class UniProtSearch(BaseSearcher):
31
  """
32
  UniProt Search client to searcher with UniProt.
33
  1) Get the protein by accession number.
34
  2) Search with keywords or protein names (fuzzy searcher).
35
- 3) Search with FASTA sequence (BLAST searcher).
36
  """
37
 
38
  def __init__(self, use_local_blast: bool = False, local_blast_db: str = "sp_db"):
@@ -230,22 +234,21 @@ class UniProtSearch(BaseSearcher):
230
  if query.startswith(">") or re.fullmatch(
231
  r"[ACDEFGHIKLMNPQRSTVWY\s]+", query, re.I
232
  ):
233
- coro = loop.run_in_executor(
234
- _get_pool(), self.get_by_fasta, query, threshold
235
- )
 
236
 
237
  # check if accession number
238
  elif re.fullmatch(r"[A-NR-Z0-9]{6,10}", query, re.I):
239
- coro = loop.run_in_executor(_get_pool(), self.get_by_accession, query)
 
 
240
 
241
  else:
242
  # otherwise treat as keyword
243
- coro = loop.run_in_executor(_get_pool(), self.get_best_hit, query)
244
 
245
- result = await coro
246
  if result:
247
  result["_search_query"] = query
248
  return result
249
-
250
-
251
- # TODO: use local UniProt database for large-scale searchs
 
27
  return ThreadPoolExecutor(max_workers=10)
28
 
29
 
30
+ # ensure only one BLAST searcher at a time
31
+ _blast_lock = asyncio.Lock()
32
+
33
+
34
  class UniProtSearch(BaseSearcher):
35
  """
36
  UniProt Search client to searcher with UniProt.
37
  1) Get the protein by accession number.
38
  2) Search with keywords or protein names (fuzzy searcher).
39
+ 3) Search with FASTA sequence (BLAST searcher). Note that NCBIWWW does not support async.
40
  """
41
 
42
  def __init__(self, use_local_blast: bool = False, local_blast_db: str = "sp_db"):
 
234
  if query.startswith(">") or re.fullmatch(
235
  r"[ACDEFGHIKLMNPQRSTVWY\s]+", query, re.I
236
  ):
237
+ async with _blast_lock:
238
+ result = await loop.run_in_executor(
239
+ _get_pool(), self.get_by_fasta, query, threshold
240
+ )
241
 
242
  # check if accession number
243
  elif re.fullmatch(r"[A-NR-Z0-9]{6,10}", query, re.I):
244
+ result = await loop.run_in_executor(
245
+ _get_pool(), self.get_by_accession, query
246
+ )
247
 
248
  else:
249
  # otherwise treat as keyword
250
+ result = await loop.run_in_executor(_get_pool(), self.get_best_hit, query)
251
 
 
252
  if result:
253
  result["_search_query"] = query
254
  return result