|
|
import os |
|
|
import re |
|
|
import tempfile |
|
|
import numpy as np |
|
|
import torch |
|
|
from pathlib import Path |
|
|
import argparse |
|
|
import pandas as pd |
|
|
from rdkit import Chem |
|
|
from tqdm import tqdm |
|
|
|
|
|
try: |
|
|
import utils |
|
|
except ModuleNotFoundError as e: |
|
|
print(e) |
|
|
|
|
|
|
|
|
def calculate_smina_score(pdb_file, sdf_file): |
|
|
|
|
|
out = os.popen(f'smina.static -l {sdf_file} -r {pdb_file} ' |
|
|
f'--score_only').read() |
|
|
matches = re.findall( |
|
|
r"Affinity:[ ]+([+-]?[0-9]*[.]?[0-9]+)[ ]+\(kcal/mol\)", out) |
|
|
return [float(x) for x in matches] |
|
|
|
|
|
|
|
|
def smina_score(rdmols, receptor_file): |
|
|
""" |
|
|
Calculate smina score |
|
|
:param rdmols: List of RDKit molecules |
|
|
:param receptor_file: Receptor pdb/pdbqt file or list of receptor files |
|
|
:return: Smina score for each input molecule (list) |
|
|
""" |
|
|
|
|
|
if isinstance(receptor_file, list): |
|
|
scores = [] |
|
|
for mol, rec_file in zip(rdmols, receptor_file): |
|
|
with tempfile.NamedTemporaryFile(suffix='.sdf') as tmp: |
|
|
tmp_file = tmp.name |
|
|
utils.write_sdf_file(tmp_file, [mol]) |
|
|
scores.extend(calculate_smina_score(rec_file, tmp_file)) |
|
|
|
|
|
|
|
|
else: |
|
|
with tempfile.NamedTemporaryFile(suffix='.sdf') as tmp: |
|
|
tmp_file = tmp.name |
|
|
utils.write_sdf_file(tmp_file, rdmols) |
|
|
scores = calculate_smina_score(receptor_file, tmp_file) |
|
|
|
|
|
return scores |
|
|
|
|
|
|
|
|
def sdf_to_pdbqt(sdf_file, pdbqt_outfile, mol_id): |
|
|
os.popen(f'obabel {sdf_file} -O {pdbqt_outfile} ' |
|
|
f'-f {mol_id + 1} -l {mol_id + 1}').read() |
|
|
return pdbqt_outfile |
|
|
|
|
|
|
|
|
def calculate_qvina2_score(receptor_file, sdf_file, out_dir, size=20, |
|
|
exhaustiveness=16, return_rdmol=False): |
|
|
|
|
|
receptor_file = Path(receptor_file) |
|
|
sdf_file = Path(sdf_file) |
|
|
|
|
|
if receptor_file.suffix == '.pdb': |
|
|
|
|
|
receptor_pdbqt_file = Path(out_dir, receptor_file.stem + '.pdbqt') |
|
|
os.popen(f'prepare_receptor4.py -r {receptor_file} -O {receptor_pdbqt_file}') |
|
|
else: |
|
|
receptor_pdbqt_file = receptor_file |
|
|
|
|
|
scores = [] |
|
|
rdmols = [] |
|
|
suppl = Chem.SDMolSupplier(str(sdf_file), sanitize=False) |
|
|
for i, mol in enumerate(suppl): |
|
|
ligand_name = f'{sdf_file.stem}_{i}' |
|
|
|
|
|
ligand_pdbqt_file = Path(out_dir, ligand_name + '.pdbqt') |
|
|
out_sdf_file = Path(out_dir, ligand_name + '_out.sdf') |
|
|
|
|
|
if out_sdf_file.exists(): |
|
|
with open(out_sdf_file, 'r') as f: |
|
|
scores.append( |
|
|
min([float(x.split()[2]) for x in f.readlines() |
|
|
if x.startswith(' VINA RESULT:')]) |
|
|
) |
|
|
|
|
|
else: |
|
|
sdf_to_pdbqt(sdf_file, ligand_pdbqt_file, i) |
|
|
|
|
|
|
|
|
cx, cy, cz = mol.GetConformer().GetPositions().mean(0) |
|
|
|
|
|
|
|
|
out = os.popen( |
|
|
f'qvina2.1 --receptor {receptor_pdbqt_file} ' |
|
|
f'--ligand {ligand_pdbqt_file} ' |
|
|
f'--center_x {cx:.4f} --center_y {cy:.4f} --center_z {cz:.4f} ' |
|
|
f'--size_x {size} --size_y {size} --size_z {size} ' |
|
|
f'--exhaustiveness {exhaustiveness}' |
|
|
).read() |
|
|
|
|
|
|
|
|
ligand_pdbqt_file.unlink() |
|
|
|
|
|
if '-----+------------+----------+----------' not in out: |
|
|
scores.append(np.nan) |
|
|
continue |
|
|
|
|
|
out_split = out.splitlines() |
|
|
best_idx = out_split.index('-----+------------+----------+----------') + 1 |
|
|
best_line = out_split[best_idx].split() |
|
|
assert best_line[0] == '1' |
|
|
scores.append(float(best_line[1])) |
|
|
|
|
|
out_pdbqt_file = Path(out_dir, ligand_name + '_out.pdbqt') |
|
|
if out_pdbqt_file.exists(): |
|
|
os.popen(f'obabel {out_pdbqt_file} -O {out_sdf_file}').read() |
|
|
|
|
|
|
|
|
out_pdbqt_file.unlink() |
|
|
|
|
|
if return_rdmol: |
|
|
rdmol = Chem.SDMolSupplier(str(out_sdf_file))[0] |
|
|
rdmols.append(rdmol) |
|
|
|
|
|
if return_rdmol: |
|
|
return scores, rdmols |
|
|
else: |
|
|
return scores |
|
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
|
parser = argparse.ArgumentParser('QuickVina evaluation') |
|
|
parser.add_argument('--pdbqt_dir', type=Path, |
|
|
help='Receptor files in pdbqt format') |
|
|
parser.add_argument('--sdf_dir', type=Path, default=None, |
|
|
help='Ligand files in sdf format') |
|
|
parser.add_argument('--sdf_files', type=Path, nargs='+', default=None) |
|
|
parser.add_argument('--out_dir', type=Path) |
|
|
parser.add_argument('--write_csv', action='store_true') |
|
|
parser.add_argument('--write_dict', action='store_true') |
|
|
parser.add_argument('--dataset', type=str, default='moad') |
|
|
args = parser.parse_args() |
|
|
|
|
|
assert (args.sdf_dir is not None) ^ (args.sdf_files is not None) |
|
|
|
|
|
args.out_dir.mkdir(exist_ok=True) |
|
|
|
|
|
results = {'receptor': [], 'ligand': [], 'scores': []} |
|
|
results_dict = {} |
|
|
sdf_files = list(args.sdf_dir.glob('[!.]*.sdf')) \ |
|
|
if args.sdf_dir is not None else args.sdf_files |
|
|
pbar = tqdm(sdf_files) |
|
|
for sdf_file in pbar: |
|
|
pbar.set_description(f'Processing {sdf_file.name}') |
|
|
|
|
|
if args.dataset == 'moad': |
|
|
""" |
|
|
Ligand file names should be of the following form: |
|
|
<receptor-name>_<pocket-id>_<some-suffix>.sdf |
|
|
where <receptor-name> and <pocket-id> cannot contain any |
|
|
underscores, e.g.: 1abc-bio1_pocket0_gen.sdf |
|
|
""" |
|
|
ligand_name = sdf_file.stem |
|
|
receptor_name, pocket_id, *suffix = ligand_name.split('_') |
|
|
suffix = '_'.join(suffix) |
|
|
receptor_file = Path(args.pdbqt_dir, receptor_name + '.pdbqt') |
|
|
elif args.dataset == 'crossdocked': |
|
|
ligand_name = sdf_file.stem |
|
|
receptor_name = ligand_name[:-4] |
|
|
receptor_file = Path(args.pdbqt_dir, receptor_name + '.pdbqt') |
|
|
|
|
|
|
|
|
scores, rdmols = calculate_qvina2_score( |
|
|
receptor_file, sdf_file, args.out_dir, return_rdmol=True) |
|
|
|
|
|
|
|
|
|
|
|
results['receptor'].append(str(receptor_file)) |
|
|
results['ligand'].append(str(sdf_file)) |
|
|
results['scores'].append(scores) |
|
|
|
|
|
if args.write_dict: |
|
|
results_dict[ligand_name] = { |
|
|
'receptor': str(receptor_file), |
|
|
'ligand': str(sdf_file), |
|
|
'scores': scores, |
|
|
'rmdols': rdmols |
|
|
} |
|
|
|
|
|
if args.write_csv: |
|
|
df = pd.DataFrame.from_dict(results) |
|
|
df.to_csv(Path(args.out_dir, 'qvina2_scores.csv')) |
|
|
|
|
|
if args.write_dict: |
|
|
torch.save(results_dict, Path(args.out_dir, 'qvina2_scores.pt')) |
|
|
|