File size: 2,069 Bytes
6e7d4ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import warnings
import numpy as np
import prody
prody.confProDy(verbosity='none')
from prody import parsePDB, ANM


def pdb_to_normal_modes(pdb_file, num_modes=5, nmax=5000):
    """
    Compute normal modes for a PDB file using an Anisotropic Network Model (ANM)
    http://prody.csb.pitt.edu/tutorials/enm_analysis/anm.html (accessed 01/11/2023)
    """
    protein = parsePDB(pdb_file, model=1).select('calpha')

    if len(protein) > nmax:
        warnings.warn("Protein is too big. Returning zeros...")
        eig_vecs = np.zeros((len(protein), 3, num_modes))

    else:
        # build Hessian
        anm = ANM('ANM analysis')
        anm.buildHessian(protein, cutoff=15.0, gamma=1.0)

        # calculate normal modes
        anm.calcModes(num_modes, zeros=False)

        # only use slowest modes
        eig_vecs = anm.getEigvecs()  # shape: (num_atoms * 3, num_modes)
        eig_vecs = eig_vecs.reshape(len(protein), 3, num_modes)
        # eig_vals = anm.getEigvals()  # shape: (num_modes,)

    nm_dict = {}
    for atom, nm_vec in zip(protein, eig_vecs):
        chain = atom.getChid()
        resi = atom.getResnum()
        name = atom.getName()
        nm_dict[(chain, resi, name)] = nm_vec.T

    return nm_dict


if __name__ == "__main__":
    import argparse
    from pathlib import Path
    import torch
    from tqdm import tqdm

    parser = argparse.ArgumentParser()
    parser.add_argument('basedir', type=Path)
    parser.add_argument('--outfile', type=Path, default=None)
    args = parser.parse_args()

    # Read data split
    split_path = Path(args.basedir, 'split_by_name.pt')
    data_split = torch.load(split_path)

    pockets = [x[0] for split in data_split.values() for x in split]

    all_normal_modes = {}
    for p in tqdm(pockets):
        pdb_file = Path(args.basedir, 'crossdocked_pocket10', p)

        try:
            nm_dict = pdb_to_normal_modes(str(pdb_file))
            all_normal_modes[p] = nm_dict
        except AttributeError as e:
            warnings.warn(str(e))

    np.save(args.outfile, all_normal_modes)