File size: 15,965 Bytes
6e7d4ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 |
import os
from rdkit import Chem
import torch
import numpy as np
# ------------------------------------------------------------------------------
# Computational
# ------------------------------------------------------------------------------
FLOAT_TYPE = torch.float32
INT_TYPE = torch.int64
# ------------------------------------------------------------------------------
# Type encoding/decoding
# ------------------------------------------------------------------------------
atom_dict = os.environ.get('ATOM_DICT')
if atom_dict == 'simple':
atom_encoder = {'C': 0, 'N': 1, 'O': 2, 'S': 3, 'B': 4, 'Br': 5, 'Cl': 6, 'P': 7, 'I': 8, 'F': 9, 'NOATOM': 10}
atom_decoder = ['C', 'N', 'O', 'S', 'B', 'Br', 'Cl', 'P', 'I', 'F', 'NOATOM']
else:
atom_encoder = {'C': 0, 'N': 1, 'O': 2, 'S': 3, 'B': 4, 'Br': 5, 'Cl': 6, 'P': 7, 'I': 8, 'F': 9, 'NH': 10, 'N+': 11, 'O-': 12, 'NOATOM': 13}
atom_decoder = ['C', 'N', 'O', 'S', 'B', 'Br', 'Cl', 'P', 'I', 'F', 'NH', 'N+', 'O-', 'NOATOM']
bond_encoder = {"NOBOND": 0, "SINGLE": 1, "DOUBLE": 2, "TRIPLE": 3, 'AROMATIC': 4}
bond_decoder = [None, Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE, Chem.rdchem.BondType.TRIPLE, Chem.rdchem.BondType.AROMATIC]
aa_encoder = {'A': 0, 'C': 1, 'D': 2, 'E': 3, 'F': 4, 'G': 5, 'H': 6, 'I': 7, 'K': 8, 'L': 9, 'M': 10, 'N': 11, 'P': 12, 'Q': 13, 'R': 14, 'S': 15, 'T': 16, 'V': 17, 'W': 18, 'Y': 19}
aa_decoder = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
residue_encoder = {'CA': 0, 'SS': 1}
residue_decoder = ['CA', 'SS']
residue_bond_encoder = {'CA-CA': 0, 'CA-SS': 1, 'NOBOND': 2}
residue_bond_decoder = ['CA-CA', 'CA-SS', None]
# aa_atom_index = {
# 'A': {'N': 0, 'C': 1, 'O': 2, 'CB': 3},
# 'C': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'SG': 4},
# 'D': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG': 4, 'OD1': 5, 'OD2': 6},
# 'E': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG': 4, 'CD': 5, 'OE1': 6, 'OE2': 7},
# 'F': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG': 4, 'CD1': 5, 'CD2': 6, 'CE1': 7, 'CE2': 8, 'CZ': 9},
# 'G': {'N': 0, 'C': 1, 'O': 2},
# 'H': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG': 4, 'ND1': 5, 'CD2': 6, 'CE1': 7, 'NE2': 8},
# 'I': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG1': 4, 'CG2': 5, 'CD1': 6},
# 'K': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG': 4, 'CD': 5, 'CE': 6, 'NZ': 7},
# 'L': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG': 4, 'CD1': 5, 'CD2': 6},
# 'M': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG': 4, 'SD': 5, 'CE': 6},
# 'N': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG': 4, 'OD1': 5, 'ND2': 6},
# 'P': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG': 4, 'CD': 5},
# 'Q': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG': 4, 'CD': 5, 'OE1': 6, 'NE2': 7},
# 'R': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG': 4, 'CD': 5, 'NE': 6, 'CZ': 7, 'NH1': 8, 'NH2': 9},
# 'S': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'OG': 4},
# 'T': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'OG1': 4, 'CG2': 5},
# 'V': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG1': 4, 'CG2': 5},
# 'W': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG': 4, 'CD1': 5, 'CD2': 6, 'NE1': 7, 'CE2': 8, 'CE3': 9, 'CZ2': 10, 'CZ3': 11, 'CH2': 12},
# 'Y': {'N': 0, 'C': 1, 'O': 2, 'CB': 3, 'CG': 4, 'CD1': 5, 'CD2': 6, 'CE1': 7, 'CE2': 8, 'CZ': 9, 'OH': 10},
# }
aa_atom_index = {
'A': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4},
'C': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'SG': 5},
'D': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG': 5, 'OD1': 6, 'OD2': 7},
'E': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG': 5, 'CD': 6, 'OE1': 7, 'OE2': 8},
'F': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG': 5, 'CD1': 6, 'CD2': 7, 'CE1': 8, 'CE2': 9, 'CZ': 10},
'G': {'N': 0, 'CA': 1, 'C': 2, 'O': 3},
'H': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG': 5, 'ND1': 6, 'CD2': 7, 'CE1': 8, 'NE2': 9},
'I': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG1': 5, 'CG2': 6, 'CD1': 7},
'K': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG': 5, 'CD': 6, 'CE': 7, 'NZ': 8},
'L': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG': 5, 'CD1': 6, 'CD2': 7},
'M': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG': 5, 'SD': 6, 'CE': 7},
'N': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG': 5, 'OD1': 6, 'ND2': 7},
'P': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG': 5, 'CD': 6},
'Q': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG': 5, 'CD': 6, 'OE1': 7, 'NE2': 8},
'R': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG': 5, 'CD': 6, 'NE': 7, 'CZ': 8, 'NH1': 9, 'NH2': 10},
'S': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'OG': 5},
'T': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'OG1': 5, 'CG2': 6},
'V': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG1': 5, 'CG2': 6},
'W': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG': 5, 'CD1': 6, 'CD2': 7, 'NE1': 8, 'CE2': 9, 'CE3': 10, 'CZ2': 11, 'CZ3': 12, 'CH2': 13},
'Y': {'N': 0, 'CA': 1, 'C': 2, 'O': 3, 'CB': 4, 'CG': 5, 'CD1': 6, 'CD2': 7, 'CE1': 8, 'CE2': 9, 'CZ': 10, 'OH': 11},
}
# ------------------------------------------------------------------------------
# NERF
# ------------------------------------------------------------------------------
# indicates whether atom exists
aa_atom_mask = {
'A': [True, True, True, True, True, False, False, False, False, False, False, False, False, False],
'C': [True, True, True, True, True, True, False, False, False, False, False, False, False, False],
'D': [True, True, True, True, True, True, True, True, False, False, False, False, False, False],
'E': [True, True, True, True, True, True, True, True, True, False, False, False, False, False],
'F': [True, True, True, True, True, True, True, True, True, True, True, False, False, False],
'G': [True, True, True, True, False, False, False, False, False, False, False, False, False, False],
'H': [True, True, True, True, True, True, True, True, True, True, False, False, False, False],
'I': [True, True, True, True, True, True, True, True, False, False, False, False, False, False],
'K': [True, True, True, True, True, True, True, True, True, False, False, False, False, False],
'L': [True, True, True, True, True, True, True, True, False, False, False, False, False, False],
'M': [True, True, True, True, True, True, True, True, False, False, False, False, False, False],
'N': [True, True, True, True, True, True, True, True, False, False, False, False, False, False],
'P': [True, True, True, True, True, True, True, False, False, False, False, False, False, False],
'Q': [True, True, True, True, True, True, True, True, True, False, False, False, False, False],
'R': [True, True, True, True, True, True, True, True, True, True, True, False, False, False],
'S': [True, True, True, True, True, True, False, False, False, False, False, False, False, False],
'T': [True, True, True, True, True, True, True, False, False, False, False, False, False, False],
'V': [True, True, True, True, True, True, True, False, False, False, False, False, False, False],
'W': [True, True, True, True, True, True, True, True, True, True, True, True, True, True],
'Y': [True, True, True, True, True, True, True, True, True, True, True, True, False, False],
}
# (14, 3) index tensor with atom indices of atoms a, b and c for NERF reconstruction
# in principle, columns 1 and 2 can be inferred from column one (immediate predecessor) alone
aa_nerf_indices = {
'A': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'C': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'D': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [5, 4, 1], [5, 4, 1], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'E': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [5, 4, 1], [6, 5, 4], [6, 5, 4], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'F': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [5, 4, 1], [5, 4, 1], [6, 5, 4], [7, 5, 4], [8, 6, 5], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'G': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'H': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [5, 4, 1], [5, 4, 1], [6, 5, 4], [7, 5, 4], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'I': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [4, 1, 0], [5, 4, 1], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'K': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [5, 4, 1], [6, 5, 4], [7, 6, 5], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'L': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [5, 4, 1], [5, 4, 1], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'M': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [5, 4, 1], [6, 5, 4], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'N': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [5, 4, 1], [5, 4, 1], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'P': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [5, 4, 1], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'Q': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [5, 4, 1], [6, 5, 4], [6, 5, 4], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'R': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [5, 4, 1], [6, 5, 4], [7, 6, 5], [8, 7, 6], [8, 7, 6], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'S': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'T': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [4, 1, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'V': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [4, 1, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
'W': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [5, 4, 1], [5, 4, 1], [6, 5, 4], [7, 5, 4], [7, 5, 4], [9, 7, 5], [10, 7, 5], [11, 9, 7]],
'Y': [[0, 0, 0], [0, 0, 0], [1, 0, 0], [2, 1, 0], [1, 0, 0], [4, 1, 0], [5, 4, 1], [5, 4, 1], [6, 5, 4], [7, 5, 4], [8, 6, 5], [10, 8, 6], [0, 0, 0], [0, 0, 0]],
}
# unique id for each rotatable bond (0=chi1, 1=chi, ...)
aa_bond_to_chi = {
'A': {},
'C': {('CA', 'CB'): 0},
'D': {('CA', 'CB'): 0, ('CB', 'CG'): 1},
'E': {('CA', 'CB'): 0, ('CB', 'CG'): 1, ('CG', 'CD'): 2},
'F': {('CA', 'CB'): 0, ('CB', 'CG'): 1},
'G': {},
'H': {('CA', 'CB'): 0, ('CB', 'CG'): 1},
'I': {('CA', 'CB'): 0, ('CB', 'CG2'): 1},
'K': {('CA', 'CB'): 0, ('CB', 'CG'): 1, ('CG', 'CD'): 2, ('CD', 'CE'): 3},
'L': {('CA', 'CB'): 0, ('CB', 'CG'): 1},
'M': {('CA', 'CB'): 0, ('CB', 'CG'): 1, ('CG', 'SD'): 2},
'N': {('CA', 'CB'): 0, ('CB', 'CG'): 1},
'P': {},
'Q': {('CA', 'CB'): 0, ('CB', 'CG'): 1, ('CG', 'CD'): 2},
'R': {('CA', 'CB'): 0, ('CB', 'CG'): 1, ('CG', 'CD'): 2, ('CD', 'NE'): 3, ('NE', 'CZ'): 4},
'S': {('CA', 'CB'): 0},
'T': {('CA', 'CB'): 0},
'V': {('CA', 'CB'): 0},
'W': {('CA', 'CB'): 0, ('CB', 'CG'): 1},
'Y': {('CA', 'CB'): 0, ('CB', 'CG'): 1},
}
# index between 0 and 4 to retrieve chi angles, -1 means not a rotatable bond
aa_chi_indices = {
'A': [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
'C': [-1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1],
'D': [-1, -1, -1, -1, -1, 0, 1, 1, -1, -1, -1, -1, -1, -1],
'E': [-1, -1, -1, -1, -1, 0, 1, 2, 2, -1, -1, -1, -1, -1],
'F': [-1, -1, -1, -1, -1, 0, 1, 1, -1, -1, -1, -1, -1, -1],
'G': [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
'H': [-1, -1, -1, -1, -1, 0, 1, 1, -1, -1, -1, -1, -1, -1],
'I': [-1, -1, -1, -1, -1, 0, 0, 1, -1, -1, -1, -1, -1, -1],
'K': [-1, -1, -1, -1, -1, 0, 1, 2, 3, -1, -1, -1, -1, -1],
'L': [-1, -1, -1, -1, -1, 0, 1, 1, -1, -1, -1, -1, -1, -1],
'M': [-1, -1, -1, -1, -1, 0, 1, 2, -1, -1, -1, -1, -1, -1],
'N': [-1, -1, -1, -1, -1, 0, 1, 1, -1, -1, -1, -1, -1, -1],
'P': [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
'Q': [-1, -1, -1, -1, -1, 0, 1, 2, 2, -1, -1, -1, -1, -1],
'R': [-1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 4, -1, -1, -1],
'S': [-1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1],
'T': [-1, -1, -1, -1, -1, 0, 0, -1, -1, -1, -1, -1, -1, -1],
'V': [-1, -1, -1, -1, -1, 0, 0, -1, -1, -1, -1, -1, -1, -1],
'W': [-1, -1, -1, -1, -1, 0, 1, 1, -1, -1, -1, -1, -1, -1],
'Y': [-1, -1, -1, -1, -1, 0, 1, 1, -1, -1, -1, -1, -1, -1],
}
# key: chi index (0=chi1, 1=chi, ...); value: index of atom that defines the chi angle (together with its three predecessors)
aa_chi_anchor_atom = {
'A': {},
'C': {0: 5},
'D': {0: 5, 1: 6},
'E': {0: 5, 1: 6, 2: 7},
'F': {0: 5, 1: 6},
'G': {},
'H': {0: 5, 1: 6},
'I': {0: 5, 1: 7},
'K': {0: 5, 1: 6, 2: 7, 3: 8},
'L': {0: 5, 1: 6},
'M': {0: 5, 1: 6, 2: 7},
'N': {0: 5, 1: 6},
'P': {},
'Q': {0: 5, 1: 6, 2: 7},
'R': {0: 5, 1: 6, 2: 7, 3: 8, 4: 9},
'S': {0: 5},
'T': {0: 5},
'V': {0: 5},
'W': {0: 5, 1: 6},
'Y': {0: 5, 1: 6},
}
# ------------------------------------------------------------------------------
# Visualization
# ------------------------------------------------------------------------------
# PyMOL colors, see: https://pymolwiki.org/index.php/Color_Values#Chemical_element_colours
colors_dic = ['#33ff33', '#3333ff', '#ff4d4d', '#e6c540', '#ffb5b5', '#A62929', '#1FF01F', '#ff8000', '#940094', '#B3FFFF', '#b3e3f5']
radius_dic = [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
# ------------------------------------------------------------------------------
# Backbone geometry
# Taken from: Bhagavan, N. V., and C. E. Ha.
# "Chapter 4-Three-dimensional structure of proteins and disorders of protein misfolding."
# Essentials of Medical Biochemistry (2015): 31-51.
# https://www.sciencedirect.com/science/article/pii/B978012416687500004X
# ------------------------------------------------------------------------------
N_CA_DIST = 1.47
CA_C_DIST = 1.53
N_CA_C_ANGLE = 110 * np.pi / 180
# ------------------------------------------------------------------------------
# Atom radii
# ------------------------------------------------------------------------------
# # https://en.wikipedia.org/wiki/Covalent_radius#Radii_for_multiple_bonds
# # (2023/04/14)
# covalent_radii = {'H': [32, None, None],
# 'C': [75, 67, 60],
# 'N': [71, 60, 54],
# 'O': [63, 57, 53],
# 'F': [64, 59, 53],
# 'B': [85, 78, 73],
# 'Al': [126, 113, 111],
# 'Si': [116, 107, 102],
# 'P': [111, 102, 94],
# 'S': [103, 94, 95],
# 'Cl': [99, 95, 93],
# 'As': [121, 114, 106],
# 'Br': [114, 109, 110],
# 'I': [133, 129, 125],
# 'Hg': [133, 142, None],
# 'Bi': [151, 141, 135]}
# source: https://en.wikipedia.org/wiki/Van_der_Waals_radius
vdw_radii = {'N': 1.55, 'O': 1.52, 'C': 1.70, 'H': 1.10, 'S': 1.80, 'P': 1.80,
'Se': 1.90, 'K': 2.75, 'Na': 2.27, 'Mg': 1.73, 'Zn': 1.39, 'B': 1.92,
'Br': 1.85, 'Cl': 1.75, 'I': 1.98, 'F': 1.47}
WEBDATASET_SHARD_SIZE = 50000
WEBDATASET_VAL_SIZE = 100 |