|
|
""" sanifix4.py |
|
|
|
|
|
Contribution from James Davidson

adapted from: https://github.com/abradle/rdkitserver/blob/master/MYSITE/src/testproject/mol_parsing/sanifix.py
|
|
""" |
|
|
from rdkit import Chem |
|
|
from rdkit.Chem import AllChem |
|
|
import warnings |
|
|
|
|
|
def _FragIndicesToMol(oMol, indices):
    """Copy the atoms of ``oMol`` listed in ``indices`` into a new molecule.

    Atoms are added in the order given, and every bond between two listed
    atoms is recreated with its original bond type.

    :param oMol: source molecule.
    :param indices: atom indices (in ``oMol``) that make up the fragment.
    :return: the new molecule; its ``_idxMap`` attribute maps each original
        atom index to the atom's index in the new molecule.
    :raises KeyError: if a listed atom is bonded to a higher-indexed atom
        that is not itself in ``indices``.
    """
    builder = Chem.EditableMol(Chem.Mol())

    old_to_new = {}
    for new_idx, old_idx in enumerate(indices):
        builder.AddAtom(oMol.GetAtomWithIdx(old_idx))
        old_to_new[old_idx] = new_idx

    for old_idx in indices:
        for bond in oMol.GetAtomWithIdx(old_idx).GetBonds():
            partner = bond.GetOtherAtomIdx(old_idx)
            # Each bond is visited from both ends; add it only from the
            # lower-indexed atom so it is created exactly once.
            if partner >= old_idx:
                builder.AddBond(
                    old_to_new[old_idx],
                    old_to_new[partner],
                    bond.GetBondType(),
                )

    fragment = builder.GetMol()
    fragment.ClearComputedProps()
    Chem.GetSymmSSSR(fragment)
    fragment.UpdatePropertyCache(False)
    fragment._idxMap = old_to_new
    return fragment
|
|
|
|
|
def _recursivelyModifyNs(mol, matches, indices=None):
    """Give candidate atoms one explicit H until the molecule sanitizes.

    Candidates are taken from the front of ``matches`` one at a time. Each
    attempt works on a ``Chem.Mol(mol)`` copy whose candidate atom gets
    ``NoImplicit=True`` and one explicit H. If that copy still fails to
    sanitize, the function recurses on it with the remaining candidates.

    :param mol: molecule to start from; edits are made on copies.
    :param matches: list of atom indices to try. It is consumed in place
        (``pop(0)``) and the same list object is shared with recursive calls.
    :param indices: atom indices already modified by the enclosing calls;
        defaults to an empty list.
    :return: ``(res, indices)`` where ``res`` is a sanitized copy, or
        ``None`` if ``matches`` ran out, and ``indices`` lists the atoms that
        were modified along the way.
    """
    chosen = [] if indices is None else indices
    fixed = None
    while fixed is None and len(matches):
        trial = chosen[:]
        candidate = matches.pop(0)
        trial.append(candidate)

        protonated = Chem.Mol(mol)
        atom = protonated.GetAtomWithIdx(candidate)
        atom.SetNoImplicit(True)
        atom.SetNumExplicitHs(1)

        # Sanitize a throw-away copy so `protonated` stays untouched for the
        # recursive call below.
        probe = Chem.Mol(protonated)
        try:
            Chem.SanitizeMol(probe)
        except ValueError:
            fixed, chosen = _recursivelyModifyNs(protonated, matches, indices=trial)
        else:
            fixed, chosen = probe, trial
    return fixed, chosen
|
|
|
|
|
def AdjustAromaticNs(m, nitrogenPattern='[n&D2&H0;r5,r6]'):
    """Add explicit Hs to aromatic nitrogens so that ``m`` can be sanitized.

    default nitrogen pattern matches Ns in 5 rings and 6 rings in order to be
    able to fix: O=c1ccncc1

    Steps:

    1. Bonds matching ``[r]!@[r]`` (a non-ring bond between two ring atoms)
       are removed on a working copy; aromatic nitrogens at either end of a
       removed bond get one explicit H on that copy.
    2. The copy is split into fragments and each is sanitized on its own.
    3. For a fragment that fails, atoms matching ``nitrogenPattern`` are
       handed to ``_recursivelyModifyNs``.
    4. The atoms it reports are then marked on ``m`` with
       ``NoImplicit=True`` and one explicit H.

    :param m: molecule to adjust; modified in place on success.
    :param nitrogenPattern: SMARTS selecting the candidate nitrogens.
    :return: ``m`` when every fragment could be sanitized, otherwise ``None``.
        On failure no explicit-H edits are written to ``m``.
    """
    Chem.GetSymmSSSR(m)
    m.UpdatePropertyCache(False)

    # Break the non-ring bonds between ring atoms on an editable copy.
    em = Chem.EditableMol(m)
    linkers = m.GetSubstructMatches(Chem.MolFromSmarts('[r]!@[r]'))
    plsFix = set()
    for a, b in linkers:
        em.RemoveBond(a, b)
        plsFix.update((a, b))
    nm = em.GetMol()
    for atom_idx in plsFix:
        at = nm.GetAtomWithIdx(atom_idx)
        if at.GetIsAromatic() and at.GetAtomicNum() == 7:
            at.SetNumExplicitHs(1)
            at.SetNoImplicit(True)

    # Work on each fragment separately.
    fragLists = Chem.GetMolFrags(nm)
    frags = [_FragIndicesToMol(nm, x) for x in fragLists]

    pattern = None  # compiled lazily, only if some fragment needs fixing
    pending = []  # atom indices in `m` that must receive an explicit H
    for frag in frags:
        cp = Chem.Mol(frag)
        try:
            Chem.SanitizeMol(cp)
        except ValueError:
            if pattern is None:
                pattern = Chem.MolFromSmarts(nitrogenPattern)
            matches = [x[0] for x in frag.GetSubstructMatches(pattern)]
            lres, indices = _recursivelyModifyNs(frag, matches)
            if not lres:
                # Give up before touching `m`, so the caller's molecule is
                # not left with edits from the fragments handled so far.
                return None
            # `_idxMap` maps original index -> fragment index; invert it.
            revMap = {v: k for k, v in frag._idxMap.items()}
            pending.extend(revMap[idx] for idx in indices)

    for atom_idx in pending:
        oatom = m.GetAtomWithIdx(atom_idx)
        oatom.SetNoImplicit(True)
        oatom.SetNumExplicitHs(1)
    return m
|
|
|
|
|
|
|
|
|
|
|
def fix_mol(m):
    """Sanitize ``m``, falling back to ``AdjustAromaticNs`` on failure.

    Progress is reported through ``warnings.warn`` with the messages
    ``fine: <smiles>``, ``adjust``, ``fixed: <smiles>`` and ``still broken``.

    :param m: molecule to fix, or ``None``.
    :return:
        * ``None`` when ``m`` is ``None`` or ``AdjustAromaticNs`` returns
          ``None``;
        * a sanitized copy of ``m`` when plain sanitization works;
        * otherwise the molecule returned by ``AdjustAromaticNs``, after a
          second sanitization attempt.
    """
    if m is None:
        return None
    try:
        m.UpdatePropertyCache(False)
        # Sanitize a copy rebuilt from the binary form, not `m` itself.
        sanitized = Chem.Mol(m.ToBinary())
        Chem.SanitizeMol(sanitized)
        m = sanitized
        warnings.warn(f'fine: {Chem.MolToSmiles(m)}')
        return m
    except ValueError:
        warnings.warn('adjust')
        nm = AdjustAromaticNs(m)
        if nm is None:
            warnings.warn('still broken')
            return nm
        try:
            Chem.SanitizeMol(nm)
            warnings.warn(f'fixed: {Chem.MolToSmiles(nm)}')
        except ValueError:
            # NOTE(review): the molecule is still returned on this path, even
            # though sanitization failed - confirm callers expect that.
            warnings.warn('still broken')
        return nm
|
|
|
|
|
if __name__ == '__main__':
    # Demo driver: try to sanitize every record of "18.sdf" and report the
    # outcome on stdout.
    # The context manager closes the file once the text has been read.
    with open("18.sdf") as sdf_file:
        ms = sdf_file.read().split("$$$$\n")
    for txt_m in ms:
        # Skip empty chunks, e.g. whatever follows the last "$$$$" delimiter.
        if not txt_m.strip():
            continue
        m = Chem.MolFromMolBlock(txt_m, False)
        print('#---------------------')
        if m is None:
            # The mol block could not be parsed; there is nothing to adjust.
            print('still broken')
            continue
        try:
            m.UpdatePropertyCache(False)
            cp = Chem.Mol(m.ToBinary())
            Chem.SanitizeMol(cp)
            m = cp
            print('fine:', Chem.MolToSmiles(m))
        except ValueError:
            print('adjust')
            nm = AdjustAromaticNs(m)
            if nm is None:
                print('still broken')
                continue
            try:
                Chem.SanitizeMol(nm)
            except ValueError:
                # Report the failure instead of aborting the whole run.
                print('still broken')
            else:
                print('fixed:', Chem.MolToSmiles(nm))
|
|
|