"""
cp_rdk.py
Cheminformatics tools using RDKit
The classes provided here follow the structure in adapters.
This is a wrapper allowing our actual package to use RDKit
"""
from chemper.mol_toolkits.adapters import MolAdapter, AtomAdapter, BondAdapter
from rdkit import Chem
# =======================================
# Molecule Class
# =======================================
[docs]class Mol(MolAdapter):
def __init__(self, mol):
"""
Create a ChemPer Mol from an RDMol
Parameters
----------
mol : openeye RDKMol object
openeye molecule to convert to ChemPer Mol object
"""
if not isinstance(mol, Chem.rdchem.Mol):
raise TypeError("Expecting an rdchem.Mol instead of %s" % type(mol))
self.mol = mol
def __str__(self): return self.get_smiles()
[docs] @classmethod
def from_smiles(cls, smiles):
mol = Chem.MolFromSmiles(smiles)
if mol is None:
raise ValueError('Could not parse SMILES %s' % smiles)
return cls(Chem.AddHs(mol))
[docs] def set_aromaticity_mdl(self):
Chem.SanitizeMol(self.mol, Chem.SANITIZE_ALL^Chem.SANITIZE_SETAROMATICITY)
Chem.SetAromaticity(self.mol, Chem.AromaticityModel.AROMATICITY_MDL)
[docs] def get_atoms(self):
return [Atom(a) for a in self.mol.GetAtoms()]
[docs] def get_atom_by_index(self, idx):
return Atom(self.mol.GetAtomWithIdx(idx))
[docs] def get_bonds(self):
return [Bond(b) for b in self.mol.GetBonds()]
[docs] def get_bond_by_index(self, idx):
return Bond(self.mol.GetBondWithIdx(idx))
[docs] def get_bond_by_atoms(self, atom1, atom2):
if not atom1.is_connected_to(atom2):
return None
return Bond(self.mol.GetBondBetweenAtoms(atom1.get_index(), atom2.get_index()))
[docs] def smirks_search(self, smirks):
cmol = Chem.Mol(self.mol)
matches = list()
ss = Chem.MolFromSmarts(smirks)
if ss is None:
raise ValueError("Error parsing SMIRKS %s" % smirks)
# get atoms in query mol with smirks index
maps = dict()
for qatom in ss.GetAtoms():
smirks_idx = qatom.GetAtomMapNum()
if smirks_idx != 0:
maps[smirks_idx] = qatom.GetIdx()
for match in cmol.GetSubstructMatches(ss, False):
d = {k:self.get_atom_by_index(match[e]) for k,e in maps.items()}
matches.append(d)
return matches
[docs] def get_smiles(self):
smiles = Chem.MolToSmiles(Chem.RemoveHs(self.mol))
return smiles
# =======================================
# Atom Class
# =======================================
[docs]class Atom(AtomAdapter):
def __init__(self, atom):
"""
Create a ChemPer Atom from an RDAtom
Parameters
----------
atom : RDKit Atom
Atom object from an RDK molecule
"""
if not isinstance(atom, Chem.rdchem.Atom):
raise TypeError("Expecting a rdchem.Atom instead of %s" % type(atom))
self.atom = atom
self._idx = self.atom.GetIdx()
def __str__(self): return '%i%s' % (self._idx, self.atom.GetSymbol())
[docs] def atomic_number(self): return self.atom.GetAtomicNum()
[docs] def degree(self): return self.atom.GetDegree()
[docs] def connectivity(self): return self.atom.GetTotalDegree()
[docs] def valence(self): return self.atom.GetTotalValence()
[docs] def hydrogen_count(self):
return self.atom.GetTotalNumHs(includeNeighbors=True)
[docs] def ring_connectivity(self):
return len([b for b in self.atom.GetBonds() if b.IsInRing()])
[docs] def min_ring_size(self):
if not self.atom.IsInRing():
return 0
for i in range(10000):
if self.atom.IsInRingSize(i):
return i
# TODO: raise exception instead?
return 10000
[docs] def is_aromatic(self): return self.atom.GetIsAromatic()
[docs] def get_index(self): return self._idx
[docs] def is_connected_to(self, atom2):
if not isinstance(atom2.atom, Chem.rdchem.Atom):
return False
neighbors = [a.GetIdx() for a in self.atom.GetNeighbors()]
return atom2.get_index() in neighbors
[docs] def get_neighbors(self):
return [Atom(a) for a in self.atom.GetNeighbors()]
[docs] def get_bonds(self):
return [Bond(b) for b in self.atom.GetBonds()]
[docs] def get_molecule(self):
mol = Chem.Mol(self.atom.GetOwningMol())
return Mol(mol)
# =======================================
# Bond Class
# =======================================
[docs]class Bond(BondAdapter):
def __init__(self, bond):
"""
Creates a ChemPer Bond from an RDK Bond
Parameters
----------
bond : RDK Bond
Bond object from an RDK molecule
"""
if not isinstance(bond, Chem.rdchem.Bond):
raise TypeError("Expecting an rdchem.Bond instead of %s" % type(bond))
self.bond = bond
# save index
self._idx = self.bond.GetIdx()
# save order information
self._order = self.bond.GetBondTypeAsDouble()
orders = {1:'-', 2:'=', 3:'#', 1.5:':'}
self._order_symbol = orders.get(self._order, '~')
# save atoms in bond
self._beginning = Atom(self.bond.GetBeginAtom())
self._end = Atom(self.bond.GetEndAtom())
def __str__(self):
return "%i %s%s%s" % (self.get_index(), self._beginning,
self._order_symbol, self._end)
[docs] def get_order(self): return self._order
[docs] def get_atoms(self): return [self._beginning, self._end]
[docs] def is_ring(self): return self.bond.IsInRing()
[docs] def is_aromatic(self): return self.bond.GetIsAromatic()
[docs] def is_single(self): return self._order == 1
[docs] def is_double(self): return self._order == 2
[docs] def is_triple(self): return self._order == 3
[docs] def get_molecule(self):
mol = Chem.Mol(self.bond.GetOwningMol())
return Mol(mol)
[docs] def get_index(self): return self._idx
# =====================================================================
# functions for importing molecules from files
# =====================================================================
[docs]def mols_from_mol2(mol2_file):
"""
Parses a mol2 file into ChemPer molecules using RDKit
This is a hack for separating mol2 files taken from a Source Forge discussion here:
https://www.mail-archive.com/rdkit-discuss@lists.sourceforge.net/msg01510.html
It splits up a mol2 file into blocks and then uses RDKit to parse those blocks
Parameters
----------
mol2_file : str
relative or absolute path to a mol2 file you want to parse
accessible form the current directory
Returns
-------
mols : list[ChemPer Mol]
list of molecules in the mol2 file as ChemPer molecules
"""
# TODO: check that this works with mol2 files with a single molecule
# TODO: figure out if @<TRIPOS>MOLECULE is the only delimiter acceptable in this file type
import os
if not os.path.exists(mol2_file):
from chemper.chemper_utils import get_data_path
mol_path = get_data_path(os.path.join('molecules', mol2_file))
if not os.path.exists(mol_path):
raise IOError("File '%s' not found locally or in chemper/data/molecules." % mol2_file)
else:
mol2_file = mol_path
delimiter="@<TRIPOS>MOLECULE"
if mol2_file.split('.')[-1] != "mol2":
raise IOError("File '%s' is not a mol2 file" % mol2_file)
if not os.path.exists(mol2_file):
raise IOError("File '%s' not found." % mol2_file)
molecules = list()
mol2_block = list()
file_open = open(mol2_file)
for line in file_open:
if line.startswith(delimiter) and mol2_block:
rdmol = Chem.MolFromMol2Block("".join(mol2_block))
if rdmol is not None:
molecules.append(Mol(rdmol))
mol2_block = []
mol2_block.append(line)
if mol2_block:
rdmol = Chem.MolFromMol2Block("".join(mol2_block))
if rdmol is not None:
molecules.append(Mol(rdmol))
file_open.close()
return molecules