Source code for chemper.mol_toolkits.cp_rdk

"""
cp_rdk.py

Cheminformatics tools using RDKit

The classes provided here follow the structure in adapters.
This is a wrapper allowing our actual package to use RDKit
"""

from chemper.mol_toolkits.adapters import MolAdapter, AtomAdapter, BondAdapter
from rdkit import Chem


# =======================================
# Molecule Class
# =======================================

[docs]class Mol(MolAdapter): def __init__(self, mol): """ Create a ChemPer Mol from an RDMol Parameters ---------- mol : openeye RDKMol object openeye molecule to convert to ChemPer Mol object """ if not isinstance(mol, Chem.rdchem.Mol): raise TypeError("Expecting an rdchem.Mol instead of %s" % type(mol)) self.mol = mol def __str__(self): return self.get_smiles()
[docs] @classmethod def from_smiles(cls, smiles): mol = Chem.MolFromSmiles(smiles) if mol is None: raise ValueError('Could not parse SMILES %s' % smiles) return cls(Chem.AddHs(mol))
[docs] def set_aromaticity_mdl(self): Chem.SanitizeMol(self.mol, Chem.SANITIZE_ALL^Chem.SANITIZE_SETAROMATICITY) Chem.SetAromaticity(self.mol, Chem.AromaticityModel.AROMATICITY_MDL)
[docs] def get_atoms(self): return [Atom(a) for a in self.mol.GetAtoms()]
[docs] def get_atom_by_index(self, idx): return Atom(self.mol.GetAtomWithIdx(idx))
[docs] def get_bonds(self): return [Bond(b) for b in self.mol.GetBonds()]
[docs] def get_bond_by_index(self, idx): return Bond(self.mol.GetBondWithIdx(idx))
[docs] def get_bond_by_atoms(self, atom1, atom2): if not atom1.is_connected_to(atom2): return None return Bond(self.mol.GetBondBetweenAtoms(atom1.get_index(), atom2.get_index()))
[docs] def get_smiles(self): smiles = Chem.MolToSmiles(Chem.RemoveHs(self.mol)) return smiles
# ======================================= # Atom Class # =======================================
[docs]class Atom(AtomAdapter): def __init__(self, atom): """ Create a ChemPer Atom from an RDAtom Parameters ---------- atom : RDKit Atom Atom object from an RDK molecule """ if not isinstance(atom, Chem.rdchem.Atom): raise TypeError("Expecting a rdchem.Atom instead of %s" % type(atom)) self.atom = atom self._idx = self.atom.GetIdx() def __str__(self): return '%i%s' % (self._idx, self.atom.GetSymbol())
[docs] def atomic_number(self): return self.atom.GetAtomicNum()
[docs] def degree(self): return self.atom.GetDegree()
[docs] def connectivity(self): return self.atom.GetTotalDegree()
[docs] def valence(self): return self.atom.GetTotalValence()
[docs] def formal_charge(self): return self.atom.GetFormalCharge()
[docs] def hydrogen_count(self): return self.atom.GetTotalNumHs(includeNeighbors=True)
[docs] def ring_connectivity(self): return len([b for b in self.atom.GetBonds() if b.IsInRing()])
[docs] def min_ring_size(self): if not self.atom.IsInRing(): return 0 for i in range(10000): if self.atom.IsInRingSize(i): return i # TODO: raise exception instead? return 10000
[docs] def is_aromatic(self): return self.atom.GetIsAromatic()
[docs] def get_index(self): return self._idx
[docs] def is_connected_to(self, atom2): if not isinstance(atom2.atom, Chem.rdchem.Atom): return False neighbors = [a.GetIdx() for a in self.atom.GetNeighbors()] return atom2.get_index() in neighbors
[docs] def get_neighbors(self): return [Atom(a) for a in self.atom.GetNeighbors()]
[docs] def get_bonds(self): return [Bond(b) for b in self.atom.GetBonds()]
[docs] def get_molecule(self): mol = Chem.Mol(self.atom.GetOwningMol()) return Mol(mol)
# ======================================= # Bond Class # =======================================
[docs]class Bond(BondAdapter): def __init__(self, bond): """ Creates a ChemPer Bond from an RDK Bond Parameters ---------- bond : RDK Bond Bond object from an RDK molecule """ if not isinstance(bond, Chem.rdchem.Bond): raise TypeError("Expecting an rdchem.Bond instead of %s" % type(bond)) self.bond = bond # save index self._idx = self.bond.GetIdx() # save order information self._order = self.bond.GetBondTypeAsDouble() orders = {1:'-', 2:'=', 3:'#', 1.5:':'} self._order_symbol = orders.get(self._order, '~') # save atoms in bond self._beginning = Atom(self.bond.GetBeginAtom()) self._end = Atom(self.bond.GetEndAtom()) def __str__(self): return "%i %s%s%s" % (self.get_index(), self._beginning, self._order_symbol, self._end)
[docs] def get_order(self): return self._order
[docs] def get_atoms(self): return [self._beginning, self._end]
[docs] def is_ring(self): return self.bond.IsInRing()
[docs] def is_aromatic(self): return self.bond.GetIsAromatic()
[docs] def is_single(self): return self._order == 1
[docs] def is_double(self): return self._order == 2
[docs] def is_triple(self): return self._order == 3
[docs] def get_molecule(self): mol = Chem.Mol(self.bond.GetOwningMol()) return Mol(mol)
[docs] def get_index(self): return self._idx
# ===================================================================== # functions for importing molecules from files # =====================================================================
[docs]def mols_from_mol2(mol2_file): """ Parses a mol2 file into ChemPer molecules using RDKit This is a hack for separating mol2 files taken from a Source Forge discussion here: https://www.mail-archive.com/rdkit-discuss@lists.sourceforge.net/msg01510.html It splits up a mol2 file into blocks and then uses RDKit to parse those blocks Parameters ---------- mol2_file : str relative or absolute path to a mol2 file you want to parse accessible form the current directory Returns ------- mols : list[ChemPer Mol] list of molecules in the mol2 file as ChemPer molecules """ # TODO: check that this works with mol2 files with a single molecule # TODO: figure out if @<TRIPOS>MOLECULE is the only delimiter acceptable in this file type import os if not os.path.exists(mol2_file): from chemper.chemper_utils import get_data_path mol_path = get_data_path(os.path.join('molecules', mol2_file)) if not os.path.exists(mol_path): raise IOError("File '%s' not found locally or in chemper/data/molecules." % mol2_file) else: mol2_file = mol_path delimiter="@<TRIPOS>MOLECULE" if mol2_file.split('.')[-1] != "mol2": raise IOError("File '%s' is not a mol2 file" % mol2_file) if not os.path.exists(mol2_file): raise IOError("File '%s' not found." % mol2_file) molecules = list() mol2_block = list() file_open = open(mol2_file) for line in file_open: if line.startswith(delimiter) and mol2_block: rdmol = Chem.MolFromMol2Block("".join(mol2_block)) if rdmol is not None: molecules.append(Mol(rdmol)) mol2_block = [] mol2_block.append(line) if mol2_block: rdmol = Chem.MolFromMol2Block("".join(mol2_block)) if rdmol is not None: molecules.append(Mol(rdmol)) file_open.close() return molecules