Source code for chemper.graphs.cluster_graph

"""
cluster_graph.py

ClusterGraph are a class for tracking all possible smirks decorators in a group
(or cluster) of molecular fragments. Moving forward these will be used to
find the minimum number of smirks decorators that are required to have a
set of smirks patterns that maintain a given clustering of fragments.
"""

import networkx as nx
from functools import total_ordering
from chemper.graphs.single_graph import SingleGraph
from chemper.graphs.environment import ChemicalEnvironment as CE
from chemper.mol_toolkits import mol_toolkit


[docs]@total_ordering class ClusterGraph(SingleGraph): """ ChemPerGraphs are a graph based class for storing atom and bond information. They use the chemper.mol_toolkits Atoms, Bonds, and Mols """
[docs] @total_ordering class AtomStorage: """ AtomStorage tracks information about an atom """ def __init__(self, atoms=None, label=None): """ Parameters ---------- atoms : ChemPer Atom or list of ChemPer Atoms this is one or more atoms whose information should be stored label : int SMIRKS index (:n) for writing SMIRKS if the value is less than zero it is used for storage purposes only as SMIRKS can only be written with positive integer indices """ self.decorators = set() if atoms is not None: # check if this is a single atom if 'Atom' in str(type(atoms)): atoms = [atoms] # otherwise it should be iterable for atom in atoms: self.decorators.add(self.make_atom_decorators(atom)) self.label = label def __lt__(self, other): """ Overrides the default implementation This method was primarily written for making SMIRKS patterns predictable. If atoms are sortable, then the SMIRKS patterns are always the same making tests easier to write. However, the specific sorting was created to also make SMIRKS output as human readable as possible, that is to at least make it easier for a human to see how the indexed atoms are related to each other. It is typically easier for humans to read SMILES/SMARTS/SMIRKS with less branching (indicated with ()). For example in: [C:1]([H])([H])~[N:2]([C])~[O:3] it is easier to see that the atoms C~N~O are connected in a "line" instead of: [C:1]([N:2]([O:3])[C])([H])[H] which is equivalent, but with all the () it is hard for a human to read the branching Parameters ---------- other : AtomStorage Returns ------- is_less_than : boolean self is less than other """ # if either smirks index is None, then you can't directly compare # make a temporary index that is negative if it was None self_index = self.label if self.label is not None else -1000 other_index = other.label if other.label is not None else -1000 # if either index is greater than 0, the one that is largest should go at the end of the list if self_index > 0 or other_index > 0: return self_index < other_index # Both SMIRKS indices are not positive or None so compare the SMIRKS patterns instead return self.as_smirks() < other.as_smirks() def __eq__(self, other): return self.as_smirks() == other.as_smirks() and self.label == other.label def __hash__(self): return id(self) def __str__(self): return self.as_smirks()
[docs] def make_atom_decorators(self, atom): """ extract information from a ChemPer Atom that would be useful in a smirks parameters ---------- atom : ChemPer atom object returns ------- decorators : tuple of str tuple of all possible decorators for this atom """ aromatic = 'a' if atom.is_aromatic() else 'A' charge = atom.formal_charge() if charge >= 0: charge = '+%i' % charge else: charge = '%i' % charge min_ring_size = atom.min_ring_size() if min_ring_size == 0: ring = '!r' else: ring = 'r%i' % min_ring_size return ( '#%i' % atom.atomic_number(), 'H%i' % atom.hydrogen_count(), 'X%i' % atom.connectivity(), 'x%i' % atom.ring_connectivity(), ring, charge, aromatic, )
[docs] def as_smirks(self, compress=False): """ Parameters ---------- compress : boolean should decorators common to all sets be combined for example '#6X4,#7X3;+0!r...' Returns ------- smirks : str how this atom would be represented in a SMIRKS string with the minimal combination of SMIRKS decorators """ if len(self.decorators) == 0: if self.label is None or self.label <= 0: return '[*]' return '[*:%i]' % self.label if compress and len(self.decorators) > 1: base_smirks = self._compress_smirks() else: base_smirks = ','.join(sorted([''.join(l) for l in self.decorators])) if self.label is None or self.label <= 0: return '[%s]' % base_smirks return '[%s:%i]' % (base_smirks, self.label)
def _sort_decs(self, dec_set, wild=True): """ Parameters ---------- dec_set : list like single set of atom decorators wild : boolean insert * for decorator lists with no #n decorator Returns ------- sorted_dec_set : list same set of decorators sorted with atomic number or * first """ temp_dec_set = list(dec_set) atom_num = [i for i in temp_dec_set if '#' in i] if len(atom_num) == 0 and wild: atom_num = ["*"] temp_dec_set = set(temp_dec_set) - set(atom_num) aro = [i for i in temp_dec_set if 'a' in i.lower()] temp_dec_set = set(temp_dec_set) - set(aro) return atom_num + sorted(list(temp_dec_set)) + aro def _compress_smirks(self): """ Returns ------- smirks : str This SMIRKS is compressed with all common decorators and'd to the end of the pattern """ set_decs = [set(d) for d in self.decorators] ands = set_decs[0] for d_set in set_decs: ands = ands & d_set # check for atomic number in the "ands" atomic = [a for a in ands if '#' in a] if len(atomic) == 1: # remove from and ands.remove(atomic[0]) # put in all sets for s in set_decs: s.add(atomic[0]) or_sets = [self._sort_decs(d.difference(ands)) for d in set_decs] ors = [''.join(o) for o in or_sets] # add commas between ors base = ','.join(sorted(ors)) # add and decorators if len(ands) > 0: base += ';'+ ';'.join(self._sort_decs(ands, wild=False)) return base
[docs] def add_atom(self, atom): """ Expand current AtomStorage by adding information about a new ChemPer Atom Parameters ---------- atom : ChemPer Atom """ self.decorators.add(self.make_atom_decorators(atom))
[docs] def compare_atom(self, atom): """ Compares decorators in this AtomStorage with the provided ChemPer atom. The decorators are compared separately and the highest score is returned. For example, if this storage had two sets of decorators - #7H1X3x0!r+0A - #6H1X4x0!r+0A and the input atom would have the decorators: - #6H1X3x2!r+0a The score is calculated by finding the number of decorators in common which would be - #7H1X3x0!r+0A and #6H1X3x2r6+0a have 3 decorators in common (H1,X3,+0) - #6H1X4x0!r+0A and #6H1X3x2r6+0a also have 3 decorators in common (#6, H1, +0) However, we weight atoms with the same atomic number as more similar by dividing the score by 10 if the atomic numbers do not agree. Therefore the final scores will be: - 0.3 for #7H1X3x0!r+0A - 3 for #6H1X4x0!r+0A The highest score for any set of decorators is returned so 3 is the returned score in this example. Parameters ---------- atom : ChemPer Atom Returns ------- score : float A score describing how similar the input atom is to any set of decorators currently in this storage, based on its SMIRKS decorators. This score ranges from 0 to 7. 7 comes from the number of decorators on any atom, if this atom matches perfectly with one of the current decorator sets then 7 decorators agree.However, if the atomic number doesn't agree, then that set of decorators is considered less ideal, thus if the atomic numbers don't agree, then the score is given by the number other decorators divided by 10. If the current storage is empty, then the score is given as 7 since any atom matches a wildcard atom. """ # If decorators is empty (no known atom information, return 7 (current max) if len(self.decorators) == 0: return 7 score = 0 decs = self.make_atom_decorators(atom) for ref in self.decorators: # get atomic number for this set of decorators current = len(set(ref) & set(decs)) # if atomic numbers don't agree, get the number of common decorators / 10 # if there are no matching atomic numbers, priority should still be given # when the current atom matches stored decorators most closely if ref[0] != decs[0]: current = current / 10.0 if current > score: score = current return score
[docs] @total_ordering class BondStorage: """ BondStorage tracks information about a bond """ def __init__(self, bonds=None, label=None): """ Parameters ---------- bonds : list of ChemPer Bonds this is one or more bonds whose information should be stored label : a label for the object, it can be anything unlike atoms, bonds in smirks don't have labels so this is only used for labeling the object if wanted """ self.order = set() self.ring = set() self.order_dict = {1:'-', 1.5:':', 2:'=', 3:'#'} if bonds is not None: if 'Bond' in str(type(bonds)): bonds = [bonds] for bond in bonds: self.order.add(bond.get_order()) self.ring.add(bond.is_ring()) self.label = label def __str__(self): return self.as_smirks() def __lt__(self, other): if self.as_smirks() == other.as_smirks(): return self.label < other.label return self.as_smirks() < other.as_smirks() def __eq__(self, other): return self.label == other.label and self.as_smirks() == other.as__smirks() def __hash__(self): return id(self)
[docs] def as_smirks(self): """ Returns ------- smirks : str how this bond would be represented in a SMIRKS string using only the required number of """ if len(self.order) == 0: order = '~' else: order = ','.join([self.order_dict.get(o, '~') for o in sorted(list(self.order))]) # the ring set has booleans, if the length of the set is 1 then only ring (@) or non-ring (!@) # bonds haven been added to this storage and we AND that decorator to the end of the bond if len(self.ring) == 1: if list(self.ring)[0]: return order+';@' else: return order+';!@' return order
[docs] def add_bond(self, bond): """ Expand current BondStorage by adding information about a new ChemPer Bond Parameters ---------- bond : ChemPer Bond """ self.order.add(bond.get_order()) self.ring.add(bond.is_ring())
[docs] def compare_bond(self, bond): """ Parameters ---------- bond : ChemPer Bond bond you want to compare to the current storage Returns ------- score : int (0,1,2) A score describing how similar the input bond is to any set of decorators currently in this storage, based on its SMIRKS decorators. 1 for the bond order + 1 base on if this is a ring bond """ score = 0 if bond.get_order() in self.order or len(self.order) == 0: score += 1 # the ring set has booleans, if the length of the set is 1 then only ring or non-ring # bonds haven been added to this storage. That is the only time the ring contributes to the score if len(self.ring) == 1 and list(self.ring)[0] == bond.is_ring(): score += 1 return score
# Initiate ClusterGraph def __init__(self, mols=None, smirks_atoms_lists=None, layers=0): """ Initialize a SingleGraph from a molecule and list of indexed atoms For the example, imagine we wanted to get a SMIRKS that would match the carbon-carbon bonds in ethane and propane. The carbon atoms are have indices (0,1) in ethane and (0,1) and (1,2) in propane. For this example, we will assume we also want to include the atoms one bond away from the indexed atoms (1 layer away). Parameters ---------- mols : list of molecules (optional) default = None (makes an empty graph) these can be ChemPer Mols or molecule objects from any supported toolkit (currently OpenEye or RDKit) smirks_atoms_lists : list of list of tuples (optional) default = None (must be paired with mols=None) There is a list of tuples for each molecule, where each tuple specifies a molecular fragment using the atoms' indices. In the ethane and propane example, the `smirks_atoms_lists` would be [ [ (0,1) ], [ (0,1), (1,2) ] ] with one carbon-carbon bond in ethane and two carbon-carbon bonds in propane layers : int (optional) default = 0 layers specifies how many bonds away from the indexed atoms should be included in the the SMIRKS patterns. Instead of an int, the string 'all' would lead to all atoms in the molecules being included in the SMIRKS (not recommended) """ SingleGraph.__init__(self) self.mols = list() self.smirks_atoms_lists = list() self.layers = layers self._symmetry_funct = self._no_symmetry if mols is not None: temp_mols = [mol_toolkit.Mol(m) for m in mols] if len(temp_mols) != len(smirks_atoms_lists): raise Exception('Number of molecules and smirks dictionaries should be equal') for idx, mol in enumerate(temp_mols): self.add_mol(mol, smirks_atoms_lists[idx])
[docs] def as_smirks(self, compress=False): """ Parameters ---------- compress : boolean returns the shorter version of atom SMIRKS patterns that is atoms have decorators "anded" to the end rather than listed in each set that are OR'd together. For example "[#6AH2X3x0!r+0,#6AH1X3x0!r+0:1]-;!@[#1AH0X1x0!r+0]" compresses to: "[#6H2,#6H1;AX3x0!r+0:1]-;!@[#1AH0X1x0!r+0]" Returns ------- SMIRKS : str a SMIRKS string matching the exact atom and bond information stored """ # The atom compression is different, but otherwise this is the # same function as the parent class (SingleGraph) return SingleGraph.as_smirks(self, compress)
[docs] def get_symmetry_funct(self, sym_label): """ Determine the symmetry function that should be used when adding atoms to this graph. For example, imagine a user is trying to make a SMIRKS for all of the C-H bonds in methane. In most toolkits the index for the carbon is 0 and the hydrogens are 1,2,3,4. The final SMIRKS should have the form [#6AH4X4x0!r+0:1]-;!@[#1AH0X1x0!r+0] no matter what order the atoms are input into ClusterGraph. So if the user provides (0,1), (0,2), (3,0), (4,0) ClusterGraph should figure out that the carbons in (3,0) and (4,0) should be in the atom index :1 place like they were in the first set of atoms. Bond atoms in (1,2) or (2,1) are symmetric, for angles its (1,2,3) or (3,2,1) for proper torsions (1,2,3,4) or (4,3,2,1) and for improper torsions (1,2,3,4), (3,2,1,4), (4,2,1,3). For any other fragment type the atoms will be added to the graph in the order they are provided since the symmetry function is unknown. # TODO: In theory you could generalize this for generic linear fragments # where those with an odd number of atoms behave like angles and an # even number behave like proper torsions, however I think that is # going to be outside the scope of ChemPer for the foreseeable future. Parameters ---------- sym_label : str or None type of symmetry, options which will change the way symmetry is handled in the graph are "bond", "angle", "ProperTorsion", and "ImproperTorsion" Returns ------- symmetry_funct : function returns the function that should be used to handle the appropriate symmetry """ if sym_label is None: return self._no_symmetry if sym_label.lower() == 'bond': return self._bond_symmetry if sym_label.lower() == 'angle': return self._angle_symmetry if sym_label.lower() == 'propertorsion': return self._proper_torsion_symmetry if sym_label.lower() == 'impropertorsion': return self._improper_torsion_symmetry return self._no_symmetry
[docs] def add_mol(self, input_mol, smirks_atoms_list): """ Expand the information in this graph by adding a new molecule Parameters ---------- input_mol : ChemPer Mol smirks_atoms_list : list of tuples This is a list of tuples with atom indices [ (indices), ... ] """ mol = mol_toolkit.Mol(input_mol) if len(smirks_atoms_list) == 0: return if len(self.mols) == 0: self._add_first_smirks_atoms(mol, smirks_atoms_list[0]) self._symmetry_funct = self.get_symmetry_funct(CE(self.as_smirks()).get_type()) self._add_mol(mol, smirks_atoms_list[1:]) else: self._add_mol(mol, smirks_atoms_list) self.mols.append(mol) self.smirks_atoms_lists.append(smirks_atoms_list)
def _add_first_smirks_atoms(self, mol, smirks_atoms): """ private function for adding the first molecule to an empty ClusterGraph add_mol calls this if the graph is empty Parameters ---------- mol : ChemPer Mol smirks_atoms : tuple tuple of atom indices for the first atoms to add to the graph. i.e. (0, 1) """ atom_dict = dict() for key, atom_index in enumerate(smirks_atoms, 1): atom_dict[atom_index] = key atom1 = mol.get_atom_by_index(atom_index) new_atom_storage = self.AtomStorage([atom1], key) self._graph.add_node(new_atom_storage) self.atom_by_label[key] = new_atom_storage # Check for bonded atoms already in the graph for neighbor_key in range(len(smirks_atoms), 0, -1): if neighbor_key not in self.atom_by_label: continue # check if atoms are already connected on the graph neighbor_storage = self.atom_by_label[neighbor_key] if nx.has_path(self._graph, new_atom_storage, neighbor_storage): continue # check if atoms are connected in the molecule atom2 = mol.get_atom_by_index(smirks_atoms[neighbor_key-1]) bond = mol.get_bond_by_atoms(atom1, atom2) if bond is not None: # Atoms are connected add edge bond_smirks = tuple(sorted([neighbor_key, key])) bond_storage = self.BondStorage([bond], bond_smirks) self.bond_by_label[bond_smirks] = bond_storage self._graph.add_edge(new_atom_storage, neighbor_storage, bond=bond_storage) # for each indexed atoms add unindexed atoms for the number of specified layers for atom_label, atom_index in enumerate(smirks_atoms, 1): atom = mol.get_atom_by_index(atom_index) storage = self.atom_by_label[atom_label] self._add_layers(mol, atom, storage, self.layers, atom_dict, is_first=True) def _add_layers(self, mol, atom, storage, layers, idx_dict, is_first=False): """ Parameters ---------- mol : ChemPer Mol molecule containing provided atom atom : ChemPer Atom storage: AtomStorage corresponding to the ChemPer Atom provided layers : int or 'all' number of layers left to add (or all) idx_dict : dict form {atom index: label} for this smirks_list in this molecule """ # if layers is 0 there are no more atoms to add so end the recursion if layers == 0: return # find atom neighbors that are not already included in SMIRKS indexed atoms atom_neighbors = [(a, mol.get_bond_by_atoms(a,atom)) for a in atom.get_neighbors() \ if a.get_index() not in idx_dict] # get the smirks indices already added to the storage # This includes all previous layers since the idx_dict is updated as you go storage_labels = [e for k,e in idx_dict.items()] # similar to atoms find neighbors already in the graph that haven't already been used storage_neighbors = [(s, self.get_connecting_bond(s, storage)) for s in self.get_neighbors(storage) \ if s.label not in storage_labels] new_pairs = list() # if this is the first set of atoms added, just make a new # storage for all neighboring atoms if is_first: min_smirks = storage.label * 10 if min_smirks > 0: min_smirks = min_smirks * -1 for a, b in atom_neighbors: new_bond_smirks = tuple(sorted([storage.label, min_smirks])) adding_new_storage = self.add_atom(a,b,storage, min_smirks, new_bond_smirks) idx_dict[a.get_index()] = min_smirks self.atom_by_label[min_smirks] = adding_new_storage min_smirks -= 1 new_pairs.append((a, adding_new_storage)) else: # this isn't the first set of atoms so you need to # pair up the atoms with their storage pairs = self.find_pairs(atom_neighbors, storage_neighbors) for new_atom, new_bond, new_storage_atom, new_storage_bond in pairs: # if no storage is paired to this atom skip it if new_storage_atom is None: continue # if there is no atom paired to a storage remove that branch if new_atom is None: self.remove_atom(new_storage_atom) continue # add atom and bond information to the storage new_storage_atom.add_atom(new_atom) new_storage_bond.add_bond(new_bond) new_pairs.append((new_atom, new_storage_atom)) idx_dict[new_atom.get_index()] = new_storage_atom.label # Repeat for the extra layers if layers == 'all': new_layers = 'all' else: new_layers = layers - 1 if new_layers == 0: return for new_atom, new_storage in new_pairs: self._add_layers(mol, new_atom, new_storage, new_layers, idx_dict, is_first)
[docs] def find_pairs(self, atoms_and_bonds, storages): """ Find pairs is used to determine which current AtomStorage from storages atoms should be paired with. This function takes advantage of the maximum scoring function in networkx to find the pairing with the highest "score". Scores are determined using functions in the atom and bond storage objects that compare those storages to the new atom or bond. If there are less atoms than storages then the atoms with the lowest pair are assigned a None pairing. Parameters ---------- atoms_and_bonds : list of tuples in form (ChemPer Atom, ChemPer Bond, ...) storages: list of tuples in form (AtomStorage, BondStorage, ...) Tuples can be of any length as long as they are the same, so for example, in a bond you might only care about the outer atoms for comparison so you would compare (atom1,) and (atom2,) with (atom_storage1,) and (atom_storage2,) However, in a torsion, you might want the atoms and bonds for each outer bond so in that case you would compare (atom1, bond1, atom2) and (atom4, bond3, atom3) with the corresponding storage objects. Returns ------- pairs : list of lists pairs of atoms and storage objects that are most similar, these lists always come in the form (all atom/bonds, all storage objects) for the bond example above you might get [ [atom1, storage1], [atom2, storage2] ] for the torsion example you might get [ [atom4, bond4, atom3, atom_storage1, bond_storage1, atom_storage2], [atom1, bond1, atom2, atom_storage4, bond_storage3, atom_storage3] """ # store paired stets of atoms/bonds and corresponding storages pairs = list() # check for odd cases combo = atoms_and_bonds + storages # 1. both lists are empty if len(combo) == 0: return pairs nones = [None] * len(combo[0]) # 2. no atom/bond storage if len(atoms_and_bonds) == 0: for storage_set in storages: pairs.append(nones + list(storage_set)) return pairs # 3. no storages if len(storages) == 0: for atom_set in atoms_and_bonds: pairs.append(list(atom_set) + nones) return pairs g = nx.Graph() atom_dict = dict() storage_dict = dict() # create a bipartite graph with atoms/bonds on one side for idx, atom_set in enumerate(atoms_and_bonds): g.add_node(idx+1, bipartite=0) atom_dict[idx+1] = atom_set # and atom/bond storage objects on the other for idx, storage_set in enumerate(storages): g.add_node((idx*-1)-1, bipartite=1) storage_dict[(idx*-1)-1] = storage_set # Fill in the weight on each edge of the graph using the compare_atom/bond functions for a_idx, atom_set in atom_dict.items(): for s_idx, storage_set in storage_dict.items(): # sum up score for every entry in the atom and storage set score = 0 for sa, a in zip(storage_set, atom_set): if isinstance(sa, self.BondStorage): score += sa.compare_bond(a) else: score += sa.compare_atom(a) # score can't be zero so save score+1 g.add_edge(a_idx,s_idx,weight=score+1) # calculate maximum matching, that is the pairing of atoms/bonds to # storage objects that leads the the highest overall score matching = nx.algorithms.max_weight_matching(g,maxcardinality=False) # track the atoms assigned a paired storage object pair_set = set() # store all pairs for idx_1, idx_2 in matching: pair_set.add(idx_1) pair_set.add(idx_2) if idx_1 in atom_dict: atom_set = atom_dict[idx_1] storage_set = storage_dict[idx_2] else: atom_set = atom_dict[idx_2] storage_set = storage_dict[idx_1] pairs.append(list(atom_set) + list(storage_set)) # check for missing atom storages for a_idx, atom_set in atom_dict.items(): if a_idx not in pair_set: pairs.append(list(atom_set) + nones) # check for missing atoms for s_idx, storage_set in storage_dict.items(): if s_idx not in pair_set: pairs.append(nones + list(storage_set)) return pairs
def _add_mol(self, mol, smirks_atoms_list): """ private function for adding a new molecule This is used by add_mol if the graph is not empty, allowing the user to not have to track if the graph already has information before adding molecules Parameters ---------- mol : any Mol smirks_atoms_list : list of dicts This is a list of dictionaries of the form [{smirks index: atom index}] each atom (by index) in the dictionary will be added the relevant AtomStorage by smirks index """ for smirks_atoms in smirks_atoms_list: atom_dict = dict() sorted_smirks_atoms = self._symmetry_funct(mol, smirks_atoms) for key, atom_index in enumerate(sorted_smirks_atoms, 1): atom_dict[atom_index] = key atom1 = mol.get_atom_by_index(atom_index) self.atom_by_label[key].add_atom(atom1) for neighbor_key, neighbor_index in enumerate(sorted_smirks_atoms, 1): # check for connecting bond atom2 = mol.get_atom_by_index(neighbor_index) bond = mol.get_bond_by_atoms(atom1, atom2) if bond is not None and (neighbor_key, key) in self.bond_by_label: bond_smirks = tuple(sorted([neighbor_key, key])) self.bond_by_label[bond_smirks].add_bond(bond) for atom_label, atom_index in enumerate(sorted_smirks_atoms, 1): atom = mol.get_atom_by_index(atom_index) storage = self.atom_by_label[atom_label] self._add_layers(mol, atom, storage, self.layers, atom_dict) def _no_symmetry(self, mol, smirks_atoms): """ No change is made to the atom order for this molecule """ return smirks_atoms def _bond_symmetry(self, mol, smirks_atoms): """ Returns a tuple of two atom indices in the order that leads to the atoms that match with previously stored atoms. Parameters ----------- mol : ChemPer Mol smirks_atoms : two tuple tuple of atom indices Returns -------- ordered_smirks_atoms : two tuple tuple of atom indices as they should be added to the graph """ # pair atoms and bonds atom1 = mol.get_atom_by_index(smirks_atoms[0]) atom2 = mol.get_atom_by_index(smirks_atoms[1]) # Find potential storages for those atoms and bonds atoms_and_bonds = [(atom1,), (atom2,)] storages = [ (self.atom_by_label[1],), (self.atom_by_label[2],) ] pairs = self.find_pairs(atoms_and_bonds, storages) ordered_smirks_atoms = [p[0].get_index() for p in sorted(pairs, key=lambda x: x[1].label)] return tuple(ordered_smirks_atoms) def _angle_symmetry(self, mol, smirks_atoms): """ Returns a tuple of three atom indices in the order that leads to the atoms that match with previously stored atoms. Parameters ----------- mol : ChemPer Mol smirks_atoms : three tuple tuple of atom indices Returns -------- ordered_smirks_atoms : three tuple tuple of atom indices as they should be added to the graph """ # get all three atoms atom1 = mol.get_atom_by_index(smirks_atoms[0]) atom2 = mol.get_atom_by_index(smirks_atoms[1]) atom3 = mol.get_atom_by_index(smirks_atoms[2]) # get both bonds bond1 = mol.get_bond_by_atoms(atom1, atom2) bond2 = mol.get_bond_by_atoms(atom2, atom3) if None in (bond1, bond2): return smirks_atoms # save atom and bond pairs that could be reordered atoms_and_bonds = [(atom1, bond1), (atom3, bond2)] # find current atom and bond storage storages = [ (self.atom_by_label[1], self.bond_by_label[(1,2)]), (self.atom_by_label[3], self.bond_by_label[(2,3)]) ] pairs = self.find_pairs(atoms_and_bonds, storages) order = [p[0].get_index() for p in sorted(pairs, key=lambda x: x[2].label)] return tuple((order[0], smirks_atoms[1], order[1])) def _proper_torsion_symmetry(self, mol, smirks_atoms): """ Returns a tuple of four atom indices for a proper torsion reordered to match with previously stored atoms. Parameters ----------- mol : ChemPer Mol smirks_atoms : four tuple tuple of atom indices Returns -------- ordered_smirks_atoms : four tuple tuple of atom indices as they should be added to the graph """ # get all four atoms atom1 = mol.get_atom_by_index(smirks_atoms[0]) atom2 = mol.get_atom_by_index(smirks_atoms[1]) atom3 = mol.get_atom_by_index(smirks_atoms[2]) atom4 = mol.get_atom_by_index(smirks_atoms[3]) # get two relevant bonds bond1 = mol.get_bond_by_atoms(atom1, atom2) bond3 = mol.get_bond_by_atoms(atom3, atom4) if None in (bond1, bond3): return smirks_atoms # make pairs atoms_and_bonds = [ (atom2, bond1, atom1), (atom3, bond3, atom4) ] # get atom and bond storages storages = [ (self.atom_by_label[2], self.bond_by_label[(1,2)], self.atom_by_label[1]), (self.atom_by_label[3], self.bond_by_label[(3,4)], self.atom_by_label[4]) ] pairs = self.find_pairs(atoms_and_bonds, storages) order = [p[0].get_index() for p in sorted(pairs, key=lambda x: x[3].label)] if order[0] == smirks_atoms[1]: return smirks_atoms temp = list(smirks_atoms) temp.reverse() return tuple(temp) def _improper_torsion_symmetry(self, mol, smirks_atoms): """ Returns a tuple of four atom indices for an improper torsion reordered to match with previously stored atoms. Parameters ----------- mol : ChemPer Mol smirks_atoms : four tuple tuple of atom indices Returns -------- ordered_smirks_atoms : four tuple tuple of atom indices as they should be added to the graph """ # get all four atoms atom1 = mol.get_atom_by_index(smirks_atoms[0]) atom2 = mol.get_atom_by_index(smirks_atoms[1]) atom3 = mol.get_atom_by_index(smirks_atoms[2]) atom4 = mol.get_atom_by_index(smirks_atoms[3]) # get all three bonds bond1 = mol.get_bond_by_atoms(atom1, atom2) bond2 = mol.get_bond_by_atoms(atom2, atom3) bond3 = mol.get_bond_by_atoms(atom2, atom4) if None in (bond1, bond2, bond3): return smirks_atoms # make pairs of atoms and bonds to be reordered atoms_and_bonds = [ (atom1, bond1), (atom3, bond2), (atom4, bond3) ] # find current atom and bond storages storages = [ (self.atom_by_label[1], self.bond_by_label[(1,2)]), (self.atom_by_label[3], self.bond_by_label[(2,3)]), (self.atom_by_label[4], self.bond_by_label[(2,4)]) ] pairs = self.find_pairs(atoms_and_bonds, storages) order = [p[0].get_index() for p in sorted(pairs, key=lambda x: x[2].label)] return tuple((order[0], smirks_atoms[1], order[1], order[2]))