Source code for rdkit_to_params

########################################################################################################################
__doc__ = \
    """
    The main class here is ``Params``. All underscore base classes are not meant to be used standalone.
    ``Entries`` is the class for a list of entries of the same kind.
    """
from .version import *

########################################################################################################################


from warnings import warn
import os, re, logging
from typing import Union, Optional, List

#################### base classes ######################################################################################

from ._io_mixin import _ParamsIoMixin # in turn inherits _ParamsInitMixin
from .entries import Entries

#################### pyrosetta #########################################################################################
# PyRosetta is an optional dependency: when the mixin cannot be imported,
# an empty stand-in keeps the ``Params`` base-class list valid.
try:
    from ._pyrosetta_mixin import _PoserMixin, pyrosetta
except ImportError:
    warn('PyRosetta is required for the ``test`` method', category=ImportWarning)


    class _PoserMixin:
        """Empty stand-in used when the PyRosetta mixin cannot be imported."""

#################### rdkit #############################################################################################
# RDKit is an optional dependency: when it (or a module relying on it) cannot
# be imported, stand-ins are defined so that this module still imports and the
# failure is deferred to the moment an RDKit-only feature is actually used.
try:
    from .rdkitside import _RDKitMixin, neutralize, DummyMasker
    from .constraint import Constraints
    from rdkit import Chem
except ImportError:
    warn('RDkit is required for ``from_mol`` stuff and ``Constraints``', category=ImportWarning)

    class Chem:
        """Stand-in for ``rdkit.Chem`` so ``Chem.Atom`` can still be referenced."""
        Atom = None

    class DummyMasker:
        """Stand-in for the real ``DummyMasker``; instantiating it raises ``ImportError``."""

        # The raise has to live in ``__init__``: a ``raise`` placed directly in
        # the class body runs while the class is being defined, which would
        # make the whole module un-importable without RDKit.
        def __init__(self, *args, **kwargs):
            raise ImportError('RDkit is required for DummyMasker')

    class _RDKitMixin:
        """Empty stand-in used when the RDKit mixin cannot be imported."""

    def neutralize(*args, **kargs):
        """Stand-in for ``neutralize``; always raises ``ImportError``."""
        raise ImportError('RDkit is required for `neutralize` (pH 7 charge correction)')


#################### main class ########################################################################################


class Params(_ParamsIoMixin, _RDKitMixin, _PoserMixin):
    """
    ``Params`` creates and manipulates params files.
    It can handle several types of params operations,
    such as "atom name surgery" and ``rdkit.Chem.Mol`` to a params file.

    ## Key methods

    * ``Params.load(filename)`` will instantiate from file.
    * ``Params.from_mol(mol)`` will instantiate from ``Chem.Mol``
    * ``p.dump(filename)`` will save a file.
    * ``loads`` and ``dumps`` for strings.
    * ``p.fields`` will return all header fields.
    * ``p.test`` tests the params file in PyRosetta.
    * ``p.rename_atom(old, new)`` changes an atom name

    ## Attributes

    The attributes are generally the uppercase line headers, with a few exceptions.

    * `.comments` is for # lines
    * "BOND_TYPE" and "BOND" are merged into ``.BOND``.
    * "UPPER", "LOWER" and "CONNECT" are merged into ``.CONNECT``

    With the exception of ``.NAME`` which depends on ``.IO_STRING``
    basically all the header type attributes are actually instances of the class `Entries`,
    which holds a sequence of specific entries. See `entry.py` for the properties of each.
    These can be a singleton, such as `.IO_STRING` which when a new line is added
    it gets overwritten instead, or not like say `.ATOM`.
    That is to say that ``.ATOM[0]`` will give the first atom as expected,
    but this has to be done for ``.IO_STRING[0]`` too.

    Atomnames...

    * ``p.get_correct_atomname`` will return the 4 letter name of the atom with nice spacing.
    * ``p.rename_atom`` will change one atomname to a new one across all entries.
    * ``BOND``, ``CHI``, ``CUT_BOND`` entries store 4 char atomnames as
      ``.first``, ``.second``, ``.third``, ``.fourth``.
    * ``ICOOR_INTERNAL`` entries store 5 char atomnames as
      ``.child``,``.parent``,``.second_parent``,``.third_parent``.
    * ``ATOM_ALIAS``, ``NBR_ATOM``, ``FIRST_SIDECHAIN_ATOM``, ``ADD_RING`` are just
      ``entries.GenericEntries`` instances, where ``.body`` is a string which will contain the atomname.
    * ``METAL_BINDING_ATOMS``, ``ACT_COORD_ATOMS`` are ``entries.GenericListEntries`` instances
      where ``.values`` is a list of string with maybe atomnames.

    ## Inheritance

    It inherits several classes, which are not meant to be used standalone, except for testing.

    The pyrosetta and rdkit functionality are dependent on these being installed.

    * ``_ParamsIoMixin`` adds read write, and inherits
    * ``_ParamsInitMixin`` which adds the basics.
    * ``_PoserMixin`` is a base that adds pyrosetta functionality if available.
    * ``_RDKitCovertMixin``, which adds rdkit ``from_mol`` conversion functionality,
      the class is split in two, the other part being
    * ``_RDKitParamsPrepMixin``, which prepares the molecule for ``_RDKitCovertMixin.from_mol``.
    """
    log = logging.getLogger(__name__)

    @property
    def NAME(self):
        """
        The ``name3`` of the first ``IO_STRING`` entry.

        If there is no ``IO_STRING`` entry a warning is logged and ``'XXX'`` is returned.
        """
        if len(self.IO_STRING):
            return self.IO_STRING[0].name3
        else:
            self.log.warning('Attempted access to empty IO_STRING/NAME')
            return 'XXX'

    @NAME.setter
    def NAME(self, name):
        """
        Set ``name3`` on the first ``IO_STRING`` entry,
        creating the entry from ``'{name} Z'`` when there is none yet.
        """
        if len(self.IO_STRING) == 0:
            self.IO_STRING.append(f'{name} Z')
        else:
            self.IO_STRING[0].name3 = name

    @property
    def nbr(self):
        """
        The body of the first ``NBR_RADIUS`` entry as a float, or ``nan`` when ``NBR_RADIUS`` is falsy.
        """
        if self.NBR_RADIUS:
            return float(self.NBR_RADIUS[0].body)
        else:
            return float('nan')

    def is_aminoacid(self):
        """
        Whether the body of the first ``TYPE`` entry is ``'POLYMER'``.

        Side effect: when ``TYPE`` is empty, a ``'LIGAND'`` entry is appended first.

        :return: True if the first TYPE entry is POLYMER
        """
        if len(self.TYPE) == 0:
            self.TYPE.append('LIGAND')
        return self.TYPE[0].body == 'POLYMER'

    def validate(self):
        """
        Partial sanity check of the entries (logs at critical level that it is unfinished).

        The checks are plain ``assert`` statements, so a failure raises ``AssertionError``
        and nothing is checked when Python runs with ``-O``.
        """
        self.log.critical('This is not finished.')
        if 'CANONICAL_AA' in self.PROPERTIES[0].values:
            # NOTE(review): ``self.AA`` is presumably an ``Entries`` instance,
            # so comparing it with a str may always be unequal -- confirm.
            assert self.AA != 'UNK', 'CANONICAL_AA property requires a AA type not UNK'
        if 'METALBINDING' in self.PROPERTIES[0].values:
            assert len(self.METAL_BINDING_ATOMS) > 0, 'METALBINDING property requires METAL_BINDING_ATOMS'
        # NOTE(review): assumes ``PDB_ROTAMERS`` exposes ``.strip()`` -- confirm against ``Entries``.
        assert os.path.exists(self.PDB_ROTAMERS.strip()), f'PDB_ROTAMERS file {self.PDB_ROTAMERS} does not exist'
        # etc.

    def get_correct_atomname(self, name: str) -> str:
        """
        Given a name, gets the correctly spaced out one as appears in the ATOM entry.
        To pad out a name use pad_name
        This has nothing to do with ``._get_PDBInfo_atomname`` which just returns the atom name from a ``Chem.Atom``.

        :param name: dirty name
        :return: correct name
        :raises ValueError: if no ATOM entry matches, even ignoring surrounding whitespace
        """
        name = name.upper()
        # first pass: an exact match takes precedence.
        for atom in self.ATOM:
            if atom.name == name:
                return name
        # second pass: match ignoring padding and return the name as stored.
        stripped = name.strip()
        for atom in self.ATOM:
            if atom.name.strip() == stripped:
                return atom.name
        raise ValueError(f'{name} is not a valid atom name (does not appear in the entries)')

    def rename_atom(self, atom_or_atomname: Union[str, 'Chem.Atom'], newname: str, overwrite=True) -> Union[str, None]:
        """
        rename an atom by atomname or Chem.Atom (the former just calls ``rename_atom_by_name`` as is just for legacy)
        calls ``rename_atom_by_name`` -> ``_rename_atom_in_entries``

        :param atom_or_atomname: the current atom name or the ``Chem.Atom`` to rename
        :param newname: the new atom name. If None, nothing is done and None is returned.
        :param overwrite: passed on to ``_set_PDBInfo_atomname`` when the ``Chem.Atom`` has no name yet
        :return: whatever ``rename_atom_by_name`` or ``_set_PDBInfo_atomname`` returns
        :raises AssertionError: if ``self.mol`` already has a different atom called ``newname``
        :raises TypeError: if ``atom_or_atomname`` is neither a str nor a ``Chem.Atom``
        """
        # sanity
        if newname is None:
            return None
        try:
            # is there a rdkit.Chem.Mol?
            if self.mol:
                # presumably raises ValueError when no atom has this name,
                # in which case the except clause below is taken -- confirm.
                atom = self.get_atom_by_name(newname)
                if isinstance(atom_or_atomname, str):
                    raise AssertionError(f'New name {newname} already exists')
                elif isinstance(atom_or_atomname, Chem.Atom) and atom_or_atomname.GetIdx() != atom.GetIdx():
                    raise AssertionError(f'New name {newname} already exists')
                else:
                    pass  # already changed.
            else:  # no rdkit.Chem.Mol means that it is not regenerated
                pass
            if len(self.ATOM) > 0:  # there are ATOM entries to take the spacing from
                newname = self.get_correct_atomname(newname)
        except ValueError:
            pass  # absent
        # change.
        if isinstance(atom_or_atomname, str):  # atom name
            oldname = atom_or_atomname
            return self.rename_atom_by_name(oldname, newname)
        elif isinstance(atom_or_atomname, Chem.Atom):
            atom = atom_or_atomname
            oldname = self._get_PDBInfo_atomname(atom, throw=False)
            if oldname:
                return self.rename_atom_by_name(oldname, newname)  # alters entry & rdkit
            else:
                return self._set_PDBInfo_atomname(atom, newname, overwrite=overwrite)  # alters rdkit
        else:
            raise TypeError(f'{type(atom_or_atomname)} is not a string or atom')

    def rename_atom_by_name(self, oldname: str, newname: str) -> Optional[str]:
        """
        Change the atom name from ``oldname`` to ``newname`` and returns the 4 char ``newname``.

        :param oldname: atom name, preferably 4 char long.
        :param newname: atom name, preferably 4 char long.
        :return: 4 char newname (``newname`` unchanged if equal to ``oldname``, None if ``newname`` is None)
        """
        if newname is None:
            return None
        if oldname == newname:
            return newname
        # rdkit mol
        if self.mol:
            atom = self.get_atom_by_name(oldname)
            newname = self.pad_name(newname, atom)
            atom.GetPDBResidueInfo().SetName(newname)
        else:
            newname = self.pad_name(newname)
        # params
        self._rename_atom_in_entries(oldname, newname)
        return newname

    def _rename_atom_in_entries(self, oldname, newname):
        """
        Replace ``oldname`` with ``newname`` in every entry that stores an atom name.

        Nothing happens if there are no ATOM entries or if ``oldname`` is ``CONN``.
        Connection names (``CONN1``, ``CONN2``, ``CONN3``, ``LOWER``, ``UPPER``) are changed
        in ``CONNECT`` and ``ICOOR_INTERNAL`` only; any other name is treated as an atom name.

        :param oldname: current atom or connection name
        :param newname: new name (at most 4 char for an atom name)
        :return: None
        :raises ValueError: if the connection name is not found, if an atom ``newname`` is too long
            or already taken, or if ``oldname`` is not an atom name (from ``get_correct_atomname``)
        """
        # if params is not filled nothing happens.
        if len(self.ATOM) == 0:
            return None  # N/A: unparameterised atm.
        stripped_old = oldname.strip()
        # check if it is a connect atom
        if stripped_old == 'CONN':
            pass  # ...
        elif stripped_old in ('CONN1', 'CONN2', 'CONN3', 'LOWER', 'UPPER'):
            for conn in self.CONNECT:
                if conn.connect_name.strip() == stripped_old:
                    conn.connect_name = newname
                    # TODO fix this properly. LOWER UPPER CONN3 should be the preferred order.
                    break
            else:
                raise ValueError(f'{oldname} does not appear amid the connections')
            for entry in self.ICOOR_INTERNAL:
                for key in 'child', 'parent', 'second_parent', 'third_parent':
                    if getattr(entry, key) == oldname.rjust(5):
                        setattr(entry, key, newname.rjust(5))
        else:
            # fix names
            oldname = self.get_correct_atomname(oldname)
            stripped_old = oldname.strip()
            if len(newname) > 4:
                raise ValueError(f'{newname} is too long.')
            elif newname == 'END':
                self.log.info('I thing END may be an old keyword - What is ``ACT_COORD_ATOMS``?. BEST AVOID IT.')
            newname = self.pad_name(newname).upper()
            # find in atom.
            # The clash check covers every ATOM entry before anything is changed:
            # checking while renaming would miss a clash listed after ``oldname``.
            if any(atom.name == newname for atom in self.ATOM):
                raise ValueError(f'{newname} is already taken.')
            for atom in self.ATOM:
                if atom.name == oldname:
                    atom.name = newname
                    break
            # find in entries of the kind with ``first``, ``second``, ``third``, ``fourth`` (or ``atom``)
            # attributes, which are atom names 4 char
            for attr in ('BOND', 'CHARGE', 'CHI', 'CUT_BOND'):  # ICOOR_INTERNAL ATOM_ALIAS
                for entry in getattr(self, attr):
                    for key in 'first', 'second', 'third', 'fourth', 'atom':
                        if not hasattr(entry, key):
                            continue
                        if getattr(entry, key) in (oldname, stripped_old):
                            setattr(entry, key, newname)
                            break  # only the first matching attribute of an entry is changed
            # find ICOOR_INTERNAL entries with ``child``,``parent``,``second_parent``,``third_parent`` attributes,
            # which are atom names 5 char
            for entry in self.ICOOR_INTERNAL:
                for key in 'child', 'parent', 'second_parent', 'third_parent':
                    if getattr(entry, key).strip() == stripped_old:
                        setattr(entry, key, newname.ljust(5))
            for conn in self.CONNECT:
                if conn.atom_name.strip() == stripped_old:
                    conn.atom_name = newname
            # find in the Generic entries
            # The name is escaped because atom names may hold regex metacharacters (e.g. ``*``)
            # and the replacement is given as a callable so that it is inserted verbatim.
            whole_name = re.compile(r'(?<!\w)' + re.escape(stripped_old) + r'(?!\w)')
            for attr in ('ATOM_ALIAS', 'NBR_ATOM', 'FIRST_SIDECHAIN_ATOM', 'ADD_RING'):
                for entry in getattr(self, attr):
                    if stripped_old in entry.body:
                        entry.body = whole_name.sub(lambda _match: newname, entry.body)
            # find in the Generic list entries
            for attr in ('METAL_BINDING_ATOMS', 'ACT_COORD_ATOMS', 'MAINCHAIN_ATOMS'):
                for entry in getattr(self, attr):
                    entry.values = [v if v.strip() != stripped_old else newname for v in entry.values]

    # ==== extras for cap

    def _prep_for_terminal(self, mainchain_atoms: Optional[List[str]] = None, connection_idx: int = 1):
        """
        Shared first step of ``make_C_terminal_cap`` and ``make_N_terminal_cap``:
        sets the first TYPE entry to POLYMER, appends ``UNK`` to AA, ``TERMINUS`` to PROPERTIES
        and ``mainchain_atoms`` to MAINCHAIN_ATOMS, and assigns the backbone rtype
        to the mainchain atoms named C, CA, N or H.

        Example::

            p = Params.from_smiles('*C(=O)[C@@]1NC(=O)CC1', name='CAP', atomnames=[None, 'C', 'O', 'CA', 'N'])
            p.make_N_terminal_cap(mainchain_atoms=['C', 'O', 'CA', 'N'])
            import nglview as nv
            view = nv.show_rosetta(p.to_polymeric_pose(sequence='X[CAP]AA'))
            view.add_hyperball('*')
            view

        :param mainchain_atoms: mainchain atom names. None is treated as an empty list.
        :param connection_idx: 1-based ("Fortran") index of the connection
        :raises AssertionError: if ``connection_idx`` is below 1 or exceeds the number of CONNECT entries
        :raises ValueError: if a mainchain atom named C, CA, N or H is not in the ATOM entries
        """
        assert connection_idx > 0, 'Fortran counting the connection_idx'
        assert len(self.CONNECT) >= connection_idx, 'No attachment atom... without a connection it\'s a ligand'
        self.TYPE[0] = 'POLYMER'
        self.AA.append('UNK')
        self.PROPERTIES.append('TERMINUS')
        # deal with mainchain
        if mainchain_atoms is None:
            mainchain_atoms = []
        self.MAINCHAIN_ATOMS.append(mainchain_atoms)
        # correct rtype.
        expected = {'C': 'CObb', 'CA': 'CAbb', 'N': 'Nbb', 'H': 'HNbb'}
        for atom_name in mainchain_atoms:
            stripped = atom_name.strip()
            if stripped not in expected:
                continue
            # rdkit.Mol
            if self.mol:
                atom = self.get_atom_by_name(atom_name)
                atom.SetProp('_rType', expected[stripped])
            # entries
            for atom_entry in self.ATOM:
                if atom_entry.name.strip() == stripped:
                    atom_entry.rtype = expected[stripped]
                    break
            else:
                raise ValueError(f'{atom_name} does not appear in the ATOM entries.')

    def _change_conn_for_terminal(self, connection_idx, new_name):
        """
        Turn the CONNECT entry whose ``index`` is ``connection_idx`` into a ``{new_name}_CONNECT``
        and append its ``atom_name`` to FIRST_SIDECHAIN_ATOM.

        LOWER_CONNECT attaches to N
        UPPER_CONNECT attaches to C

        :param connection_idx: 1-based ("Fortran") index of the connection
        :param new_name: ``LOWER`` or ``UPPER``
        :return: None
        :raises ValueError: if no CONNECT entry has that index
        """
        for conn in self.CONNECT:
            if conn.index == connection_idx:
                self.rename_atom(conn.connect_name, new_name)
                self.FIRST_SIDECHAIN_ATOM.append(conn.atom_name)
                conn.connect_type = f'{new_name}_CONNECT'
                conn.connect_name = new_name
                break
        else:
            raise ValueError('Why cannot find connect? CONNECT definitions are wrong.')

    def make_C_terminal_cap(self, mainchain_atoms=None, connection_idx=1):
        """
        Make current covalent compound into a C-terminal cap, aka. goes on the C-terminal end of the peptide.
        That is the compound has a N-terminus.

        The chosen connection becomes ``LOWER`` and an ``UPPER_CONNECT NONE`` entry is appended.

        :param mainchain_atoms: mainchain atoms.
        :param connection_idx: Fortran indiced
        :return: None
        """
        self._prep_for_terminal(mainchain_atoms, connection_idx)
        # self.VARIANT.append(['LOWER_TERMINUS_VARIANT'])
        self._change_conn_for_terminal(connection_idx, 'LOWER')
        self.CONNECT.append(dict(atom_name='',
                                 index=len(self.CONNECT) + 1,
                                 connect_type='UPPER_CONNECT NONE',
                                 connect_name='UPPER'))

    def make_N_terminal_cap(self, mainchain_atoms=None, connection_idx=1):
        """
        Make current covalent compound into a N-terminal cap, aka. goes on the N-terminal end of the peptide.
        That is the compound has a C-terminus.
        LOWER_CONNECT attaches to N so should be None.

        The chosen connection becomes ``UPPER`` and a ``LOWER_CONNECT NONE`` entry is appended.

        :param mainchain_atoms: mainchain atoms.
        :param connection_idx: Fortran indiced
        :return: None
        """
        self._prep_for_terminal(mainchain_atoms, connection_idx)
        # self.VARIANT.append(['UPPER_TERMINUS_VARIANT'])
        self._change_conn_for_terminal(connection_idx, 'UPPER')
        self.CONNECT.append(dict(atom_name='',
                                 index=len(self.CONNECT) + 1,
                                 connect_type='LOWER_CONNECT NONE',
                                 connect_name='LOWER'))