Source code for rdkit_to_params

########################################################################################################################
__doc__ = \
    """
    The main class here is ``Params``. All underscore base classes are not meant to be used standalone.
    ``Entries`` is the class for a list of entries of the same kind.
    """
from .version import *

########################################################################################################################


from warnings import warn
import os, re, logging
from typing import Union, Optional, List

#################### base classes ######################################################################################

from ._io_mixin import _ParamsIoMixin # in turn inherits _ParamsInitMixin
from .entries import Entries

#################### pyrosetta #########################################################################################
try:
    # optional dependency: the real mixin drags in PyRosetta
    from ._pyrosetta_mixin import _PoserMixin, pyrosetta
except ImportError:
    warn('PyRosetta is required for the ``test`` method', category=ImportWarning)


    class _PoserMixin:
        """Empty stand-in base used when the PyRosetta-backed mixin cannot be imported."""

#################### rdkit #############################################################################################
try:
    from .rdkitside import _RDKitMixin, neutralize, DummyMasker
    from .constraint import Constraints
    from rdkit import Chem
except ImportError:
    # RDKit is optional: define stand-ins so that the module still imports
    # and failures surface only when the RDKit-dependent features are used.
    warn('RDkit is required for ``from_mol`` stuff and ``Constraints``', category=ImportWarning)

    class Chem:
        """Stand-in for ``rdkit.Chem`` so that ``Chem.Atom`` can still be referenced."""
        Atom = None

    class DummyMasker:
        """Stand-in for the RDKit-backed ``DummyMasker``: instantiating it raises ``ImportError``."""

        # The raise must live in ``__init__``: a ``raise`` placed directly in the class body
        # runs while the class is being defined and would abort the import of this module.
        def __init__(self, *args, **kwargs):
            raise ImportError('RDkit is required for DummyMasker')

    class _RDKitMixin:
        """Empty stand-in base used when the RDKit-backed mixin cannot be imported."""

    def neutralize(*args, **kargs):
        """Stand-in for the RDKit-backed ``neutralize``: calling it raises ``ImportError``."""
        raise ImportError('RDkit is required for `neutralize` (pH 7 charge correction)')


#################### main class ########################################################################################


[docs]class Params(_ParamsIoMixin, _RDKitMixin, _PoserMixin):
    """
    ``Params`` creates and manipulates params files. It can handle several types of params operations,
    such as "atom name surgery" and converting a ``rdkit.Chem.Mol`` to a params file.

    ## Key methods

    * ``Params.load(filename)`` will instantiate from file.
    * ``Params.from_mol(mol)`` will instantiate from ``Chem.Mol``
    * ``p.dump(filename)`` will save a file.
    * ``loads`` and ``dumps`` for strings.
    * ``p.fields`` will return all header fields.
    * ``p.test`` tests the params file in PyRosetta.
    * ``p.rename_atom(old, new)`` changes an atom name

    ## Attributes

    The attributes are generally the uppercase line headers, with a few exceptions.

    * `.comments` is for # lines
    * "BOND_TYPE" and "BOND" are merged into ``.BOND``.
    * "UPPER", "LOWER" and "CONNECT" are merged into ``.CONNECT``

    With the exception of ``.NAME`` which depends on ``.IO_STRING`` basically
    all the header type attributes are actually instances of the class `Entries`, which holds a sequence of specific entries.
    see `entry.py` for the properties of each.
    These can be a singleton, such as `.IO_STRING` which when a new line is added it gets overwritten instead, or not like say `.ATOM`.
    That is to say that ``.ATOM[0]`` will give the first atom as expected, but this has to be done for ``.IO_STRING[0]`` too.

    Atomnames...

    * ``p.get_correct_atomname`` will return the 4 letter name of the atom with nice spacing.
    * ``p.rename_atom`` will change one atomname to a new one across all entries.
    * ``BOND``, ``CHI``, ``CUT_BOND`` entries store 4 char atomnames as ``.first``, ``.second``, ``.third``, ``.fourth``.
    * ``ICOOR_INTERNAL`` entries store 5 char atomnames as ``.child``,``.parent``,``.second_parent``,``.third_parent``.
    * ``ATOM_ALIAS``, ``NBR_ATOM``, ``FIRST_SIDECHAIN_ATOM``, ``ADD_RING`` are just ``entries.GenericEntries`` instances, where ``.body`` is a string which will contain the atomname.
    * ``METAL_BINDING_ATOMS``, ``ACT_COORD_ATOMS`` are ``entries.GenericListEntries`` instances where ``.values`` is a list of string with maybe atomnames.

    ## Inheritance

    It inherits several classes,
    which are not meant to be used standalone, except for testing.

    The pyrosetta and rdkit functionality are dependent on these being installed.

    * ``_ParamsIoMixin`` adds read write, and inherits
    * ``_ParamsInitMixin`` which adds the basics.
    * ``_PoserMixin`` is a base that adds pyrosetta functionality if available.
    * ``_RDKitCovertMixin``, which adds rdkit ``from_mol`` conversion functionality, the class is split in two, the other part being
    * ``_RDKitParamsPrepMixin``, which prepares the molecule for ``_RDKitCovertMixin.from_mol``.

    """
    log = logging.getLogger(__name__)

    @property
    def NAME(self):
        """
        The ``name3`` of the first ``IO_STRING`` entry.

        When ``IO_STRING`` holds no entry a warning is logged and ``'XXX'`` is returned.
        """
        if not len(self.IO_STRING):
            self.log.warning('Attempted access to empty IO_STRING/NAME')
            return 'XXX'
        return self.IO_STRING[0].name3

    @NAME.setter
    def NAME(self, name):
        if len(self.IO_STRING) != 0:
            # an entry exists: only its ``name3`` is overwritten
            self.IO_STRING[0].name3 = name
        else:
            # no entry yet: add one built from the string '<name> Z'
            self.IO_STRING.append(f'{name} Z')

    @property
    def nbr(self):
        """
        The body of the first ``NBR_RADIUS`` entry as a float, or ``nan`` when there is none.
        """
        if not self.NBR_RADIUS:
            return float('nan')
        return float(self.NBR_RADIUS[0].body)

[docs]    def is_aminoacid(self):
        if len(self.TYPE) == 0:
            self.TYPE.append('LIGAND')
        return self.TYPE[0].body == 'POLYMER'

[docs]    def validate(self):
        self.log.critical('This is not finished.')
        if 'CANONICAL_AA' in self.PROPERTIES[0].values:
            assert self.AA != 'UNK', 'CANONICAL_AA property requires a AA type not UNK'
        if 'METALBINDING' in self.PROPERTIES[0].values:
            assert len(self.METAL_BINDING_ATOMS) > 0, 'METALBINDING property requires METAL_BINDING_ATOMS'
        assert os.path.exists(self.PDB_ROTAMERS.strip()), f'PDB_ROTAMERS file {self.PDB_ROTAMERS} does not exist'
        # etc.

[docs]    def get_correct_atomname(self, name: str) -> str:
        """
        Given a name, gets the correctly spaced out one as appears in the ATOM entry.
        To pad out a name use pad_name
        This has nothing to do with ``._get_PDBInfo_atomname`` which just returns the atom name from a ``Chem.Atom``.

        :param name: dirty name
        :return: correct name
        """
        name = name.upper()
        # find in atom.
        for atom in self.ATOM:
            if atom.name == name:
                return name
        else:
            for atom in self.ATOM:
                if atom.name.strip() == name.strip():
                    return atom.name
            else:
                raise ValueError(f'{name} is not a valid atom name (does not appear in the entries)')

[docs]    def rename_atom(self, atom_or_atomname: Union[str, 'Chem.Atom'], newname: str, overwrite=True) -> Union[str, None]:
        """
        rename an atom by atomname or Chem.Atom (the former just calls ``rename_atom_by_name`` as is just for legacy)

        calls ``rename_atom_by_name`` -> ``_rename_atom_in_entries``

        :param atom_or_atomname:
        :param newname:
        :return:
        """
        # sanity
        if newname is None:
            return None
        try:
            # is there a rdkit.Chem.Mol?
            if self.mol:
                atom = self.get_atom_by_name(newname)
                if isinstance(atom_or_atomname, str):
                    raise AssertionError(f'New name {newname} already exists')
                elif isinstance(atom_or_atomname, Chem.Atom) and atom_or_atomname.GetIdx() != atom.GetIdx():
                    raise AssertionError(f'New name {newname} already exists')
                else:
                    pass  # already changed.
            else:
                # no rdkit.Chem.Mol means that it is not regenerated
                pass
            if len(self.ATOM) > 0:  # there are defi
                newname = self.get_correct_atomname(newname)
        except ValueError:
            pass  # absent
        # change.
        if isinstance(atom_or_atomname, str): #atom name
            oldname = atom_or_atomname
            return self.rename_atom_by_name(oldname, newname)
        elif isinstance(atom_or_atomname, Chem.Atom):
            atom = atom_or_atomname
            oldname = self._get_PDBInfo_atomname(atom, throw=False)
            if oldname:
                return self.rename_atom_by_name(oldname, newname)  # alters entry & rdkit
            else:
                return self._set_PDBInfo_atomname(atom, newname, overwrite=overwrite)  # alters rdkit
        else:
            raise TypeError(f'{type(atom_or_atomname)} is not a string or atom')


[docs]    def rename_atom_by_name(self, oldname: str, newname: str) -> str:
        """
        Change the atom name from ``oldname`` to ``newname`` and returns the 4 char ``newname``.

        :param oldname: atom name, preferably 4 char long.
        :param newname: atom name, preferably 4 char long.
        :return: 4 char newname
        """
        if newname is None:
            return None
        elif oldname == newname:
            return newname
        else:
            # rdkit mol
            if self.mol:
                atom = self.get_atom_by_name(oldname)
                newname = self.pad_name(newname, atom)
                atom.GetPDBResidueInfo().SetName(newname)
            else:
                newname = self.pad_name(newname)
            # params
            self._rename_atom_in_entries(oldname, newname)
            return newname

[docs]    def _rename_atom_in_entries(self, oldname, newname):
        # if params is not filled nothing happens.
        # check if it is a connect atom
        if len(self.ATOM) == 0:
            return None # N/A: unparameterise atm.
        elif oldname.strip() == 'CONN':
            pass  # ...
        elif oldname.strip() in ('CONN1', 'CONN2', 'CONN3', 'LOWER', 'UPPER'):
            for conn in self.CONNECT:
                if conn.connect_name.strip() == oldname.strip():
                    conn.connect_name = newname
                    # TODO fix this properly. LOWER UPPER CONN3 should be the preferred order.
                    break
            else:
                raise ValueError(f'{oldname} does not appear amid the connections')
            for entry in self.ICOOR_INTERNAL:
                for key in 'child', 'parent', 'second_parent', 'third_parent':
                    if getattr(entry, key) == oldname.rjust(5):
                        setattr(entry, key, newname.rjust(5))
        else:
            # fix names
            oldname = self.get_correct_atomname(oldname)
            if len(newname) > 4:
                raise ValueError(f'{newname} is too long.')
            elif newname == 'END':
                self.log.info('I thing END may be an old keyword - What is ``ACT_COORD_ATOMS``?. BEST AVOID IT.')
            newname = self.pad_name(newname).upper()
            # find in atom.
            for atom in self.ATOM:
                if atom.name == newname:
                    raise ValueError(f'{newname} is already taken.')
                elif atom.name == oldname:
                    atom.name = newname
                    break
            # find in entries of the kind with ``first``, ``second``, ``third``, ``fourth`` attributes, which are atom names 4 char
            for attr in ('BOND', 'CHARGE', 'CHI', 'CUT_BOND', 'CHARGE'):  # ICOOR_INTERNAL ATOM_ALIAS
                for entry in getattr(self, attr):
                    for key in 'first', 'second', 'third', 'fourth', 'atom':
                        if not hasattr(entry, key):
                            continue
                        elif getattr(entry, key) == oldname or getattr(entry, key) == oldname.strip():
                            setattr(entry, key, newname)
                            break
                        else:
                            pass
            # find ICOOR_INTERNAL entries with ``child``,``parent``,``second_parent``,``third_parent`` attributes,
            # which are atom names 5 char
            for entry in self.ICOOR_INTERNAL:
                for key in 'child', 'parent', 'second_parent', 'third_parent':
                    if getattr(entry, key).strip() == oldname.strip():
                        setattr(entry, key, newname.ljust(5))
            for conn in self.CONNECT:
                if conn.atom_name.strip() == oldname.strip():
                    conn.atom_name = newname
            # find in the Generic entries
            for attr in ('ATOM_ALIAS', 'NBR_ATOM', 'FIRST_SIDECHAIN_ATOM', 'ADD_RING'):
                for entry in getattr(self, attr):
                    if oldname.strip() in entry.body:
                        entry.body = re.sub('(?<!\w)' + oldname.strip() + '(?!\w)', newname, entry.body)
            # find in the Generic list entries
            for attr in ('METAL_BINDING_ATOMS', 'ACT_COORD_ATOMS', 'MAINCHAIN_ATOMS'):
                for entry in getattr(self, attr):
                    entry.values = [v if v.strip() != oldname.strip() else newname for v in entry.values]

    # ==== extras for cap

[docs]    def _prep_for_terminal(self,  mainchain_atoms: Optional[List[str]]=None, connection_idx: int=1):
        """
        p = Params.from_smiles('*C(=O)[C@@]1NC(=O)CC1', name='CAP', atomnames=[None, 'C', 'O', 'CA', 'N'])
        p.make_N_terminal_cap(mainchain_atoms=['C', 'O', 'CA', 'N'])
        import nglview as nv
        view = nv.show_rosetta(p.to_polymeric_pose(sequence='X[CAP]AA'))
        view.add_hyperball('*')
        view
        """
        assert connection_idx > 0, 'Fortran counting the connection_idx'
        assert len(self.CONNECT) >= connection_idx, 'No attachment atom... without a connection it\'s a ligand'
        self.TYPE[0] = 'POLYMER'
        self.AA.append('UNK')
        self.PROPERTIES.append('TERMINUS')
        # deal with mainchain
        if mainchain_atoms is None:
            mainchain_atoms = []
        self.MAINCHAIN_ATOMS.append(mainchain_atoms)
        # correct rtype.
        expected = {'C': 'CObb', 'CA':'CAbb', 'N': 'Nbb', 'H': 'HNbb'}
        for atom_name in mainchain_atoms:
            if atom_name.strip() in expected:
                #rdkit.Mol
                if self.mol:
                    atom = self.get_atom_by_name(atom_name)
                    atom.SetProp('_rType', expected[atom_name.strip()])
                # entries
                for atom_entry in self.ATOM:
                    if atom_entry.name.strip() == atom_name.strip():
                        atom_entry.rtype = expected[atom_name.strip()]
                        break
                else:
                    raise ValueError(f'{atom_name} does not appear in the ATOM entries.')

[docs]    def _change_conn_for_terminal(self, connection_idx, new_name):
        """
        LOWER_CONNECT attaches to N
        UPPER_CONNECT attaches to C

        :param connection_idx:
        :param new_name:
        :return:
        """
        for conn in self.CONNECT:
            if conn.index == connection_idx:
                self.rename_atom(conn.connect_name, new_name)
                self.FIRST_SIDECHAIN_ATOM.append(conn.atom_name)
                conn.connect_type=f'{new_name}_CONNECT'
                conn.connect_name=new_name
                break
        else:
            raise ValueError('Why cannot find connect? CONNECT definitions are wrong.')

[docs]    def make_C_terminal_cap(self, mainchain_atoms=None, connection_idx=1):
        """
        Make current covalent compound into a C-terminal cap, aka. goes on the C-terminal end of the peptide.
        That is the compound has a N-terminus (UPPER)

        :param mainchain_atoms: mainchain atoms.
        :param connection_idx: Fortran indiced
        :return:
        """
        self._prep_for_terminal(mainchain_atoms, connection_idx)
        # self.VARIANT.append(['LOWER_TERMINUS_VARIANT'])
        self._change_conn_for_terminal(connection_idx, 'LOWER')
        self.CONNECT.append(dict(atom_name='',
                                 index=len(self.CONNECT)+1,
                                 connect_type='UPPER_CONNECT NONE',
                                 connect_name='UPPER')
                           )

[docs]    def make_N_terminal_cap(self, mainchain_atoms=None, connection_idx=1):
        """
                Make current covalent compound into a N-terminal cap, aka. goes on the N-terminal end of the peptide.
                That is the compound has a C-terminus (LOWER)
                LOWER_CONNECT attaches to N so should be None.


                :param connection_idx: Fortran indiced
                :return:
                """
        self._prep_for_terminal(mainchain_atoms, connection_idx)
        # self.VARIANT.append(['UPPER_TERMINUS_VARIANT'])
        self._change_conn_for_terminal(connection_idx, 'UPPER')
        self.CONNECT.append(dict(atom_name='',
                                         index=len(self.CONNECT) + 1,
                                         connect_type='LOWER_CONNECT NONE',
                                         connect_name='LOWER')
                            )