Source code for rdkit_to_params.entries

########################################################################################################################
__doc__ = \
    """
The main class here is ``Entries``, which is a fancy list. 
It gets called for each uppercase attribute 
in the initialisation of ``Params`` 
(which happens in ``_ParamsInitMixin`` __e.g.__ ``Entries.from_name('IO_STRING')``).
    """

import warnings

from .version import *
from typing import Optional, Union

########################################################################################################################

from dataclasses import dataclass
import re, logging
from warnings import warn

from collections import abc
from enum import Enum


# ======================================================================================================================

class Singletony(Enum):
    """
    Describes how many values an entry type may hold.

    * ``multiton`` (0): a regular multi-entry affair (e.g. ``ATOM``)
    * ``singleton`` (1): a singleton (e.g. ``NAME``) which can have only one value
    * ``list_singleton`` (2): a singleton (e.g. ``PROPERTY``) which can accept multiple values

    Note that members of a plain ``Enum`` are always truthy (even ``multiton``, whose value is 0),
    so compare against a member explicitly rather than relying on truthiness.
    """
    multiton = 0  # non-singleton
    singleton = 1
    list_singleton = 2
# ----------------------------------------------------------------------------------------------------------------------
def html_span(inner: Union[str, float], color: Optional[str] = None) -> str:
    """
    Simple span element for ``_repr_html_``.

    :param inner: the content of the span (interpolated as is, no escaping is done)
    :param color: CSS colour. If omitted it is chosen from the type of ``inner``:
        ``#FA8072`` (salmon) for a str, ``#40e0d0`` for a float, ``grey`` for anything else.
    :return: the ``<span>`` element as a string
    """
    if color is None:
        # no explicit colour: pick one based on the type of the content
        if isinstance(inner, str):
            color = '#FA8072'
        elif isinstance(inner, float):
            color = '#40e0d0'
        else:
            color = 'grey'
    return f'<span style="color:{color}">{inner}</span>'
# ----------------------------------------------------------------------------------------------------------------------
class Entries(abc.MutableSequence):
    """
    A fancy default list, where the elements are instances of whatever is in ``entry_cls``.
    It can be initialised via the class method ``from_name``, which accepts a string
    that has to be present in the class attribute ``.choices``.

    The ``.append`` method can work with str, list, dict or an instance of the actual class it wants.
    Note that the string for the string way must be without the header to the line.
    The entry classes require a ``from_str`` classmethod that returns an instance for this.
    They also require a ``__str__`` method as this is how the entries are converted into string.

    ``Entries.from_name('BOND')``
    """
    choices = {}  # this gets filled after each class is declared.

    def __init__(self, entry_cls, singleton: Singletony = Singletony.singleton):
        """
        The entries class is a fancy constrained list. The data is actually stored in ``.data``.

        :param entry_cls: what is the allowed class of the entries
        :param singleton: is only one entry allowed?
        :raises TypeError: if ``entry_cls`` is a ``GenericListEntry`` *instance* and the mode is not multiton
        """
        self.entry_cls = entry_cls
        self.singleton = Singletony(singleton)
        # NOTE(review): ``entry_cls`` is normally a class, so this ``isinstance`` test never fires for the
        # registered choices. It is kept as is: turning it into ``issubclass`` would reject the
        # list entries that are registered as (list_)singletons. Intent to be confirmed.
        if self.singleton != Singletony.multiton and isinstance(self.entry_cls, GenericListEntry):
            raise TypeError(f'{type(self.entry_cls)} is incompatible with {self.singleton}')
        self.data = []

    @classmethod
    def from_name(cls, name: str):
        """
        Alternative constructor: look ``name`` up in ``.choices``.

        :param name: a key of ``.choices`` (e.g. ``'BOND'``)
        :raises KeyError: if ``name`` is not a registered choice
        """
        if name not in cls.choices:
            raise KeyError(f'Name {name} is not one of {", ".join(cls.choices.keys())}')
        entry_cls, singleton = cls.choices[name]
        return cls(entry_cls=entry_cls, singleton=singleton)

    def __getitem__(self, index):
        return self.data[index]

    def _assign_value(self, value):
        """
        Coerce ``value`` into an instance of ``entry_cls``.

        :param value: an instance (returned untouched), a str (``from_str``),
            a list (positional arguments) or a dict (keyword arguments)
        :raises ValueError: for any other type
        """
        if isinstance(value, self.entry_cls):
            return value
        if isinstance(value, str):
            return self.entry_cls.from_str(value)
        if isinstance(value, list):
            return self.entry_cls(*value)
        if isinstance(value, dict):
            return self.entry_cls(**value)
        raise ValueError(f'No idea what to do with {value}')

    def __setitem__(self, index, value):
        if self.singleton == Singletony.multiton:
            self.data[index] = self._assign_value(value)
        elif self.singleton == Singletony.singleton or len(self.data) == 0:
            # a singleton holds a single entry: the index is ignored and the entry replaced.
            self.data = [self._assign_value(value)]
        else:
            # assumes it's a GenericListEntry and has a value.
            neo = self._assign_value(value)
            self.data[0].values.extend(neo.values)

    def __delitem__(self, index):
        # Enum members are always truthy, so ``if self.singleton:`` was true for multiton too
        # and every deletion removed the first entry. Compare explicitly instead.
        if self.singleton != Singletony.multiton:
            index = 0
        del self.data[index]

    def insert(self, index, value):
        """
        Insert ``value`` (coerced by ``_assign_value``) at ``index``.
        For the singleton modes the index is ignored and the call is equivalent to ``self[0] = value``.
        """
        if self.singleton == Singletony.multiton:  # non-singleton
            self.data.insert(index, self._assign_value(value))
        else:
            self[0] = value

    def __len__(self):
        return len(self.data)

    def __str__(self):
        return '\n'.join(str(entry) for entry in self.data)

    def _repr_html_(self):
        """HTML representation: the ``_repr_html_`` of each entry joined by ``<br/>``."""
        return '<br/>'.join(entry._repr_html_() for entry in self.data)
#########################################################################################################
class GenericEntry:
    """
    This is meant to be inherited. ``header`` is the entry type. ``body`` is a string.
    """
    log = logging.getLogger(__name__)

    def __init__(self, header: str, body: str):
        """
        :param header: the entry type; stored stripped and upper-cased
        :param body: the rest of the line; stored with trailing whitespace removed
        :raises AssertionError: if either ends up empty
        """
        self.header = header.strip().upper()
        self.body = body.rstrip()
        assert self.header, 'Type is empty'
        assert self.body, 'Value is empty'

    def __str__(self) -> str:
        return ' '.join((self.header, self.body))

    def _repr_html_(self):
        """HTML representation: coloured header, a colon and the body."""
        return html_span(self.header) + ': ' + self.body

    @classmethod
    def from_str(cls, text):
        """Build the entry from the line body. Subclasses fix the header, so ``text`` is the sole argument."""
        return cls(text)
#########################################################################################################
class GenericListEntry:
    """
    This is meant to be inherited. ``header`` is the entry type. ``values`` is a list of strings.
    """

    def __init__(self, header: str, *args: str):
        """
        :param header: the entry type; stored stripped and upper-cased
        :param args: the values, stored as a list in ``.values``
        """
        self.header = header.strip().upper()
        self.values = [*args]

    def __str__(self) -> str:
        joined = ' '.join(self.values)
        return f'{self.header} {joined}'

    def _repr_html_(self):
        """HTML representation: coloured header, a colon and the space-separated values."""
        joined = ' '.join(self.values)
        return f'{html_span(self.header)}: {joined}'

    @classmethod
    def from_str(cls, text):
        """Split ``text`` on whitespace and pass the tokens as positional arguments."""
        tokens = text.split()
        return cls(*tokens)
#########################################################################################################
class NBR_ATOMEntry(GenericEntry):
    """Generic entry whose header is fixed to ``NBR_ATOM``."""

    def __init__(self, body: str):
        super().__init__('NBR_ATOM', body)


# registered as a singleton: only one value is kept.
Entries.choices['NBR_ATOM'] = (NBR_ATOMEntry, Singletony.singleton)
#########################################################################################################
class NBR_RADIUSEntry(GenericEntry):
    """Generic entry whose header is fixed to ``NBR_RADIUS``."""

    def __init__(self, body: str):
        super().__init__('NBR_RADIUS', body)


# registered as a singleton: only one value is kept.
Entries.choices['NBR_RADIUS'] = (NBR_RADIUSEntry, Singletony.singleton)
#########################################################################################################
class MAINCHAIN_ATOMS(GenericListEntry):
    """Generic list entry whose header is fixed to ``MAINCHAIN_ATOMS``."""

    def __init__(self, *args: str):
        header = 'MAINCHAIN_ATOMS'
        super().__init__(header, *args)


# registered as a list singleton: a single line that accumulates values.
Entries.choices['MAINCHAIN_ATOMS'] = (MAINCHAIN_ATOMS, Singletony.list_singleton)
#########################################################################################################
class CommentEntry(GenericEntry):
    """Generic entry whose header is fixed to ``#``, i.e. a comment line."""

    def __init__(self, body: str):
        super().__init__('#', body)


# registered under two names, both multiton: any number of comments may be stored.
Entries.choices['#'] = (CommentEntry, Singletony.multiton)
Entries.choices['comment'] = (CommentEntry, Singletony.multiton)
#########################################################################################################
class ATOM_ALIASEntry(GenericEntry):
    """Generic entry whose header is fixed to ``ATOM_ALIAS``."""

    def __init__(self, body: str):
        super().__init__('ATOM_ALIAS', body)


# registered as a multiton: any number of entries may be stored.
Entries.choices['ATOM_ALIAS'] = (ATOM_ALIASEntry, Singletony.multiton)
#########################################################################################################
@dataclass
class IO_STRINGEntry:
    """
    * ``.name3`` is three letter name. ``Params().NAME`` is actually a dynamic attribute that uses this.
    * ``.name1`` is a one letter name.

    These get checked for length: ``name1`` must be exactly one character,
    while a one or two character ``name3`` is right-padded with spaces to three characters.
    """
    name3: str = 'LIG'
    name1: str = 'Z'

    def __post_init__(self):
        """
        :raises AssertionError: if ``name1`` is not one character long
        :raises ValueError: if ``name3`` is empty or longer than three characters
        """
        assert len(self.name1) == 1, f'{self.name1} is not 1 char long'
        # ToDo figure out what the official standard is for non-three letter 3-letter codes
        if not 1 <= len(self.name3) <= 3:
            raise ValueError(f'{self.name3} is not 3 char long')
        # pad to exactly three characters (a one-letter name needs two spaces, not one).
        self.name3 = self.name3.ljust(3)

    def __str__(self) -> str:
        return f'IO_STRING {self.name3} {self.name1}'

    def _repr_html_(self):
        """HTML representation: coloured header followed by both names."""
        return f'{html_span("IO_STRING")} {self.name3} {self.name1}'

    @classmethod
    def from_str(cls, text):
        """
        Parse the body of an ``IO_STRING`` line, i.e. two whitespace-separated names.

        :raises ValueError: if ``text`` does not split into exactly two tokens
        """
        name3, name1 = text.rstrip().split()
        return cls(name3, name1)
# register the IO_STRING entry class as a singleton (only one value is kept).
Entries.choices['IO_STRING'] = (IO_STRINGEntry, Singletony.singleton) #########################################################################################################
@dataclass
class CONNECTEntry:
    """
    This is a mess, but it guesses what you mean.
    Deals with UPPER, LOWER and CONNECT.

    Whichever of ``connect_type`` and ``connect_name`` is missing is derived from the other
    (or from ``index`` if both are missing).
    """
    atom_name: str
    index: int = 1
    connect_type: str = ''  # | 'CONNECT' | 'UPPER_CONNECT' | 'LOWER_CONNECT'
    connect_name: str = ''  # 'CONN1' | 'UPPER' | 'LOWER'

    def __post_init__(self):
        has_type = bool(self.connect_type)
        has_name = bool(self.connect_name)
        if has_type and has_name:
            # fully specified: nothing to guess.
            return
        if not has_type:
            if not has_name:
                # nothing given: a plain numbered connection.
                self.connect_type = 'CONNECT'
                self.connect_name = f'CONN{self.index}'
            elif 'CONN' not in self.connect_name:
                # e.g. 'UPPER' -> 'UPPER_CONNECT'
                self.connect_type = f'{self.connect_name}_CONNECT'
            else:
                # e.g. 'CONN1' -> 'CONNECT'
                self.connect_type = 'CONNECT'
        elif self.connect_type == 'CONNECT':
            # type only, plain connection: number it.
            self.connect_name = f'CONN{self.index}'
        else:
            # type only, e.g. 'UPPER_CONNECT' -> 'UPPER'
            self.connect_name = self.connect_type.replace('_CONNECT', '')

    def __str__(self) -> str:
        return f'{self.connect_type} {self.atom_name}'

    def _repr_html_(self):
        """HTML representation: coloured connection type followed by the atom name."""
        return f'{html_span(self.connect_type)} {self.atom_name}'

    @classmethod
    def from_str(cls, text):
        """Split ``text`` on whitespace and pass the tokens as positional arguments."""
        tokens = text.split()
        return cls(*tokens)
# register the CONNECT entry class as a multiton (any number of entries may be stored).
Entries.choices['CONNECT'] = (CONNECTEntry, Singletony.multiton) #########################################################################################################
@dataclass
class CHIEntry:
    """
    A ``CHI`` line: an index followed by four atom names.
    The fourth name is left-justified to four characters on initialisation.
    """
    index: int
    first: str
    second: str
    third: str
    fourth: str

    def __post_init__(self):
        self.fourth = self.fourth.ljust(4)

    def __str__(self) -> str:
        return f'CHI {self.index} {self.first} {self.second} {self.third} {self.fourth}'

    def _repr_html_(self):
        """HTML representation: coloured header and index followed by the atom names."""
        return f'{html_span("CHI")} {html_span(self.index)} {self.first} {self.second} {self.third} {self.fourth}'

    @classmethod
    def from_str(cls, text: str):
        """
        Parse the body of a ``CHI`` line, e.g. ``1 C6 C5 C4 C3``.

        :param text: the line without its header. Leading whitespace is tolerated.
        :return: the entry, with ``index`` converted to ``int`` as the field annotation declares
        :raises ValueError: if ``text`` does not match
        """
        # 1 C6 C5 C4 C3
        rex = re.match(r'\s*(\d+)\s+(\S{1,4})\s+(\S{1,4})\s+(\S{1,4})\s+(\S{1,4})', text)
        if rex is None:
            raise ValueError(f'CHI entry "{text}" is not formatted correctly')
        index, *atoms = rex.groups()
        return cls(int(index), *atoms)
# register the CHI entry class as a multiton (any number of entries may be stored).
Entries.choices['CHI'] = (CHIEntry, Singletony.multiton) #########################################################################################################
@dataclass
class ICOOR_INTERNALEntry:
    """
    Lines stolen from Rosetta documentation

    >             Child  Phi Angle    Theta        Distance   Parent  Angle  Torsion
    > ICOOR_INTERNAL C14  167.536810   59.880644    1.473042   N2     C11    C12

    * Child atom (A4)
    * phi angle (torsion angle between A1, A2, A3, A4)
    * theta angle (improper angle = (180 - (angle between A4, A3, A2)))
    * distance (between A4 and A3)
    * parent atom (A3)
    * angle atom (A2)
    * torsion atom (A1)

    The third parent is left-justified to five characters on initialisation.
    """
    child: str
    phi: float
    theta: float
    distance: float
    parent: str
    second_parent: str
    third_parent: str

    def __post_init__(self):
        self.third_parent = self.third_parent.ljust(5)

    def __str__(self) -> str:
        return (f'ICOOR_INTERNAL {self.child: <5} {self.phi: >11.6f} '
                f'{self.theta: >11.6f} {self.distance: >11.6f} '
                f'{self.parent: <5} {self.second_parent: <5} {self.third_parent: <5}')

    def _repr_html_(self):
        """HTML representation: coloured header followed by the labelled fields."""
        parts = (f'{html_span("ICOOR_INTERNAL")} target:{self.child} ',
                 f'&phi;:{html_span(self.phi)} ',
                 f'&theta;:{html_span(self.theta)} ',
                 f'distance:{html_span(self.distance)} ',
                 f'parent:{self.parent} ',
                 f'2nd parent:{self.second_parent} ',
                 f'3rd parent:{self.third_parent}')
        return ''.join(parts)

    @classmethod
    def from_str(cls, text: str):
        """
        Parse an ``ICOOR_INTERNAL`` line. A position based match is tried first,
        then a whitespace based one on the last seven tokens.

        :param text: the line to parse (trailing whitespace is ignored)
        :raises ValueError: if neither pattern matches or a number cannot be converted to float
        """
        stripped = text.rstrip()
        # raw strings: ``\w``, ``\s``, ``\d`` are invalid escapes in a regular string literal.
        # position based.
        rex = re.match(r' (.{5}) (.{11}) (.{11}) (.{11}) (.{5}) (.{5}) (.{1,5})$', stripped)
        # space based... bad.
        rex2 = re.search(r'(\w+)\s+([-\d\.]+)\s+([-\d\.]+)\s+([-\d\.]+)\s+(\w+)\s+(\w+)\s+(\w+)$', stripped)
        if rex:
            data = list(rex.groups())
        elif rex2:
            data = list(rex2.groups())
        else:
            raise ValueError(f'ICOOR_INTERNAL Entry "{text}" is not formatted correctly')
        # phi, theta and distance are numbers.
        data[1:4] = [float(value.strip()) for value in data[1:4]]
        return cls(*data)
# register the ICOOR_INTERNAL entry class as a multiton (any number of entries may be stored).
Entries.choices['ICOOR_INTERNAL'] = (ICOOR_INTERNALEntry, Singletony.multiton) #########################################################################################################
@dataclass
class BONDEntry:
    """
    dataclass class for both BOND and BOND_TYPE lines.
    The ``__str__`` method will know which to write based on ``.order``.

    The hash is built from the two atom names, stripped and sorted,
    so BOND records with the same pair of names are equal regardless of their direction,
    padding or bond order.
    """
    first: str
    second: str
    order: int = 1  # 2,3, 4|ARO

    def __post_init__(self):
        self.second = self.second.ljust(4)

    def __str__(self) -> str:
        if self.order == 1 or not self.order:
            return f'BOND {self.first: >4} {self.second: >4}'
        return f'BOND_TYPE {self.first: >4} {self.second: >4} {self.order}'

    def _repr_html_(self):
        """HTML representation: coloured header followed by the two names and the order."""
        return f'{html_span("BOND")} {self.first} {self.second} {self.order}'

    def _key(self):
        """The pair of atom names, stripped and sorted: the identity of the bond."""
        # only ``second`` is padded in ``__post_init__``, hence the stripping:
        # without it A-B and B-A would differ.
        return tuple(sorted((self.first.strip(), self.second.strip())))

    def __hash__(self):
        return hash(self._key())

    def __eq__(self, other):
        # compare the names themselves, not their hashes, so collisions cannot give false positives.
        if not isinstance(other, BONDEntry):
            return NotImplemented
        return self._key() == other._key()

    @classmethod
    def from_str(cls, text: str):
        """
        Parse the body of a ``BOND``/``BOND_TYPE`` line: two atom names and an optional order.

        :param text: the line without its header (trailing whitespace is ignored)
        :return: the entry. A missing order becomes 1, ``ARO`` becomes 4, anything else goes through ``int``.
        :raises ValueError: if ``text`` does not match or the order is not ``ARO`` or an integer
        """
        rex = re.match(r'(?P<first>.{1,4}) (?P<second>.{2,4})\s?(?P<order>.*)', text.rstrip())
        if rex is None:
            raise ValueError(f'BOND entry "{text}" is not formatted correctly')
        data = rex.groupdict()
        order = data['order'].strip()
        if order == '':
            data['order'] = 1
        elif order == 'ARO':
            data['order'] = 4  # ARO is also acceptable.
        else:
            data['order'] = int(order)
        return cls(**data)
# register the BOND entry class as a multiton (any number of entries may be stored).
Entries.choices['BOND'] = (BONDEntry, Singletony.multiton) #########################################################################################################
@dataclass
class ATOMEntry:
    """
    An ``ATOM`` line: PDB atom name, Rosetta AtomType, MM AtomType, and charge.

    Equality and hash are based on ``name`` only.
    """
    # PDB atom name, Rosetta AtomType, MM AtomType, and charge
    name: str
    rtype: str
    mtype: str = 'X'
    partial: float = 0

    def __str__(self) -> str:
        return f'ATOM {self.name: >4} {self.rtype: >4} {self.mtype: >4} {self.partial:.7f}'

    def _repr_html_(self):
        """HTML representation: coloured header followed by the labelled fields."""
        parts = (f'{html_span("ATOM")} atom:{self.name} ',
                 f'rosetta-type:{self.rtype} ',
                 f'm-type:{self.mtype} ',
                 f'Gasteiger:{html_span(self.partial)}')
        return ''.join(parts)

    def __eq__(self, other):
        """
        Two atom entries are equal if their ``name`` is identical (padding included).
        Comparing with anything else returns False: ``atomentry == 'CA'`` is False,
        use ``atomentry.name.strip() == 'CA'`` instead.
        """
        if isinstance(other, self.__class__):
            return self.name == other.name
        return False

    def __hash__(self):
        return hash(self.name)

    @classmethod
    def from_str(cls, text: str):
        """
        Parse the body of an ``ATOM`` line: three fields of up to four characters and a number.

        :param text: the line without its header (trailing whitespace is ignored)
        :raises ValueError: if ``text`` does not match or the charge cannot be converted to float
        """
        # raw string: ``\s`` and ``\d`` are invalid escapes in a regular string literal.
        rex = re.match(r'(?P<name>.{1,4})\s*(?P<rtype>.{1,4})\s*(?P<mtype>.{1,4})\s*(?P<partial>[-\d\.]+)',
                       text.rstrip())
        if rex is None:
            raise ValueError(f'ATOM entry "{text}" is not formatted correctly')
        data = rex.groupdict()
        data['partial'] = float(data['partial'])
        return cls(**data)
# register the ATOM entry class as a multiton (any number of entries may be stored).
Entries.choices['ATOM'] = (ATOMEntry, Singletony.multiton) #########################################################################################################
[docs]@dataclass class CUT_BONDEntry: """ No idea what CUT_BOND is for. """ first: str second: str def __post_init__(self): self.second = self.second.ljust(4) def __str__(self) -> str: return f'CUT_BOND {self.first: >4} {self.second: >4}'
[docs] def _repr_html_(self): return f'{html_span("CUT_BOND")} {self.first} {self.second}'
[docs] @classmethod def from_str(cls, text: str): rex = re.match('\s?(?P<first>.{1,4})\s+(?P<second>.{1,4})', text.rstrip()) if rex is None: raise ValueError(f'CUT_BOND entry "{text}" is not formatted correctly') data = rex.groupdict() return cls(**data)
# register the CUT_BOND entry class as a multiton (any number of entries may be stored).
Entries.choices['CUT_BOND'] = (CUT_BONDEntry, Singletony.multiton) #########################################################################################################
@dataclass
class CHARGEEntry:
    """
    A ``CHARGE`` line: an atom name and its formal charge.
    No idea if anything respects this.
    """
    atom: str
    charge: int

    def __str__(self) -> str:
        return f'CHARGE {self.atom} FORMAL {self.charge}'

    def _repr_html_(self):
        """HTML representation: coloured header, the atom name and the coloured charge."""
        return f'{html_span("CHARGE")} {self.atom} {html_span(self.charge)}'

    @classmethod
    def from_str(cls, text: str):
        """
        Parse the body of a ``CHARGE`` line, e.g. ``N1 FORMAL +1``.

        :param text: the line without its header. Leading and trailing whitespace is tolerated.
        :return: the entry, with ``charge`` converted to ``int`` as the field annotation declares
        :raises ValueError: if ``text`` does not match
        """
        rex = re.match(r'\s*(?P<atom>\S+)\s+FORMAL\s+(?P<charge>[-+]?\d+)', text.rstrip())
        if rex is None:
            raise ValueError(f'CHARGE entry "{text}" is not formatted correctly')
        return cls(atom=rex.group('atom'), charge=int(rex.group('charge')))
# register the CHARGE entry class as a multiton (any number of entries may be stored).
Entries.choices['CHARGE'] = (CHARGEEntry, Singletony.multiton) #########################################################################################################
class PDB_ROTAMERSEntry(GenericEntry):
    """
    Generic entry whose header is fixed to ``PDB_ROTAMERS``.
    This does zero checks for file existence.
    """

    def __init__(self, body: str):
        super().__init__('PDB_ROTAMERS', body)


# registered as a singleton: only one value is kept.
Entries.choices['PDB_ROTAMERS'] = (PDB_ROTAMERSEntry, Singletony.singleton)
#########################################################################################################
class ROTAMER_AAEntry(GenericEntry):
    """Generic entry whose header is fixed to ``ROTAMER_AA``."""

    def __init__(self, body: str):
        super().__init__('ROTAMER_AA', body)


# registered as a singleton: only one value is kept.
Entries.choices['ROTAMER_AA'] = (ROTAMER_AAEntry, Singletony.singleton)
#########################################################################################################
class AAEntry(GenericEntry):
    """
    Generic entry whose header is fixed to ``AA``.
    The body defaults to ``UNK``; any other value is accepted but logged at info level.
    """

    def __init__(self, body: str = 'UNK'):
        is_expected = body == 'UNK'
        if not is_expected:
            self.log.info('AA should be UNK... tolerating oddity.')
        super().__init__('AA', body)


# registered as a singleton: only one value is kept.
Entries.choices['AA'] = (AAEntry, Singletony.singleton)
#########################################################################################################
class TYPEEntry(GenericEntry):
    """
    Generic entry whose header is fixed to ``TYPE``.
    LIGAND or POLYMER. No exceptions.
    """

    def __init__(self, body: str = 'LIGAND'):
        allowed = ('POLYMER', 'LIGAND')
        assert body in allowed, f'residue TYPE {body} is neither POLYMER or LIGAND'
        super().__init__('TYPE', body)


# registered as a singleton: only one value is kept.
Entries.choices['TYPE'] = (TYPEEntry, Singletony.singleton)
#########################################################################################################
class ADD_RINGEntry(GenericEntry):
    """
    Generic entry whose header is fixed to ``ADD_RING``.
    The values are kept as a single unsplit string, which is logged at info level on creation.
    """
    ## To be fixed. Spacing drama...

    def __init__(self, body: str):
        self.log.info('ADD_RING is sloppily coded. the values are stored as an unsplit string!')
        super().__init__('ADD_RING', body)


# registered as a multiton: any number of entries may be stored.
Entries.choices['ADD_RING'] = (ADD_RINGEntry, Singletony.multiton)
#########################################################################################################
class PROPERTIESEntry(GenericListEntry):
    """Generic list entry whose header is fixed to ``PROPERTIES``."""
    # https://graylab.jhu.edu/PyRosetta.documentation/pyrosetta.rosetta.core.chemical.html#pyrosetta.rosetta.core.chemical.ResidueProperty

    def __init__(self, *args: str):
        header = 'PROPERTIES'
        super().__init__(header, *args)


# registered as a list singleton: a single line that accumulates values.
Entries.choices['PROPERTIES'] = (PROPERTIESEntry, Singletony.list_singleton)
#########################################################################################################
class VARIANTEntry(GenericListEntry):
    """Generic list entry whose header is fixed to ``VARIANT``."""
    # do multiple variant entries get the same line??

    def __init__(self, *args: str):
        header = 'VARIANT'
        super().__init__(header, *args)


# registered as a list singleton: a single line that accumulates values.
Entries.choices['VARIANT'] = (VARIANTEntry, Singletony.list_singleton)
#########################################################################################################
class FIRST_SIDECHAIN_ATOMEntry(GenericEntry):
    """Generic entry whose header is fixed to ``FIRST_SIDECHAIN_ATOM``."""

    def __init__(self, body: str):
        super().__init__('FIRST_SIDECHAIN_ATOM', body)


# registered as a singleton: only one value is kept.
Entries.choices['FIRST_SIDECHAIN_ATOM'] = (FIRST_SIDECHAIN_ATOMEntry, Singletony.singleton)
class BACKBONE_AAEntry(GenericEntry):
    """
    Generic entry whose header is fixed to ``BACKBONE_AA``.
    The body must be exactly three characters long and is stored upper-cased.
    """

    def __init__(self, body: str):
        assert len(body) == 3, f'{body} is not 3 char long'
        super().__init__('BACKBONE_AA', body.upper())


# registered as a singleton: only one value is kept.
Entries.choices['BACKBONE_AA'] = (BACKBONE_AAEntry, Singletony.singleton)
#########################################################################################################
class RAMA_PREPRO_FILENAMEEntry(GenericEntry):
    """Generic entry whose header is fixed to ``RAMA_PREPRO_FILENAME``."""

    def __init__(self, body: str):
        super().__init__('RAMA_PREPRO_FILENAME', body)


# registered as a singleton: only one value is kept.
Entries.choices['RAMA_PREPRO_FILENAME'] = (RAMA_PREPRO_FILENAMEEntry, Singletony.singleton)
#########################################################################################################
class METAL_BINDING_ATOMSEntry(GenericListEntry):
    """Generic list entry whose header is fixed to ``METAL_BINDING_ATOMS``."""

    def __init__(self, *args: str):
        header = 'METAL_BINDING_ATOMS'
        super().__init__(header, *args)


# NOTE(review): a list entry registered as a plain singleton (a new value replaces the old one),
# unlike PROPERTIES/VARIANT which are list singletons -- confirm this is intended.
Entries.choices['METAL_BINDING_ATOMS'] = (METAL_BINDING_ATOMSEntry, Singletony.singleton)
#########################################################################################################
class ACT_COORD_ATOMSEntry(GenericListEntry):
    """Generic list entry whose header is fixed to ``ACT_COORD_ATOMS``."""

    def __init__(self, *args: str):
        header = 'ACT_COORD_ATOMS'
        super().__init__(header, *args)


# NOTE(review): a list entry registered as a plain singleton (a new value replaces the old one),
# unlike PROPERTIES/VARIANT which are list singletons -- confirm this is intended.
Entries.choices['ACT_COORD_ATOMS'] = (ACT_COORD_ATOMSEntry, Singletony.singleton)
#########################################################################################################
class UNKNOWNEntry(GenericEntry):
    """
    Generic entry for lines whose header is not known:
    both the header and the body are taken from the line itself.
    """

    @classmethod
    def from_str(cls, text):
        """
        Parse a full line: the first word is the header, the rest (after one space) is the body.

        :param text: the whole line, header included (trailing whitespace is ignored)
        :raises ValueError: if ``text`` is not a word followed by a space and a body
        """
        # the pattern used to end with ``^`` (start of string) rather than ``$`` and so never matched,
        # making every call fail with an AttributeError on ``None``.
        rex = re.match(r'(?P<header>\w+) (?P<body>.*)$', text.rstrip())
        if rex is None:
            raise ValueError(f'Unknown entry "{text}" is not formatted correctly')
        return cls(**rex.groupdict())


# registered as a multiton: any number of entries may be stored.
Entries.choices['<UNKNOWN>'] = (UNKNOWNEntry, Singletony.multiton)
#########################################################################################################