Source code for molecupy.structures.chains

"""Contains classes for macrostructures made of residues."""

import re
from .molecules import AtomicStructure, Residue, SmallMolecule
from ..exceptions import NoResiduesError, BrokenHelixError, BrokenStrandError, DuplicateResiduesError

class ResiduicStructure(AtomicStructure):
    """Base class: :py:class:`.AtomicStructure`

    The base class for all structures which can be described as a set of
    residues.

    :param residues: A sequence of :py:class:`.Residue` objects in this
     structure.
    :raises NoResiduesError: if no residues are given.
    :raises TypeError: if any argument is not a :py:class:`.Residue`.
    :raises DuplicateResiduesError: if two residues share a residue ID."""

    def __init__(self, *residues):
        if not residues:
            raise NoResiduesError("Cannot make a ResiduicStructure with no residues")
        for residue in residues:
            if not isinstance(residue, Residue):
                raise TypeError(
                    "Can only make ResiduicStructure with Residues, not '%s'" % str(residue)
                )
        residue_ids = [residue.residue_id() for residue in residues]
        if len(residue_ids) != len(set(residue_ids)):
            # Report the first ID (in argument order) that occurs more than once.
            duplicates = [id_ for id_ in residue_ids if residue_ids.count(id_) > 1]
            raise DuplicateResiduesError(
                "There are multiple residues with ID %s" % (duplicates[0],)
            )
        self._residues = set(residues)

    def __repr__(self):
        return "<%s (%i residues)>" % (self.__class__.__name__, len(self._residues))

    def __getattr__(self, attribute):
        # __getattr__ is only consulted after normal attribute lookup has
        # failed. ``_atoms`` is never stored on the instance; it is rebuilt
        # from the residues on every access.
        # NOTE(review): presumably AtomicStructure's methods read
        # ``self._atoms`` - confirm against molecules.py.
        if attribute == "_atoms":
            atoms = set()
            for residue in self.residues():
                atoms.update(residue.atoms(atom_type="all"))
            return atoms
        else:
            # Repeat the normal lookup so the usual AttributeError is raised.
            return self.__getattribute__(attribute)

    def residues(self, include_missing=True):
        """Returns the residues in this structure as a ``set``.

        :param bool include_missing: If ``False`` only residues present in the
         PDB coordinates will be returned, and not missing ones.
        :rtype: ``set``"""

        if include_missing:
            return set(self._residues)
        else:
            return set([res for res in self._residues if not res.is_missing()])

    def add_residue(self, residue):
        """Adds a residue to the structure.

        :param Residue residue: The residue to add.
        :raises TypeError: if ``residue`` is not a :py:class:`.Residue`.
        :raises DuplicateResiduesError: if a residue with the same ID is
         already in the structure."""

        if not isinstance(residue, Residue):
            raise TypeError(
                "Can only add Residues to ResiduicStructures, not '%s'" % str(residue)
            )
        if residue.residue_id() in [res.residue_id() for res in self.residues()]:
            # Residue IDs are strings, so the ID must be formatted with %s;
            # %i would itself raise TypeError and mask the duplicate error.
            raise DuplicateResiduesError(
                "Cannot add residue with ID %s to %s as there is already a residue with that ID" % (
                    residue.residue_id(), self
                )
            )
        self._residues.add(residue)

    def remove_residue(self, residue):
        """Removes a residue from the structure.

        The removal is delegated to the underlying container's ``remove``
        method, so removing a residue that is not present raises that
        container's error.

        :param Residue residue: The residue to remove."""

        self._residues.remove(residue)

    def get_residue_by_id(self, residue_id):
        """Returns the first residue that matches a given residue ID.

        :param str residue_id: The residue ID to search by.
        :raises TypeError: if ``residue_id`` is not a ``str``.
        :rtype: :py:class:`.Residue` or ``None``"""

        if not isinstance(residue_id, str):
            raise TypeError(
                "Residue ID search must be by str, not '%s'" % str(residue_id)
            )
        for residue in self.residues():
            if residue.residue_id() == residue_id:
                return residue
        return None

    def get_residues_by_name(self, residue_name, include_missing=True):
        """Returns all the residues of a given name.

        :param str residue_name: The name to search by.
        :param bool include_missing: If ``False`` only residues present in the
         PDB coordinates will be returned, and not missing ones.
        :raises TypeError: if ``residue_name`` is not a ``str``.
        :rtype: ``set`` of :py:class:`.Residue` objects."""

        if not isinstance(residue_name, str):
            raise TypeError(
                "Residue name search must be by str, not '%s'" % str(residue_name)
            )
        return set([
            residue for residue in self.residues(include_missing=include_missing)
            if residue.residue_name() == residue_name
        ])

    def get_residue_by_name(self, residue_name, include_missing=True):
        """Returns the first residue that matches a given name.

        :param str residue_name: The name to search by.
        :param bool include_missing: If ``False`` only residues present in the
         PDB coordinates will be returned, and not missing ones.
        :raises TypeError: if ``residue_name`` is not a ``str``.
        :rtype: :py:class:`.Residue` or ``None``"""

        if not isinstance(residue_name, str):
            raise TypeError(
                "Residue name search must be by str, not '%s'" % str(residue_name)
            )
        for residue in self.residues(include_missing=include_missing):
            if residue.residue_name() == residue_name:
                return residue
        return None
class ResiduicSequence(ResiduicStructure):
    """Base class: :py:class:`ResiduicStructure`

    The base class for all structures which can be described as a sequence of
    residues.

    :param residues: A sequence of :py:class:`.Residue` objects in this
     structure."""

    def __init__(self, *residues):
        # The base initialiser validates the residues (non-empty, all Residue
        # objects, unique IDs) but stores them as a set; replace that with a
        # list so the order of the arguments is kept.
        ResiduicStructure.__init__(self, *residues)
        self._residues = list(residues)

    def __len__(self):
        return len(self._residues)

    def __iter__(self):
        return iter(self._residues)

    def __getitem__(self, key):
        return self._residues.__getitem__(key)

    def residues(self, include_missing=True):
        """Returns the residues in this structure as a ``list``.

        :param bool include_missing: If ``False`` only residues present in the
         PDB coordinates will be returned, and not missing ones.
        :rtype: ``list``"""

        if include_missing:
            return list(self._residues)
        else:
            return [res for res in self._residues if not res.is_missing()]

    def add_residue(self, residue):
        """Adds a residue to the end of this sequence.

        :param Residue residue: The residue to add.
        :raises TypeError: if ``residue`` is not a :py:class:`.Residue`.
        :raises DuplicateResiduesError: if a residue with the same ID is
         already in the sequence."""

        if not isinstance(residue, Residue):
            raise TypeError(
                "Can only add Residues to ResiduicSequences, not '%s'" % str(residue)
            )
        # Enforce the same unique-ID rule that the constructor and
        # ResiduicStructure.add_residue apply, so appending cannot create a
        # sequence that could never have been constructed directly.
        new_id = residue.residue_id()
        if any(existing.residue_id() == new_id for existing in self._residues):
            raise DuplicateResiduesError(
                "Cannot add residue with ID %s to %s as there is already a residue with that ID" % (
                    new_id, self
                )
            )
        self._residues.append(residue)

    def sequence_string(self, include_missing=True):
        """Return the protein sequence of this chain as one letter codes.

        Residue names are upper-cased and looked up in the module-level
        ``RESIDUES`` table; names not found there are written as ``"X"``.

        :param bool include_missing: If ``False`` only residues present in the
         PDB coordinates will be returned, and not missing ones.
        :returns: The protein sequence.
        :rtype: ``str``"""

        return "".join([RESIDUES.get(
            res.residue_name().upper(), "X"
        ) for res in self.residues(include_missing=include_missing)])
class Chain(ResiduicSequence):
    """Base class: :py:class:`ResiduicSequence`

    Represents chains - the polymeric units that make up most of PDB structures.

    :param str chain_id: The chain's ID - a single upper case letter.
    :param residues: The residues in this chain.
    :raises TypeError: if ``chain_id`` is not a ``str``.
    :raises ValueError: if ``chain_id`` is not exactly one upper case letter."""

    def __init__(self, chain_id, *residues):
        if not isinstance(chain_id, str):
            raise TypeError("'%s' is not a valid chain_id" % str(chain_id))
        # fullmatch is used rather than match with "^...$" because "$" also
        # matches just before a trailing newline, which would let "A\n" through.
        if not re.fullmatch(r"[A-Z]", chain_id):
            raise ValueError(
                "chain_id must be a single upper case letter - not '%s'" % chain_id
            )
        self._chain_id = chain_id
        ResiduicSequence.__init__(self, *residues)
        # Point every residue back at this chain.
        for residue in self._residues:
            residue._chain = self
        self._alpha_helices = set()
        self._beta_strands = set()
        self._model = None
        self._complex = None

    def __repr__(self):
        return "<Chain %s (%i residues)>" % (self._chain_id, len(self._residues))

    def chain_id(self):
        """Returns the chain's ID.

        :rtype: ``str``"""

        return self._chain_id

    def add_residue(self, residue):
        """Adds a residue to the end of the chain and sets the residue's
        chain to this chain.

        :param Residue residue: The residue to add."""

        ResiduicSequence.add_residue(self, residue)
        residue._chain = self

    def remove_residue(self, residue):
        """Removes a residue from the chain and clears the residue's chain.

        :param Residue residue: The residue to remove."""

        ResiduicSequence.remove_residue(self, residue)
        residue._chain = None

    def alpha_helices(self):
        """Returns the :py:class:`AlphaHelix` objects on this chain.

        :returns: ``set`` of ``AlphaHelix`` objects"""

        return set(self._alpha_helices)

    def beta_strands(self):
        """Returns the :py:class:`BetaStrand` objects on this chain.

        :returns: ``set`` of ``BetaStrand`` objects"""

        return set(self._beta_strands)

    def model(self):
        """Returns the :py:class:`.Model` that the chain inhabits.

        :rtype: ``Model``"""

        return self._model

    def complex(self):
        """Returns the :py:class:`.Complex` that the chain is a part of.

        :rtype: ``Complex``"""

        return self._complex

    def get_helix_by_id(self, helix_id):
        """Returns the first alpha helix that matches a given helix ID.

        :param str helix_id: The helix ID to search by.
        :raises TypeError: if ``helix_id`` is not a ``str``.
        :rtype: :py:class:`.AlphaHelix` or ``None``"""

        if not isinstance(helix_id, str):
            raise TypeError("Helix ID search must be by str, not '%s'" % str(helix_id))
        for helix in self.alpha_helices():
            if helix.helix_id() == helix_id:
                return helix
        return None

    def get_strand_by_id(self, strand_id):
        """Returns the first beta strand that matches a given strand ID.

        :param str strand_id: The strand ID to search by.
        :raises TypeError: if ``strand_id`` is not a ``str``.
        :rtype: :py:class:`.BetaStrand` or ``None``"""

        if not isinstance(strand_id, str):
            raise TypeError("Strand ID search must be by str, not '%s'" % str(strand_id))
        for strand in self.beta_strands():
            if strand.strand_id() == strand_id:
                return strand
        return None
class BindSite(ResiduicStructure):
    """Base class: :py:class:`ResiduicStructure`

    Represents binding sites - the residue clusters that mediate ligand binding.

    :param str site_id: The site's ID.
    :param residues: The residues in this site.
    :raises TypeError: if ``site_id`` is not a ``str``."""

    def __init__(self, site_id, *residues):
        if not isinstance(site_id, str):
            raise TypeError("'%s' is not a valid site_id" % str(site_id))
        self._site_id = site_id
        self._ligand = None
        self._model = None
        ResiduicStructure.__init__(self, *residues)

    def __repr__(self):
        return "<BindSite %s (%i residues)>" % (self._site_id, len(self._residues))

    def site_id(self):
        """Returns the site's ID.

        :rtype: ``str``"""

        return self._site_id

    def ligand(self, ligand=None):
        """Returns or sets the site's :py:class:`.SmallMolecule` ligand.

        When setting, the ligand's ``_bind_site`` is also pointed back at this
        site.

        :param SmallMolecule ligand: If given, the ligand will be set to this.
        :raises TypeError: if ``ligand`` is given and is not a
         :py:class:`.SmallMolecule`.
        :rtype: ``SmallMolecule``"""

        if ligand is None:
            return self._ligand
        else:
            if not isinstance(ligand, SmallMolecule):
                raise TypeError(
                    "'%s' is not a valid ligand" % str(ligand)
                )
            self._ligand = ligand
            ligand._bind_site = self

    def model(self):
        """Returns the :py:class:`.Model` that the site inhabits.

        :rtype: ``Model``"""

        return self._model

    def continuous_sequence(self):
        """If the residues are all on the same chain, this will return a
        continuous sequence that contains all residues in this site - the
        stretch of the chain from the first site residue to the last site
        residue, inclusive. Otherwise ``None`` is returned.

        :rtype: :py:class:`ResiduicSequence` or ``None``"""

        site_residues = self.residues()
        chains = {res.chain() for res in site_residues}
        # Exactly one distinct chain is required. A residue without a chain
        # contributes None to the set, so a mix of chained and chainless
        # residues is rejected here instead of failing in the index lookup.
        if len(chains) != 1:
            return None
        chain = next(iter(chains))
        if not chain:
            return None
        # Copy the chain's residue list once rather than once per lookup.
        chain_residues = chain.residues()
        indices = [chain_residues.index(res) for res in site_residues]
        # The slice end is exclusive, so add one to keep the last site residue.
        return ResiduicSequence(*chain_residues[min(indices):max(indices) + 1])
class AlphaHelix(ResiduicSequence):
    """Base class: :py:class:`ResiduicSequence`

    Represents alpha helices.

    On creation the helix adds itself to the alpha helices of the chain its
    residues are on, if they have one.

    :param str helix_id: The helix's ID.
    :param residues: The residues in this helix - they must all report the
     same chain.
    :param str helix_class: The classification of the helix.
    :param str comment: Any comment associated with this helix.
    :raises TypeError: if ``helix_id``, ``helix_class`` or ``comment`` is of
     the wrong type.
    :raises BrokenHelixError: if the residues do not all share one chain."""

    def __init__(self, helix_id, *residues, helix_class=None, comment=None):
        if not isinstance(helix_id, str):
            raise TypeError("'%s' is not a valid helix_id" % str(helix_id))
        self._helix_id = helix_id

        # One distinct chain() value across all residues, or the helix is broken.
        parent_chains = {member.chain() for member in residues}
        if len(parent_chains) != 1:
            raise BrokenHelixError(
                "Cannot make helix %s with residues from multiple chains" % helix_id
            )
        super().__init__(*residues)

        if not (helix_class is None or isinstance(helix_class, str)):
            raise TypeError("'%s' is not a valid helix_class" % str(helix_class))
        self._helix_class = helix_class

        if not (comment is None or isinstance(comment, str)):
            raise TypeError("'%s' is not a valid comment" % str(comment))
        self._comment = comment

        # Register this helix on its chain, when there is one.
        owner = self.chain()
        if owner:
            owner._alpha_helices.add(self)

    def __repr__(self):
        return "<AlphaHelix %s (%i residues)>" % (self._helix_id, len(self._residues))

    def helix_id(self):
        """Returns the helix's ID.

        :rtype: ``str``"""

        return self._helix_id

    def helix_class(self, helix_class=None):
        """Returns or sets the helix's classification.

        :param str helix_class: If given, the class will be set to this.
        :raises TypeError: if ``helix_class`` is given and is not a ``str``.
        :rtype: ``str``"""

        if helix_class is None:
            return self._helix_class
        if not isinstance(helix_class, str):
            raise TypeError(
                "'%s' is not a valid helix_class" % str(helix_class)
            )
        self._helix_class = helix_class

    def comment(self, comment=None):
        """Returns or sets the helix's comment.

        :param str comment: If given, the comment will be set to this.
        :raises TypeError: if ``comment`` is given and is not a ``str``.
        :rtype: ``str``"""

        if comment is None:
            return self._comment
        if not isinstance(comment, str):
            raise TypeError(
                "'%s' is not a valid comment" % str(comment)
            )
        self._comment = comment

    def chain(self):
        """Returns the chain that this helix is on, taken from the first
        residue of the helix.

        :rtype: ``Chain``"""

        first_residue = self.residues()[0]
        return first_residue.chain()

    def add_residue(self, residue):
        """Adds a residue to the end of the helix.

        :param Residue residue: The residue to add.
        :raises BrokenHelixError: if the residue's chain is not the helix's
         chain."""

        if residue.chain() is self.chain():
            super().add_residue(residue)
        else:
            raise BrokenHelixError(
                "Cannot add %s to %s as their chains don't match" % (str(residue), str(self))
            )
class BetaStrand(ResiduicSequence):
    """Base class: :py:class:`ResiduicSequence`

    Represents beta strands.

    On creation the strand adds itself to the beta strands of the chain its
    residues are on, if they have one.

    :param str strand_id: The strand's ID.
    :param int sense: The sense of the strand with respect to the prior
     strand - one of -1, 0 or 1.
    :param residues: The residues in this strand - they must all report the
     same chain.
    :raises TypeError: if ``strand_id`` is not a ``str`` or ``sense`` is not
     an ``int``.
    :raises ValueError: if ``sense`` is outside -1 to 1.
    :raises BrokenStrandError: if the residues do not all share one chain."""

    def __init__(self, strand_id, sense, *residues):
        if not isinstance(strand_id, str):
            raise TypeError("'%s' is not a valid strand_id" % str(strand_id))
        self._strand_id = strand_id

        self._check_sense(sense)
        self._sense = sense

        # One distinct chain() value across all residues, or the strand is broken.
        parent_chains = {member.chain() for member in residues}
        if len(parent_chains) != 1:
            raise BrokenStrandError(
                "Cannot make strand %s with residues from multiple chains" % strand_id
            )
        super().__init__(*residues)

        # Register this strand on its chain, when there is one.
        owner = self.chain()
        if owner:
            owner._beta_strands.add(self)

    @staticmethod
    def _check_sense(sense):
        # Shared validation for the constructor and the sense() setter:
        # the value must be an int, and one of -1, 0 or 1.
        if not isinstance(sense, int):
            raise TypeError("'%s' is not a valid sense value" % str(sense))
        if sense < -1 or sense > 1:
            raise ValueError("sense can only be -1, 0 or 1 - not %i" % sense)

    def __repr__(self):
        return "<BetaStrand %s (%i residues)>" % (self._strand_id, len(self._residues))

    def strand_id(self):
        """Returns the strand's ID.

        :rtype: ``str``"""

        return self._strand_id

    def sense(self, sense=None):
        """Returns or sets the strand's sense with respect to the previous
        strand.

        :param int sense: If given, the sense will be set to this.
        :raises TypeError: if ``sense`` is given and is not an ``int``.
        :raises ValueError: if ``sense`` is given and is outside -1 to 1.
        :rtype: ``int``"""

        if sense is None:
            return self._sense
        self._check_sense(sense)
        self._sense = sense

    def chain(self):
        """Returns the chain that this strand is on, taken from the first
        residue of the strand.

        :rtype: ``Chain``"""

        first_residue = self.residues()[0]
        return first_residue.chain()

    def add_residue(self, residue):
        """Adds a residue to the end of the strand.

        :param Residue residue: The residue to add.
        :raises BrokenStrandError: if the residue's chain is not the strand's
         chain."""

        if residue.chain() is self.chain():
            super().add_residue(residue)
        else:
            raise BrokenStrandError(
                "Cannot add %s to %s as their chains don't match" % (str(residue), str(self))
            )
# Lookup table from upper-case three-letter residue names to one-letter codes.
RESIDUES = {
    "GLY": "G",
    "ALA": "A",
    "LEU": "L",
    "MET": "M",
    "PHE": "F",
    "TRP": "W",
    "LYS": "K",
    "GLN": "Q",
    "GLU": "E",
    "SER": "S",
    "PRO": "P",
    "VAL": "V",
    "ILE": "I",
    "CYS": "C",
    "TYR": "Y",
    "HIS": "H",
    "ARG": "R",
    "ASN": "N",
    "ASP": "D",
    "THR": "T",
}