import uuid
import motif_type
import exceptions
[docs]class Residue(object):
"""
An extremely stripped down container object for use for keeping track of
secondary structure using dot bracket notation. Dot bracket notation is
represented by a string of equal length of a sequence, example:
sequence: 'GCAAAACG'\n
dot bracket: '((....))'
'(' represent the 5' end of a basepair, ')' the 3' end of the same base
pair. Thus there should be an equal number of '(' and ')' symbols. '.'
represents an unpaired residue.
This class along with others in this module are generally
not instatinated outside RNAStructure, Motif and Pose.
:param name: name of residue, etc A, G, C, U
:param dot_bracket: dot bracket notation for secondary structure either '(', '.', ')'
:param num: number of residue
:param chain_id: chain id of residue, etc "A" or "B"
:param uuid: residue unique indentifier
:param i_code: residue insertaion code, usually ""
:type name: str
:type dot_bracket: str
:type num: int
:type chain_id: str
:type uuid: uuid.uuid1
:type i_code: str
:attributes:
`name`: str
name of residue, etc A, G, C, U
`dot_bracket`: str
dot bracket notation for secondary structure either '(', '.', ')'
`num`: int
number of residue
`chain_id`: str
chain id of residue, etc "A" or "B"
`uuid`: uuid.uuid1
residue unique indentifier
`i_code`: str
residue insertaion code, usually ""
:examples:
.. code-block:: python
# create a new residue
>>> r = Residue("G", "(", 10, "A", uuid.uuid1())
"""
__slots__= ["name", "dot_bracket", "num", "chain_id", "uuid", "i_code"]
def __init__(self, name, dot_bracket, num, chain_id, uuid, i_code=""):
self.name, self.dot_bracket, self.num = name, dot_bracket, num
self.uuid = uuid
self.chain_id, self.i_code = chain_id, i_code
def __repr__(self):
return "<SecondaryStructureResidue('%s%d%s chain %s')>" % (
self.name, self.num, self.i_code, self.chain_id)
[docs] def to_str(self):
"""
stringify residue object. can be converted back with
:func:`str_to_residue`
:return: stringifed verision of residue
:rtype: str
"""
return self.name + "," + self.dot_bracket + "," + str(self.num) + "," + \
str(self.chain_id) + "," + str(self.i_code)
[docs] def copy(self):
"""
creates copy of current residue
:return: copy of instatnce
:rtype: secondary_structure.Residue
"""
return Residue(self.name, self.dot_bracket, self.num, self.chain_id,
self.uuid, self.i_code)
[docs]class Chain(object):
"""
secondary structure chain container. Contains a chain of connected
residues. Chain should be from 5' to 3'.
:param residues: the residues that are to be included in chain. Optional.
:type residues: list of Residues.
:attributes:
`residues` : list of Residues
Residues contained in current chain
"""
__slots__ = ["residues"]
def __init__(self, residues=None):
self.residues = residues
if self.residues is None:
self.residues = []
def __len__(self):
return len(self.residues)
def __repr__(self):
seq = ""
for r in self.residues:
seq += r.name
return "<SecondaryStructureChain( " + seq + ")"
def __iter__(self):
return self.residues.__iter__()
[docs] def first(self):
"""
gets the first residue in the chain
:return: 5' end of chain
:rtype: secondary_structure.Residue
:examples:
.. code-block:: python
>>> from rnamake.unittests import instances
>>> c = instances.secondary_structure_chain()
>>> c.first()
<SecondaryStructureResidue('G13 chain A')>
"""
if len(self.residues) == 0:
raise exceptions.SecondaryStructureException(
"cannot call first there are no residues in chain")
return self.residues[0]
[docs] def last(self):
"""
gets the first residue in the chain
:return: 5' end of chain
:rtype: secondary_structure.Residue
:examples:
.. code-block:: python
>>> from rnamake.unittests import instances
>>> c = instances.secondary_structure_chain()
>>> c.last()
<SecondaryStructureResidue('G24 chain A')>
"""
if len(self.residues) == 0:
raise exceptions.SecondaryStructureException(
"cannot call last there are no residues in chain")
return self.residues[-1]
[docs] def sequence(self):
"""
gets the string verision of the sequence in this chain
:returns: string of sequence of chain
:rtype: str
:examples:
.. code-block:: python
>>> from rnamake.unittests import instances
>>> c = instances.secondary_structure_chain()
# see the residue of each residue in the chain, all Gs
>>> for r in c: print r
...
<SecondaryStructureResidue('G13 chain A')>
<SecondaryStructureResidue('G14 chain A')>
<SecondaryStructureResidue('G15 chain A')>
<SecondaryStructureResidue('G16 chain A')>
<SecondaryStructureResidue('G17 chain A')>
<SecondaryStructureResidue('G18 chain A')>
<SecondaryStructureResidue('G19 chain A')>
<SecondaryStructureResidue('G20 chain A')>
<SecondaryStructureResidue('G21 chain A')>
<SecondaryStructureResidue('G22 chain A')>
<SecondaryStructureResidue('G23 chain A')>
<SecondaryStructureResidue('G24 chain A')>
>>> c.sequence()
u'GGGGGGGGGGGG'
"""
seq = ""
for r in self.residues:
seq += r.name
return seq
[docs] def dot_bracket(self):
"""
gets the string verision of the secondary structure in
dot_bracket notation of this chain
:returns: string of secondary structure in dot bracket notation of
chain
:rtype: str
:examples:
.. code-block:: python
>>> from rnamake.unittests import instances
>>> c = instances.secondary_structure_chain()
>>> c.dot_bracket()
u'(((((((((((('
"""
db = ""
for r in self.residues:
db += r.dot_bracket
return db
[docs] def to_str(self):
"""
stringify chain object. can be converted back with
:func:`str_to_chain`
:return: stringifed verision of chain
:rtype: str
"""
s = ""
for r in self.residues:
s += r.to_str() + ";"
return s
[docs] def copy(self):
"""
creates deep copy of chain instance
:return: copy of chain
:rtype: secondary_structure.Chain
"""
residues = []
for r in self.residues:
residues.append(r.copy())
return Chain(residues)
[docs]class Basepair(object):
"""
:param res1: First residue in basepair
:param res2: Second residue in basepair
:param bp_uuid: basepair unique indentifier
:type res1: secondary_structure.Residue
:type res2: secondary_structure.Residue
:type bp_uuid: uuid.uuid1
:attributes:
`res1` : secondary_structure.Residue
First residue in basepair
`res2` : secondary_structure.Residue
Second residue in basepair
`uuid`: uuid.uuid1
unique id to indentify this basepair when locating it in a motif or
pose
"""
__slots__ = ["res1", "res2", "uuid"]
def __init__(self, res1, res2, bp_uuid=None):
self.res1, self.res2 = res1, res2
self.uuid = bp_uuid
if self.uuid is None:
self.uuid = uuid.uuid1()
def __repr__(self):
return "<SecondaryStructureBasepair("+self.name()+")>"
[docs] def name(self):
"""
get name of basepair: which is the combined name of both residues
seperated by a "-". The residue with the lower res number should
come first
:return: name of basepair
:rtype: str
:examples:
.. code-block:: python
# build basepair from stratch
>>> from rnamake.unittests import instances
>>> b = instances.secondary_structure_basepair()
>>> print b.res1
<SecondaryStructureResidue('C10 chain A')>
>>> print b.res2
<SecondaryStructureResidue('G15 chain A')>
>>> print b.name()
A10-A15
"""
str1 = self.res1.chain_id+str(self.res1.num)+str(self.res1.i_code)
str2 = self.res2.chain_id+str(self.res2.num)+str(self.res2.i_code)
if str1 < str2:
return str1+"-"+str2
else:
return str2+"-"+str1
[docs] def partner(self, r):
"""
get the other basepairing partner of a residue will throw an error
if the supplied residue is not contained within this basepair
:param res: the residue that you want to get the partner of
:type res: secondary_structure.Residue object
"""
if r == self.res1:
return self.res2
elif r == self.res2:
return self.res1
else:
raise exceptions.SecondaryStructureException(
"call partner with a residue not in basepair")
[docs]class Structure(object):
"""
lightweight container class for storing secondary structure information
for an entire RNA.
:param chains: secondary_structure.Chains that belong to this structure,
this is done when a structure is being built from an existing 3D
structure.Structure instance
:param sequence: sequence of RNA of interest, e.g. "AAAGGGCCC",
:param dot_bracket: dot bracket notation of the secondary structure of
RNA of interes, e.g. "(((())))"
:type chains: list of secondary_structure.Chains
:type sequence: str
:type dot_bracket: str
:attributes:
`chains`: list of secondary_structure.Chains
the chains of RNA residues in this structure
:examples:
.. code-block:: python
# create a new structure
>>> from rnamake import secondary_structure
>>> s = secondary_structure.Structure(sequence="GCGAAAACGC",
dot_bracket="(((....)))")
>>> print s.sequence()
GCGAAAACGC
>>> print s.dot_bracket()
(((....)))
>>> s.get_residue(num=1)
<SecondaryStructureResidue('G1 chain A')>
"""
__slots__ = ["chains"]
def __init__(self, chains=None, sequence="", dot_bracket=""):
self.chains = []
if chains is not None:
self.chains = chains
if len(sequence) != 0 and len(dot_bracket) != 0:
self.chains = self._setup_chains(sequence, dot_bracket)
def _setup_chains(self, sequence, dot_bracket):
"""
setup function for turning a string sequence and secondary structure
into a structure object.
:param sequence: sequence of RNA
:param dot_bracket: dot bracket of RNA
:type sequence: str
:type dot_bracket: str
"""
chains = []
residues = []
if len(dot_bracket) != len(sequence):
raise exceptions.SecondaryStructureException(
"sequence and dot bracket are not the same length")
if dot_bracket[0] != '(' and dot_bracket[0] != '.' and dot_bracket != '&':
raise exceptions.SecondaryStructureException(
"secondary structure is not valid did you flip seq and ss?")
count = 1
chains_ids = "ABCDEFGHIJKLMNOPQRSTUVWXZ"
valid_seq = "AGUCTN&+-"
chain_i = 0
for i in range(len(sequence)):
if sequence[i] not in valid_seq:
raise exceptions.SecondaryStructureException(
sequence[i] + " is not a valid secondary_structure element")
if sequence[i] != "&" and sequence[i] != "+" and sequence[i] != "-":
r = Residue(sequence[i], dot_bracket[i], count,
chains_ids[chain_i], uuid.uuid1())
residues.append(r)
count += 1
else:
chain_i += 1
chains.append(Chain(residues))
# unlikely but hit max chains
if chain_i == len(chains_ids)-1:
chain_i = 0
residues = []
if len(residues) > 0:
chains.append(Chain(residues))
return chains
[docs] def residues(self):
"""
Concats all residue objects from all Chain objects intos a unified
list to be able to easily iterate through.
:return: List of secondary_structure.Residue objects
"""
res = []
for c in self.chains:
res.extend(c.residues)
return res
[docs] def sequence(self):
"""
Concats the sequence of each Chain into one sequence for the entire
RNA
:return: sequence of structure
:rtype: seq
"""
sequences = [x.sequence() for x in self.chains]
return "&".join(sequences)
[docs] def dot_bracket(self):
"""
Concats the secondary structure in the form of dot bracket notation
of each Chain into one sequence for the entire RNA
:return: sequence of structure
:rtype: seq
"""
dot_brackets = [x.dot_bracket() for x in self.chains]
return "&".join(dot_brackets)
[docs] def get_residue(self, num=None, chain_id=None, i_code=None, uuid=None):
"""
find a residue based on residue num, chain_id, insert_code and uuid
will return an error if more then one residue matches search to avoid
confusion. Will return None is nothing matches search.
:param num: residue number
:param chain_id: what chain the residue belongs to
:param i_code: the insertation code of the residue
:param uuid: the unique indentifier that each residue is given
:type num: int
:type chain_id: str
:type i_code: str
:type uuid: uuid
:return: Residue object
:rtype: residue.Residue
:examples:
.. code-block:: python
>>> from rnamake import secondary_structure
>>> s = secondary_structure.Structure(sequence="GCGAAAACGC",
dot_bracket="(((....)))")
>>> s.get_residue(num=1)
<SecondaryStructureResidue('G1 chain A')>
"""
# nothing specified
if num is None and chain_id is None and i_code is None and uuid is None:
raise exceptions.SecondaryStructureException(
"called get_residue wiht no arguments")
found = []
for r in self.residues():
if num is not None and num != r.num:
continue
if i_code is not None and i_code != r.i_code:
continue
if chain_id is not None and chain_id != r.chain_id:
continue
if uuid is not None and uuid != r.uuid:
continue
found.append(r)
if len(found) == 0:
return None
if len(found) > 1:
raise exceptions.SecondaryStructureException(
"found multiple residues in get_residue(), narrow " +
"your search")
return found[0]
[docs] def copy(self):
"""
creates a deep copy of structure instance
:return: copy of structure
:rtype: secondary_structure.Structure
"""
new_chains = [ c.copy() for c in self.chains]
return Structure(chains=new_chains)
[docs] def to_str(self):
"""
generates a stringified verision of this instance.
:returns: stringified verision of structure
:rtype: str
"""
s = ""
for c in self.chains:
s += c.to_str() + "|"
return s
[docs]class RNAStructure(object):
"""
Complete secondary structure container for representing a RNA. Contains
both the sequence indentity of each residue with its corresponding dot
bracket notation symbol but also includes basepair objects to represent the
pairs between residues. This class parallels the
rna_structure.RNAStructure class for describing RNA with 3D coordinates.
Having a parallel class for secondary structure makes it simple to move
between secondary structure and full atom representations of RNA.
RNAStructure is rarely called directly but serves as an abstract class
for both Motif and Pose so for example use please see those classes.
:param structure: structure containing residue and chain information for
this RNAStructure instance
:param basepairs: basepairs contained in RNAStructure
:param ends: the basepairs at the end of chains. These define connection
points to other RNAStructures
:param name: name of RNAStructure
:param path: location of where RNAStructure originated from, this is just
a place holder for converting from rna_structure.RNAStructure
:param score: the score generated by motif_scorer.MotifScorer
:param end_ids: strings indenifying the secondary structure and sequence
in the perspective of a given basepair end see
:func:`assign_end_id_new`
:type structure: secondary_structure.Structure
:type basepairs: list of secondary_structure.Basepairs
:type ends: list of secondary_structure.Basepairs
:type name: str
:type path: str
:type score: float
:type end_ids: list of strs
:attributes:
`structure`: secondary_structure.Structure
structure containing residue and chain information for
this RNAStructure instance
`basepairs`: list of secondary_structure.Basepairs
Basepairs between residues
`ends`: list of secondary_structure.Basepairs
Basepair ends where RNA structures can be connected
`name`: str
the name of the RNAStructure
`path`: str
location of where RNAStructure originated from, this is just
a place holder for converting from rna_structure.RNAStructure
`score` : float
the score generated by motif_scorer.MotifScorer, estimates secondary
structure stability
`end_ids`: list of strs
strings indenifying the secondary structure and sequence
in the perspective of a given basepair end see
:func:`assign_end_id_new`
"""
def __init__(self, structure=None, basepairs=None, ends=None,
name="assembled", path="assembled", score=0, end_ids=None):
self.structure = structure
if self.structure is None:
self.structure = Structure()
self.basepairs =basepairs
if self.basepairs is None:
self.basepairs = []
self.name = name
self.path = path
self.score = score
self.ends = ends
if self.ends is None:
self.ends = []
self.end_ids = end_ids
if self.end_ids is None:
self.end_ids = []
def __repr__(self):
return "<secondary_structure.RNAStructure( " + self.sequence() + " " + self.dot_bracket() + " )"
[docs] def get_residue(self, num=None, chain_id=None, i_code=None, uuid=None):
"""
wrapper for :func:`Structure.get_residue`
"""
return self.structure.get_residue(num, chain_id, i_code, uuid)
[docs] def get_basepair(self, res1=None, res2=None, uuid=None, name=None):
"""
Finds a specific basepair based on many possible parameters.
:param res1: first residue to be included in basepair
:param res2: second residue to be included in basepair
if both res1 and res2 are specified the basepair must be a pair
between these two.
:param uuid: basepair unique indentifier
:param name: name of basepair, from function Basepair.name()
:type res1: secondary_structure.Residue
:type res2: secondary_structure.Residue
:type uuid: uuid.uuid1
:type name: str
"""
if res1 is None and res2 is None and uuid is None and name is None:
raise exceptions.SecondaryStructureException(
"no arguments specified for get_basepair()")
for bp in self.basepairs:
if res1 is not None and (bp.res1 != res1 and bp.res2 != res1):
continue
if res2 is not None and (bp.res1 != res2 and bp.res2 != res2):
continue
if uuid is not None and bp.uuid != uuid:
continue
if name is not None and bp.name() != name:
continue
return bp
return None
[docs] def sequence(self):
"""
wrapper for :func:`Structure.sequence`
"""
return self.structure.sequence()
[docs] def dot_bracket(self):
"""
wrapper for :func:`Structure.dot_bracket`
"""
return self.structure.dot_bracket()
[docs] def replace_sequence(self, seq):
"""
changes the sequence of structure.
:param seq: the new sequence of structure
:type seq: str
:returns: None
"""
spl = seq.split("&")
seq2 = "".join(spl)
if len(seq2) != len(self.structure.residues()):
raise exceptions.SecondaryStructureException(
"cannot replace sequence, sequence length is different then "
"the number of residues")
for i, r in enumerate(self.structure.residues()):
r.name = seq2[i]
[docs] def residues(self):
"""
wrapper for :func:`Structure.residues`
"""
return self.structure.residues()
[docs] def chains(self):
"""
wrapper for :func:`Structure.chains`
"""
return self.structure.chains
[docs] def copy(self):
"""
creates deep copy of RNAStructure instance
:returns: copy of instance
:rtype: RNAStructure
"""
n_ss = self.structure.copy()
basepairs, ends = [], []
for bp in self.basepairs:
new_bp = Basepair(n_ss.get_residue(uuid=bp.res1.uuid),
n_ss.get_residue(uuid=bp.res2.uuid),
bp.uuid)
basepairs.append(new_bp)
for end in self.ends:
i = self.basepairs.index(end)
ends.append(basepairs[i])
return RNAStructure(n_ss, basepairs, ends, self.name, self.path,
self.score, self.end_ids[::])
[docs]class Motif(RNAStructure):
"""
Complete secondary structure container for representing an RNA Motif.
Contains both the sequence indentity of each residue with its corresponding
dot bracket notation symbol but also includes basepair objects to represent
the pairs between residues. This class parallels the motif.Motif class for
describing RNA with 3D coordinates.
Having a parallel class for secondary structure makes it simple to move
between secondary structure and full atom representations of RNA.
:param structure: structure containing residue and chain information for
this RNAStructure instance
:param basepairs: basepairs contained in RNAStructure
:param ends: the basepairs at the end of chains. These define connection
points to other Motifs
:param name: name of Motif
:param path: location of where Motif originated from, this is just
a place holder for converting from motif.Motif
:param mtype: motif_type enum value, to indentify what type of motif this
is
:param score: the score generated by motif_scorer.MotifScorer
:param end_ids: strings indenifying the secondary structure and sequence
in the perspective of a given basepair end see
:func:`assign_end_id_new`
:param id: unique indentifer for motif
:param r_struct: an RNAStructure instance to setup from
:type structure: secondary_structure.Structure
:type basepairs: list of secondary_structure.Basepairs
:type ends: list of secondary_structure.Basepairs
:type name: str
:type path: str
:type mtype: motif_type
:type score: float
:type end_ids: list of strs
:type id: uuid.uuid1
:type r_structure: secondary_structure.RNAStructure
:attributes:
`structure`: secondary_structure.Structure
structure containing residue and chain information for
this Motif instance
`basepairs`: list of secondary_structure.Basepairs
Basepairs between residues
`ends`: list of secondary_structure.Basepairs
Basepair ends where RNA structures can be connected
`name`: str
the name of the Motif
`path`: str
location of where Motif originated from, this is just
a place holder for converting from motif.Motif
`score` : float
the score generated by motif_scorer.MotifScorer, estimates secondary
structure stability
`end_ids`: list of strs
strings indenifying the secondary structure and sequence
in the perspective of a given basepair end see
:func:`assign_end_id_new`
"""
def __init__(self, structure=None, basepairs=None, ends=None,
name="assembled", path="assembled", mtype=motif_type.UNKNOWN,
score=0, end_ids=None, id=None, r_struct=None):
self.structure = structure
if self.structure is None:
self.structure = Structure()
self.basepairs = basepairs
if self.basepairs is None:
self.basepairs = []
self.name = name
self.path = path
self.score = score
self.ends = ends
if self.ends is None:
self.ends = []
self.end_ids = end_ids
if self.end_ids is None:
self.end_ids = []
self.mtype = mtype
self.id = id
if self.id is None:
self.id = uuid.uuid1()
if r_struct is not None:
self.__dict__.update(r_struct.__dict__)
def __repr__(self):
return "<secondary_structure.Motif( " + self.sequence() + " " + self.dot_bracket() + " )"
[docs] def copy(self):
"""
creates a deep copy of Motif instance
:returns: deep copy of instance
:rtype: secondary_structure.Motif
"""
n_ss = self.structure.copy()
basepairs, ends = [], []
for bp in self.basepairs:
new_bp = Basepair(n_ss.get_residue(uuid=bp.res1.uuid),
n_ss.get_residue(uuid=bp.res2.uuid),
bp.uuid)
basepairs.append(new_bp)
for end in self.ends:
i = self.basepairs.index(end)
ends.append(basepairs[i])
return Motif(n_ss, basepairs, ends, self.name, self.path, self.mtype,
self.score, self.end_ids[::], self.id)
[docs] def copy_w_res(self, res, bps):
"""
creates a deep copy of Motif instance replacing residues and basepairs
that were already created. This is used when a pose is being copied to
make sure there are duplicate copies of residues and basepairs.
:param res: list of residues that were already copied
:param bps: list of basepairs that were already copied
:type res: list of secondary_structure.Residues
:type bps: list of secondary_structure.Basepairs
:returns: deep copy of instance
:rtype: secondary_structure.Motif
"""
chains = []
m = Motif()
for c in self.structure.chains:
new_res = []
for r in c.residues:
new_r = res[r.uuid]
new_res.append(new_r)
chains.append(Chain(new_res))
m.structure.chains = chains
m.end_ids = self.end_ids[::]
new_bps = []
new_ends = []
for bp in self.basepairs:
new_bps.append(bps[bp.uuid])
for end in self.ends:
new_ends.append(bps[bp.uuid])
m.basepairs = new_bps
m.ends = new_ends
return m
[docs] def to_str(self):
"""
creates a stringified verision of this instance
:returns: stringified verision of instance
:rtype: str
"""
s = str(self.mtype) + "!" + self.name + "!" + self.path + "!" + self.structure.to_str() + "!"
res = self.residues()
for bp in self.basepairs:
s += str(res.index(bp.res1)) + " " + str(res.index(bp.res2)) + "@"
s += "!"
for end in self.ends:
s += str(self.basepairs.index(end)) + " "
s += "!"
for ei in self.end_ids:
s += ei + " "
return s
[docs]class Pose(RNAStructure):
"""
Complete secondary structure container for representing an RNA Pose.
Contains both the sequence indentity of each residue with its corresponding
dot bracket notation symbol but also includes basepair objects to represent
the pairs between residues. This class parallels the pose.Pose class for
describing RNA with 3D coordinates.
Having a parallel class for secondary structure makes it simple to move
between secondary structure and full atom representations of RNA.
A pose is a composite structure containing more then one motif.
:param structure: structure containing residue and chain information for
this RNAStructure instance
:param basepairs: basepairs contained in RNAStructure
:param ends: the basepairs at the end of chains. These define connection
points to other Motifs
:param name: name of Motif
:param path: location of where Pose originated from, this is just
a place holder for converting from pose.Pose
:param score: the score generated by motif_scorer.MotifScorer
:param end_ids: strings indenifying the secondary structure and sequence
in the perspective of a given basepair end see
:func:`assign_end_id_new`
:param id: unique indentifer for motif
:param r_struct: an RNAStructure instance to setup from
:type structure: secondary_structure.Structure
:type basepairs: list of secondary_structure.Basepairs
:type ends: list of secondary_structure.Basepairs
:type name: str
:type path: str
:type score: float
:type end_ids: list of strs
:type id: uuid.uuid1
:type r_structure: secondary_structure.RNAStructure
:attributes:
`structure`: secondary_structure.Structure
structure containing residue and chain information for
this Motif instance
`basepairs`: list of secondary_structure.Basepairs
Basepairs between residues
`ends`: list of secondary_structure.Basepairs
Basepair ends where RNA structures can be connected
`name`: str
the name of the Pose
`path`: str
location of where Pose originated from, this is just
a place holder for converting from pose.Pose
`score` : float
the score generated by motif_scorer.MotifScorer, estimates secondary
structure stability
`end_ids`: list of strs
strings indenifying the secondary structure and sequence
in the perspective of a given basepair end see
:func:`assign_end_id_new`
`motifs`: list of secondary_structure.Motifs
all motifs that are contained in this Pose, excluding helices which
need to be built and are stored in helices. Instead motifs stores all
basepair steps as seperate motifs
`helices`: list of secondary_structure.Motifs
helical motifs that are more then 2 basepairs. needs to be build with
:func:`Pose.build_helices`
"""
def __init__(self, structure=None, basepairs=None, ends=None,
name="assembled", path="assembled", score=0, end_ids=None,
r_struct=None):
self.structure = structure
if self.structure is None:
self.structure = Structure()
self.basepairs = basepairs
if self.basepairs is None:
self.basepairs = []
self.name = name
self.path = path
self.score = score
self.ends = ends
if self.ends is None:
self.ends = []
self.end_ids = end_ids
if self.end_ids is None:
self.end_ids = []
self.motifs = []
self.helices = []
if r_struct is not None:
self.__dict__.update(r_struct.__dict__)
def __repr__(self):
return "<secondary_structure.Pose( " + self.sequence() + " " + self.dot_bracket() + " )"
[docs] def motif(self, m_id):
"""
gets a motif by its unique indentifer
:param m_id: motifs unique indentifier, Motif.id
:type m_id: uuid.uuid1
:returns: motif matching to its unique indentifier
:rtype: secondary_structure.Motif
"""
for m in self.motifs:
if m.id == m_id:
return m
return None
[docs] def replace_sequence(self, seq):
"""
changes the sequence of structure.
:param seq: the new sequence of structure
:type seq: str
:returns: None
"""
super(self.__class__, self).replace_sequence(seq)
for m in self.motifs:
for i, end in enumerate(m.ends):
m.end_ids[i] = assign_end_id_new(m, end)
[docs] def copy(self):
"""
creates a deep copy of instance
:returns: deep copy
:rtype: secondary_structure.Pose
"""
c_rna_struct = super(self.__class__, self).copy()
c_p = Pose(r_struct=c_rna_struct)
res = {r.uuid : r for r in self.residues()}
bps = {bp.uuid : bp for bp in self.basepairs}
new_motifs = []
for m in self.motifs:
m_copy = m.copy_w_res(res, bps)
new_motifs.append(m_copy)
c_p.motifs = new_motifs
return c_p
[docs] def to_str(self):
"""
generates a stringified verision of this instance.
:returns: stringified verision of pose
:rtype: str
"""
s = self.name + "#" + self.path + "#" + self.structure.to_str() + "#"
res = self.residues()
for bp in self.basepairs:
s += str(res.index(bp.res1)) + " " + str(res.index(bp.res2)) + "@"
s += "#"
for end in self.ends:
s += str(self.basepairs.index(end)) + " "
s += "#"
for ei in self.end_ids:
s += ei + " "
s += "#"
for m in self.motifs:
s += m.to_str() + "#"
return s
[docs] def build_helices(self):
"""
finds all basepair step motifs and builds helices from them. This is
only required for external applications such as rosetta. helices get
stored in member variable helices.
"""
steps = []
for m in self.motifs:
if m.mtype != motif_type.HELIX:
continue
steps.append(m)
seen = {}
current = None
helix_motifs = []
found = 0
while 1:
helix_motifs = []
current = None
for m1 in steps:
found = 0
if m1 in seen:
continue
for m2 in steps:
if m2 in seen:
continue
if m1 == m2:
continue
for end in m2.ends:
if m1.ends[0] == end:
found = 1
break
if not found:
current = m1
break
if found or current is None:
break
found = 1
while found:
seen[current] = 1
helix_motifs.append(current)
found = 0
for m in steps:
if m in seen:
continue
if m.ends[0] == current.ends[1]:
current = m
found = 1
break
res1, res2 = [], []
bps, ends = [], []
res1.append(helix_motifs[0].chains()[0].first())
res2.append(helix_motifs[0].chains()[1].last())
bps.append(helix_motifs[0].ends[0])
ends.append(helix_motifs[0].ends[0])
ends.append(helix_motifs[-1].ends[1])
for m in helix_motifs:
res1.append(m.chains()[0].last())
res2.append(m.chains()[1].first())
bps.append(m.ends[1])
res2.reverse()
chains = [Chain(res1), Chain(res2)]
struc = Structure(chains)
m = Motif(struc, bps, ends)
self.helices.append(m)
[docs]def str_to_residue(s):
"""
converts a residue from string generated from :func:`Residue.to_str`
:param s: string created by Residue.to_str()
:type s: str
:return: chain from str
:rtype: secondary_structure.Residue
"""
spl = s.split(",")
return Residue(spl[0], spl[1], int(spl[2]), spl[3], uuid.uuid1(), spl[4])
[docs]def str_to_chain(s):
"""
converts a chain from string generated from :func:`Chain.to_str`
:param s: string created by Chain.to_str()
:type s: str
:return: chain from str
:rtype: secondary_structure.Chain
"""
spl = s.split(";")
c = Chain()
for r_str in spl[:-1]:
r = str_to_residue(r_str)
c.residues.append(r)
return c
[docs]def str_to_structure(s):
"""
converts a structure from string generated from :func:`Structure.to_str`
:param s: string created by Structure.to_str()
:type s: str
:return: structure from str
:rtype: secondary_structure.Structure
"""
spl = s.split("|")
chains = []
for c_str in spl[:-1]:
c = str_to_chain(c_str)
chains.append(c)
return Structure(chains)
[docs]def str_to_motif(s):
"""
converts a motif from string generated from :func:`Motif.to_str`
:param s: string created by Motif.to_str()
:type s: str
:return: motif from str
:rtype: secondary_structure.Motif
"""
spl = s.split("!")
m = Motif()
m.mtype = int(spl[0])
m.name = spl[1]
m.path = spl[2]
m.structure = str_to_structure(spl[3])
res = m.residues()
for bp_str in spl[4].split("@")[:-1]:
res_is = bp_str.split()
r1 = res[int(res_is[0])]
r2 = res[int(res_is[1])]
m.basepairs.append(Basepair(r1, r2))
for end_i in spl[5].split():
m.ends.append(m.basepairs[int(end_i)])
m.end_ids = spl[6].split()
return m
[docs]def str_to_pose(s):
"""
converts a pose from string generated from :func:`Pose.to_str`
:param s: string created by Pose.to_str()
:type s: str
:return: pose from str
:rtype: secondary_structure.Pose
"""
spl = s.split("#")
p = Pose()
p.name = spl[0]
p.path = spl[1]
p.structure = str_to_structure(spl[2])
res = p.residues()
for bp_str in spl[3].split("@")[:-1]:
res_is = bp_str.split()
r1 = res[int(res_is[0])]
r2 = res[int(res_is[1])]
p.basepairs.append(Basepair(r1, r2))
for end_i in spl[4].split():
p.ends.append(p.basepairs[int(end_i)])
p.end_ids = spl[5].split()
motifs = []
for str in spl[6:-1]:
spl2 = str.split("!")
m = Motif()
m.mtype = int(spl2[0])
m.name = spl2[1]
m.path = spl2[2]
m.structure = str_to_structure(spl2[3])
chains = []
for c in m.chains():
res = []
for r in c.residues:
r_new = p.get_residue(r.num, r.chain_id)
res.append(r_new)
chains.append(Chain(res))
m.structure.chains = chains
res = m.residues()
for bp_str in spl2[4].split("@")[:-1]:
res_is = bp_str.split()
r1 = res[int(res_is[0])]
r2 = res[int(res_is[1])]
for bp in p.basepairs:
if bp.res1 == r1 and bp.res2 == r2:
m.basepairs.append(bp)
for end_i in spl2[5].split():
m.ends.append(m.basepairs[int(end_i)])
m.end_ids = spl2[6].split()
motifs.append(m)
p.motifs = motifs
return p
[docs]def assign_end_id_new(ss, end):
"""
generate a new end_id based on the secondary structure instance in the
perspective of the supplied end. An end id is a composition of both the
sequence and secondary structure in a single string.
Two GC pairs in a row would be: GG_LL_CC_RR. Sequence followed by secondary
structure with L being left bracket, R being right bracket and U being dot.
:param ss: secondary structure instance either RNAStructure,Motif or Pose
:param end: secondary structure basepair that you want the end id to be in
:return:
"""
if end not in ss.ends:
raise exceptions.SecondaryStructureException(
"supplied an end that is not in current ss element")
all_chains = ss.structure.chains[::]
open_chains = []
for c in all_chains:
if c.first() == end.res1 or c.first() == end.res2:
open_chains.append(c)
break
if len(open_chains) == 0:
raise exceptions.SecondaryStructureException(
"could not find chain to start with")
all_chains.remove(open_chains[0])
seen_res = {}
seen_bp = {}
saved_bp = None
structure = ""
seq = ""
bounds = [0, 0]
ss_chains = []
count = 0
while len(open_chains) > 0:
c = open_chains.pop(0)
for r in c.residues:
count += 1
dot_bracket = "."
bp = ss.get_basepair(r)
saved_bp = None
if bp is not None:
saved_bp = bp
partner_res = bp.partner(r)
if bp not in seen_bp and r not in seen_res and \
partner_res not in seen_res:
seen_res[r] = 1
dot_bracket = "("
elif partner_res in seen_res:
if seen_res[partner_res] > 1:
dot_bracket = "."
else:
dot_bracket = ")"
seen_res[r] = 1
seen_res[partner_res] += 1
structure += dot_bracket
seq += r.name
if saved_bp is not None:
seen_bp[saved_bp] = 1
bounds[1] = count
ss_chains.append([seq, structure])
structure = ""
seq = ""
best_score = -1
for c in all_chains:
score = 0
for r in c.residues:
bp = ss.get_basepair(r)
if bp is not None and bp in seen_bp:
score += 1
if score > best_score:
best_score = score
best_chains = []
for c in all_chains:
score = 0
for r in c.residues:
bp = ss.get_basepair(r)
if bp is not None and bp in seen_bp:
score += 1
if score == best_score:
best_chains.append(c)
best_chain = None
best_score = 10000
for c in best_chains:
pos = 1000
for i, r in enumerate(c.residues):
bp = ss.get_basepair(r)
if bp is not None and bp in seen_bp:
pos = i
break
if pos < best_score:
best_score = pos
best_chain = c
if best_chain is None:
break
all_chains.remove(best_chain)
open_chains.append(best_chain)
ss_id = ""
for i, chain in enumerate(ss_chains):
ss_id += chain[0] + "_"
for e in chain[1]:
if e == "(":
ss_id += "L"
elif e == ")":
ss_id += "R"
elif e == ".":
ss_id += "U"
else:
raise exceptions.SecondaryStructureException(
"unexpected symbol in dot bracket notation: " + e)
if i != len(ss_chains)-1:
ss_id += "_"
return ss_id