Source code for rnamake.x3dna

import subprocess
import settings
import os
import re
import numpy as np
import util

import motif_type, exceptions

X3DNA_BIN_PATH = settings.X3DNA_PATH + "bin/"
os.environ['X3DNA'] =  settings.X3DNA_PATH

# TODO figure out what operating system is being used
# TODO create at enum type for basepair types instead of strings
# TODO check to see what happens with RNA without chain ids
[docs]class X3dna(object): """ a simple wrapper for interfacing with the x3dna package for determining the basepairing in a pdb structure. Please support the author of x3dna directory and cite his work and register on his site: http://x3dna.org/ Data is returned as a list of X3DNA basepair objects which are simplistic containers for storing which residues are contained in a basepair and what the reference frame and type of the basepair is. These are different from basepair.Basepair objects which store the actual atomic information of a basepair .. code-block:: python >>> from rnamake.unittests import files >>> from rnamake import x3dna >>> x = x3dna.X3dna() >>> basepairs = x.get_basepairs(files.P4P6_PDB_PATH) >>> len(basepairs) 79 >>> basepairs[10] <X3DNA Basepair(A131 A192 cW-W)> """ def __init__(self): self.find_pair_path = X3DNA_BIN_PATH + "find_pair" self.analyze_path = X3DNA_BIN_PATH + "analyze" self.dssr_path = X3DNA_BIN_PATH + "x3dna-dssr" if not os.path.isfile(self.find_pair_path): raise exceptions.X3dnaException( "%s executable should exist but doesnt! " % (self.find_pair_path)) if not os.path.isfile(self.analyze_path): raise exceptions.X3dnaException( "%s executable should exist but doesnt! " % (self.analyze_path)) if not os.path.isfile(self.dssr_path): raise exceptions.X3dnaException( "%s executable should exist but doesnt! " % (self.dssr_path)) self.ref_frames_extra_files = ( "auxiliary.par,bestpairs.pdb,bp_helical.par,bp_order.dat," + \ "bp_step.par,cf_7methods.par,col_chains.scr,col_helices.scr," + \ "hel_regions.pdb,hstacking.pdb,poc_haxis.r3d,stacking.pdb").split(",") self.dssr_extra_files = ( "dssr-2ndstrs.ct,dssr-2ndstrs.dbn,dssr-helices.pdb,"+ \ "dssr-pairs.pdb,dssr-stems.pdb,hel_regions.pdb,hstacking.pdb,"+ \ "poc_haxis.r3d,stacking.pdb,dssr-torsions.dat,dssr-Kturns.pdb,"+ \ "dssr-multiplets.pdb,dssr-hairpins.pdb,dssr-Aminors.pdb").split(",") def _remove_files(self, f_list): for f in f_list: try: os.remove(f) except: pass
[docs] def generate_ref_frame(self, pdb_path): """ runs x3dna/find_pair and x3dna/analyze to get the basepairing information of a given pdb and it is updated to a ref_frames.dat file which can be parsed by get_basepair_info if necessary :param pdb_path: the pdb you want to run without the .pdb extension :type pdb_path: str .. code-block:: python # after running should be a "ref_frames.dat" in current dir >>> from rnamake import x3dna >>> x = x3dna.X3dna() >>> x.generate_ref_frame("test.pdb") """ pdb_name = util.filename(pdb_path)[:-4] if not os.path.isfile(pdb_path): raise exceptions.X3dnaException( pdb_path + " is not found cannot generate ref_frames.dat file") result = subprocess.call( self.find_pair_path + " " + pdb_path + " 2> /dev/null " + "stdout | " + self.analyze_path + " stdin >& /dev/null", executable="/bin/bash", shell=True) if result != 0: raise exceptions.X3dnaException("find_pair did not run correctly") if not os.path.isfile("ref_frames.dat"): raise exceptions.X3dnaException( "generate_ref_frame was ran on %s but no " % (pdb_path) + "ref_frames.dat file was produced something extremely wrong") self._remove_files(self.ref_frames_extra_files) os.remove(pdb_name+".out")
[docs] def generate_dssr_file(self, pdb_path): """ runs x3dna's dssr which assigns the secondary structure and each of the motifs in a RNA structure :param pdb_name: the pdb you want to run without the .pdb extension :type pdb_name: str """ if not os.path.isfile(pdb_path): raise exceptions.X3dnaException( pdb_path + " is not found cannot generate dssr file") pdb_name = util.filename(pdb_path)[:-4] result = subprocess.call( self.dssr_path + " -i="+pdb_path+" -o="+pdb_name+ \ "_dssr.out --non-pair >& /dev/null", shell=True, executable="/bin/bash") if result != 0: raise exceptions.X3dnaException("dssr did not run correctly") self._remove_files(self.dssr_extra_files)
def _parse_ref_frame_file(self, ref_frames_path): """ Helper function do not run outside! parses out basepair information from ref_frames.dat which can be generated using generate_ref_frame :param ref_frames_path: the path to the ref_frames.dat file :type ref_frames_path: str """ f = open(ref_frames_path) lines = f.readlines() f.close() p = re.compile('(\w+)\:\.*(\d+)\_\:\[\.*\w+\]\w+ \- (\w+)\:\.*(\d+)') r = [] d = [] basepairs = [] start_bp = 0 bps = None for l in lines: if l[0:3] == "...": start_bp = 1 if p.search(l): m = p.search(l) bps = m.groups() else: raise exceptions.X3dnaException( "did not parse 3DNA ref frame correctly") continue if start_bp == 0: continue if start_bp == 1: split = re.split("\s+", l) d = [ float(x) for x in split[1:4] ] start_bp += 1 elif start_bp > 1: split = re.split("\s+", l) r.append([ float(x) for x in split[1:4] ]) start_bp += 1 #have everything, generate BasePair object if start_bp == 5: res1 = Residue(int(bps[1]), bps[0]) res2 = Residue(int(bps[3]), bps[2]) basepairs.append(Basepair(res1, res2, np.array(r), np.array(d))) r = [] d = [] return basepairs def _get_ref_frame_path(self, path): """ determines were the ref_frames.dat file generated by x3dna is located checks where the pdb is where would be the it would be if its a motif directory. Also checks local directory and if does not exist in either location it creates a new one using generate_ref_frame. This is a helper function do not call directly! :param pdb_name: the pdb you want to run without the .pdb extension :type pdb_name: str """ filename = util.filename(path) if os.path.isdir(path): ref_frames_path = path + "/ref_frames.dat" else: base_dir = util.base_dir(path) filename = filename[:-4] ref_frames_path = base_dir + "/ref_frames.dat" if not os.path.isfile(ref_frames_path): ref_frames_path = "ref_frames.dat" self.generate_ref_frame(path) return ref_frames_path def _get_dssr_file_path(self, path): """ determines were the pdb_dssr.out file generated by x3dna is located checks where the pdb is where would be the it would be if its a motif directory. Also checks local directory and if does not exist in either location it creates a new one using generate_dssr_file. This is a helper function do not call directly! :param pdb_name: the pdb you want to run without the .pdb extension :type pdb_name: str """ filename = util.filename(path) dssr_file_path = None if os.path.isdir(path): dssr_file_path = path + "/" + filename + "_dssr.out" else: base_dir = util.base_dir(path) filename = filename[:-4] dssr_file_path = base_dir + "/" + filename + "_dssr.out" if not os.path.isfile(dssr_file_path): dssr_file_path = filename + "_dssr.out" self.generate_dssr_file(path) return dssr_file_path def _parse_dssr_res_str(self, res_str): """ helper function to convert the dssr residue indentifier into a X3DNA residue object :param res_str: dssr residue indentifier :type res_str: str """ # TODO reevalute this code, not sure I need all this complexity p = re.compile(r"^\d+$") spl = re.split("\.",res_str) chain = spl[0] rnum = spl[1][1:] i_code = "" if not p.match(rnum): r_spl = spl[1] i_code = "" i_spl = re.split("\^",spl[1]) if len(i_spl) > 1: r_spl = i_spl[0] i_code = i_spl[1] num = "" for e in r_spl[::-1]: try: int_e = int(e) num = e + num except: break try: int(rnum) except: return None return Residue(int(rnum), chain, i_code) def _divide_dssr_file_into_sections(self, dssr_file_path): """ splits up the dssr file by the title :param dssr_file_path: the location of the dssr file :type dssr_file_path: str """ f = open(dssr_file_path) lines = f.readlines() f.close() p = re.compile(r"List of \d+\s*(\S+)") sections = {} section = [] section_name = "" for l in lines: if len(section) == 0 and p.match(l): m = p.match(l) section_name = m.group(1) if section_name == 'helix': section_name = 'helices' if section_name[-1] != "s": section_name = section_name + "s" if l[0] == '*': if len(section) != 0 and section_name not in sections: sections[section_name] = section section = [] continue section.append(l) return sections
[docs] def get_basepairs(self, pdb_path): """ generates and/or collects the information from the ref_frame.dat file and dssr output file to determine the basepairing between residues and what type each basepair is. Returns a list of basepair X3DNA objects. :param pdb_path: path to the pdb file :type pdb_path: str """ ref_frames_path = self._get_ref_frame_path(pdb_path) dssr_file_path = self._get_dssr_file_path(pdb_path) basepairs = self._parse_ref_frame_file(ref_frames_path) section = self._divide_dssr_file_into_sections(dssr_file_path)['bases'] for l in section: spl = re.split("\s+",l) if len(spl) < 6: continue if not spl[1].isdigit(): continue res1 = self._parse_dssr_res_str(spl[2]) res2 = self._parse_dssr_res_str(spl[3]) if res1 == None or res2 == None: continue bp_type = spl[8] if len(bp_type) < 1: bp_type = spl[7] found = 0 for bp in basepairs: if bp.res1 == res1 and bp.res2 == res2: bp.bp_type = bp_type found = 1 if bp.res2 == res1 and bp.res1 == res2: bp.bp_type = bp_type found = 1 if found: break if not found: basepairs.append(Basepair(res1, res2, np.eye(3), np.array([-1,-1,-1]))) basepairs[-1].bp_type = bp_type return basepairs
def get_dssr_file_sections(self, pdb_path): dssr_file_path = self._get_dssr_file_path(pdb_path) return self._divide_dssr_file_into_sections(dssr_file_path) def get_motifs(self, pdb_path): dssr_file_path = self._get_dssr_file_path(pdb_path) types = { 'hairpins' : motif_type.HAIRPIN, 'bulges' : motif_type.TWOWAY, 'internals' : motif_type.TWOWAY, 'junctions' : motif_type.NWAY, 'non-loops' : motif_type.SSTRAND} file_sections = self._divide_dssr_file_into_sections(dssr_file_path) all_motifs = [] for section, t in types.iteritems(): if section in file_sections: motifs = self._parse_dssr_section(file_sections[section], t) all_motifs.extend(motifs) if 'stems' in file_sections: motifs = self._parse_dssr_helix_section(file_sections['stems']) all_motifs.extend(motifs) return all_motifs def _parse_dssr_section(self, section, mtype): motifs = [] seen_res = [] for l in section: spl = l.split() try: if spl[0][:3] != 'nts': continue except: continue if len(spl) < 3: continue res = [] for res_str in spl[2].split(","): res_obj = self._parse_dssr_res_str(res_str) res.append(res_obj) count = 0 for r in res: if r in seen_res: count += 1 if count == len(res): continue seen_res.extend(res) motifs.append(Motif(res, mtype)) return motifs def _parse_dssr_helix_section(self, section): p = re.compile("\d+") res = [] motifs = [] for l in section: spl = l.split() try: if not p.match(spl[0]): continue except: continue if int(spl[0]) == 1 and len(res) > 0: motifs.append(Motif(res, motif_type.HELIX)) res = [] res.append(self._parse_dssr_res_str(spl[1])) res.append(self._parse_dssr_res_str(spl[2])) if len(res) > 0: motifs.append(Motif(res, motif_type.HELIX)) return motifs
[docs]class Motif(object): """ Container for motif residues from dssr files produceed by x3dna. Should not be instantiated outside x3dna.X3dna class. :attributes: `residues` : list of Residue objects residues that belong to same motif `mtype`: motif_type enum the enum type of the given motif """ __slots__ = ["residues", "mtype"] def __init__(self, residues, mtype): self.residues, self.mtype = residues, mtype
[docs]class Basepair(object): """ Container for basepairs from ref_frames and dssr files produced by x3dna. Should not be instantiated outside x3dna.X3dna class. :attributes: `res1` : Residue first residue in basepair `res2` : Residue second residue in basepair `r` : np.array rotation matrix describing principle axies of orientation of basepair `d` : np.array center point of basepair `bp_type`: str x3dna dssr basepair type, see x3dna for more details """ __slots__ = ["res1", "res2", "r", "d", "bp_type"] def __init__(self, res1, res2, r, d): self.res1, self.res2, self.r, self.d = res1, res2, r, d self.bp_type = "c..." def __repr__(self): res1 = self.res1.chain_id + str(self.res1.num) + self.res1.i_code res2 = self.res2.chain_id + str(self.res2.num) + self.res2.i_code return "<X3DNA Basepair(%s %s %s)>" % \ (res1, res2, self.bp_type)
[docs]class Residue(object): """ Container for residues from ref_frames and dssr files produced by x3dna. Should not be instantiated outside x3dna.X3dna class. :attributes: `num` : int residue number `chain_id` : str residue chain id `i_code` : str residue insertion code """ __slots__ = ["num", "chain_id", "i_code"] def __init__(self, num, chain_id, i_code=""): self.num, self.chain_id, self.i_code = num, chain_id, i_code def __repr__(self): return "<X3DNA Residue(%s%s %s)>" % \ (self.num, self.i_code, self.chain_id) def __eq__(self, res): if res == None: return 0 return self.num == res.num and self.chain_id == res.chain_id and \ self.i_code == res.i_code def __ne__(self, res): return not (self.num == res.num and self.chain_id == res.chain_id \ and self.i_code == res.i_code)