Source code for rnamake.pdb_parser

import atom
import residue
import residue_type
import exceptions
import user_warnings

def parse(pdb_file):
    """
    Very minimalistic PDB parser. It currently does not support multiple
    MODELS in NMR structures, but works well for what I need at the moment
    and will be expanded in future versions. It returns a list of Residue
    objects because it will mostly be called from a Structure object. It
    also does not parse the connectivity specified in CONECT statements.
    If one is interested in parsing into a Structure object, please use
    structure_from_pdb in the structure module.

    Only ATOM and HETATM records are read, and reading stops at the first
    record whose name starts with END. Waters (residue name HOH) are
    ignored, a repeated atom name within a residue keeps only its first
    occurrence, residues with fewer than 6 atoms are dropped, and residues
    whose name has no known residue type are skipped with a
    PDBFormatWarning.

    :param pdb_file: The path to the PDB formatted file you wish to parse
    :type pdb_file: str

    :return: List of residue.Residue objects, not sorted into chains
    :raises exceptions.PDBParserException: if the file cannot be opened or
        contains a MODEL record
    :raises exceptions.PDBParserError: if a coordinate field of an
        ATOM/HETATM record is missing or not a number

    :examples:

    .. code-block:: python

        >>> import rnamake.unittests.files
        >>> residues = parse(rnamake.unittests.files.P4P6_PDB_PATH)
        >>> len(residues)
        157

        # Not yet sorted into chains, residue 106 is the first in this chain
        >>> residues[0]
        <Residue('G250 chain A')>
    """
    try:
        # "with" guarantees the handle is closed even if readlines() fails.
        with open(pdb_file) as f:
            lines = f.readlines()
    except IOError:
        raise exceptions.PDBParserException("cannot parse pdb file: " + pdb_file +
                                            " as it does not exist")

    # One (atomname, resname, resnum, chain id, icode, coords) tuple per
    # ATOM/HETATM record, in file order.
    records = []
    for line_num, line in enumerate(lines):
        # PDB record names occupy the first 6 columns and are space padded,
        # so the literals compared against this slice must be 6 wide too.
        record_name = line[0:6]

        if record_name == 'ATOM  ' or record_name == 'HETATM':
            atomname = line[12:16].strip()
            resname = line[17:21].strip()
            chid = line[21]
            try:
                coords = [float(line[30:38]),
                          float(line[38:46]),
                          float(line[46:54])]
            except ValueError:
                # NOTE(review): every other failure in this function raises
                # PDBParserException; confirm PDBParserError really exists
                # in the exceptions module. The message is also formatted
                # with the line text rather than line_num.
                raise exceptions.PDBParserError('invalid or missing coordinate(s) at '
                                                'line {0}.'.format(line))

            if not atomname:
                user_warnings.warn("line " + str(line_num) + ": no atomname detected",
                                   user_warnings.PDBFormatWarning)
            if not resname:
                user_warnings.warn("line " + str(line_num) + ": no resname detected",
                                   user_warnings.PDBFormatWarning)

            records.append((atomname, resname, line[22:26], chid, line[26], coords))

        # TODO handle multiple models at some point
        elif record_name.rstrip() == 'MODEL':
            raise exceptions.PDBParserException(pdb_file + " contains NMR MODELS " +
                                                "most likely this is not being parsed " +
                                                "properly")

        elif record_name[:3] == 'END':
            break

    # Group atoms by residue. The string key is only used for grouping; the
    # identity fields are stored next to it instead of being recovered with
    # key.split(), because a blank chain id or insertion code vanishes in
    # split() and shifts the remaining fields.
    residue_atoms = {}
    residue_ids = {}
    for atomname, resname, resnum, chid, icode, coords in records:
        if resname == "HOH":
            continue

        key = resname + " " + resnum + " " + chid + " " + icode
        if key not in residue_atoms:
            residue_atoms[key] = []
            residue_ids[key] = (resname, resnum.strip(), chid.strip(), icode.strip())

        res_atoms = residue_atoms[key]
        # Skip atom names already present in this residue: the first
        # occurrence wins.
        if any(a.name == atomname for a in res_atoms):
            continue
        res_atoms.append(atom.Atom(atomname, coords))

    residues = []
    # items() instead of iteritems() so this also runs on Python 3.
    for key, res_atoms in residue_atoms.items():
        if len(res_atoms) < 6:
            continue

        resname, resnum, chid, icode = residue_ids[key]
        rtype = residue_type.get_rtype(resname)
        if rtype is None:
            user_warnings.warn("restype " + resname + ": is unknown",
                               user_warnings.PDBFormatWarning)
            continue

        r = residue.Residue(rtype, resname, int(resnum), chid, icode)
        r.setup_atoms(res_atoms)
        residues.append(r)

    return residues