Source code for rnamake.pdb_parser

import atom
import residue
import residue_type
import exceptions
import user_warnings

[docs]def parse(pdb_file):
    """
    very minimalistc pdb parser, currently does not support multiple MODELS
    in NMR structures but works well for what I need at the moment will be
    expanded in future versions. Currently returns an array of Residue object
    this is because it will mostly be called in in Structure object. Also does
    not parse specified connectivity in CONECT statements. If one is interested
    in parsing into a Structure object, please use structure_from_pdb in the
    structure module.

    :param pdb_file: The path to the PDB formatted file you wish to parse
    :type pdb_file: str

    :return: List of residue.Residue objects

    :examples:

    .. code-block:: python

        >>>import rnamake.unittests.files
        >>>residues = parse(rnamake.unittests.files.P4P6_PDB_PATH)
        >>>len(residues)
        157

        #Not yet sorted into chains, residue 106 is the first in this chain
        >>>residues[0]
        <Residue('G250 chain A')>
    """

    try:
        f = open(pdb_file)
        lines = f.readlines()
        f.close()
    except IOError:
        raise exceptions.PDBParserException("cannot parse pdb file: " +
                                            pdb_file + " as it does not exist")

    coordinates = []
    atomnames = []
    resnames = []
    resnums = []
    chainids = []
    icodes = []

    for line_num, line in enumerate(lines):
        startswith = line[0:6]
        if startswith == 'ATOM  ' or startswith == 'HETATM':
            atomname = line[12:16].strip()
            resname = line[17:21].strip()
            chid = line[21]
            alt = line[16]
            try:
                coords = [float(line[30:38]),
                          float(line[38:46]),
                          float(line[46:54])]
            except:
                raise exceptions.PDBParserError('invalid or missing coordinate(s) at '
                                                'line {0}.'.format(line))

            if len(atomname) == 0:
                user_warnings.warn("line " + str(line_num) + ": no atomname detected",
                                   user_warnings.PDBFormatWarning)

            if len(resname) == 0:
                user_warnings.warn("line " + str(line_num) + ": no resname detected",
                                   user_warnings.PDBFormatWarning)

            atomnames.append(atomname)
            resnames.append(resname)
            chainids.append(chid)
            resnums.append(line[22:26])
            icodes.append(line[26])
            coordinates.append(coords)

        # TODO handle multiple models at some point
        elif startswith == 'MODEL':
            raise exceptions.PDBParserException(pdb_file + " contains NMR MODELS " +
                                                "most likely this is not being parsed " +
                                                "properly")

        elif startswith[:3] == 'END':
            break

    residue_atoms = {}
    for i in range(len(atomnames)):
        if resnames[i] == "HOH":
            continue
        key = resnames[i] + " " + resnums[i] + " " + chainids[i] + " " + icodes[i]
        if key not in residue_atoms:
            residue_atoms[key] = []
        already_has = 0
        for a in residue_atoms[key]:
            if a.name == atomnames[i]:
                already_has = 1
                break
        if already_has:
            continue

        residue_atoms[key].append(atom.Atom(atomnames[i],coordinates[i]))

    residues = []
    for key,res_atoms in residue_atoms.iteritems():
        if len(res_atoms) < 6:
            continue
        spl = key.split()
        rtype = residue_type.get_rtype(spl[0])
        if rtype is None:
            user_warnings.warn("restype " + spl[0] + ": is unknown",
                                user_warnings.PDBFormatWarning)
            continue
        icode = ""
        if len(spl) > 3:
            icode = spl[3]
        r = residue.Residue(rtype, spl[0], int(spl[1]), spl[2], icode)
        r.setup_atoms(res_atoms)
        residues.append(r)

    return residues