Source code for qDNA.io.io_pdb

import os

from .io_xyz import write_xyz

# ----------------------------------------------------------------------



[docs]
def load_pdb(filepath):
    """
    Read the atom records of a PDB file into a list of dictionaries.

    Only lines that begin with ``ATOM`` or ``HETATM`` are used; every other
    line is skipped. Each used line is cut at fixed character positions.

    Parameters
    ----------
    filepath : str
        Path to the PDB file, opened as UTF-8 text.

    Returns
    -------
    list of dict
        One dictionary per matching line, in file order, with the keys
        'atom', 'residue', 'chain', 'res_id', 'x', 'y', 'z', and 'element'.
        'res_id' is an int, the coordinates are floats, and the remaining
        values are whitespace-stripped strings ('element' is an empty string
        when the line is too short to contain that column).
    """

    # NOTE: HETATM lines are accepted alongside ATOM lines, and TER lines do
    # not stop the scan; the file is always read to its end.
    record_prefixes = ("ATOM", "HETATM")

    def _slice_fields(row):
        # Fixed-width slices of a single coordinate line.
        return {
            "atom": row[12:16].strip(),
            "residue": row[17:20].strip(),
            "chain": row[21].strip(),
            "res_id": int(row[22:26]),
            "x": float(row[30:38]),
            "y": float(row[38:46]),
            "z": float(row[46:54]),
            "element": row[76:78].strip(),
        }

    with open(filepath, "r", encoding="utf-8") as handle:
        return [_slice_fields(row) for row in handle if row.startswith(record_prefixes)]



def modify_base_idx(base_idx, start_idx, n, **kwargs):
    """
    Map a residue index read for the lower strand onto its output index.

    Parameters
    ----------
    base_idx : int
        Residue index as read from the file.
    start_idx : int
        Residue index of the first atom in the file.
    n : int
        Number of residue indices spanned by the upper strand.
    **kwargs
        lower_idx_list : sequence, optional
            Explicit lookup table. When given, ``lower_idx_list[base_idx]``
            is returned directly and all other options are ignored.
        lower_offset : int, default 0
            Constant added to the computed index.
        lower_direction : {"5-3", "3-5"}, default "5-3"
            "5-3" keeps the order of the indices, "3-5" mirrors them.
        lower_continued : bool, default True
            Whether the indices in the file are treated as continuing after
            the upper strand (True) or as starting over (False).

    Returns
    -------
    int
        The remapped index (or the looked-up entry of ``lower_idx_list``).

    Raises
    ------
    ValueError
        If ``lower_direction`` is neither "5-3" nor "3-5".
    """

    lower_idx_list = kwargs.get("lower_idx_list", None)
    if lower_idx_list is not None:
        # The table is indexed with the raw, unmodified index.
        return lower_idx_list[base_idx]

    lower_offset = kwargs.get("lower_offset", 0)
    lower_direction = kwargs.get("lower_direction", "5-3")
    lower_continued = kwargs.get("lower_continued", True)

    # Only the magnitude of the index is used below.
    i = abs(base_idx)

    if lower_direction == "5-3":
        if lower_continued:
            base_idx_mod = i
        else:
            base_idx_mod = n + i
    elif lower_direction == "3-5":
        if lower_continued:
            base_idx_mod = 2 * n - 1 - (i - start_idx) + n + start_idx
        else:
            base_idx_mod = 2 * n - 1 - (i - start_idx) + start_idx
    else:
        # Previously this fell through with ``None`` and failed on the offset
        # addition with an unrelated-looking TypeError.
        raise ValueError(
            f"lower_direction must be '5-3' or '3-5', got {lower_direction!r}"
        )

    return base_idx_mod + lower_offset



[docs]
def pdb_to_xyz(filepath, **kwargs):
    """
    Split a PDB file into one XYZ file per base and one per backbone unit.

    The atoms returned by ``load_pdb`` are walked in file order and grouped
    by residue label. Atoms whose name contains ``'`` or ``P`` are collected
    as backbone atoms, all others as base atoms. Each time the residue label
    changes, and once more after the last atom, both groups are passed to
    ``write_xyz``.

    Parameters
    ----------
    filepath : str
        Path to the input PDB file.
    **kwargs
        ``no_chain_id`` (default False) selects how the switch to the lower
        strand is detected (see Notes). All keyword arguments are also
        forwarded unchanged to ``modify_base_idx``.

    Notes
    -----
    .. note::
        - Output is written to a directory next to the input file, named after the input file without its extension; it is created if missing.
        - Labels are the residue index zero-padded to two digits, followed by the second character of the residue name for the base (e.g. ``01C``) or by ``B`` for the backbone (e.g. ``01B``).
        - By default the lower strand starts at the first atom whose chain identifier differs from the previous atom's. With ``no_chain_id`` it starts where residue index 1 directly follows residue index 27.
        - From that point on every residue index is passed through ``modify_base_idx`` before the labels are built.

    """

    stem = os.path.splitext(os.path.basename(filepath))[0]
    directory = os.path.join(os.path.dirname(filepath), stem)
    os.makedirs(directory, exist_ok=True)

    # Atoms collected for the residue currently being read.
    base_elements, base_coords = [], []
    backbone_elements, backbone_coords = [], []

    # State carried over from the previous atom.
    prev_idx = 0
    prev_chain = None
    prev_base_label = None
    prev_backbone_label = None

    # Strand bookkeeping.
    start_idx = 0
    n = 0
    in_lower_strand = False

    info = None  # for debugging
    ignore_chain_ids = kwargs.get('no_chain_id', False)

    for position, record in enumerate(load_pdb(filepath)):

        atom_name = record["atom"]
        chain = record["chain"]  # e.g. A
        residue_idx = record["res_id"]  # e.g. 1
        xyz = (record["x"], record["y"], record["z"])
        raw_element = record["element"]  # e.g. N, C, O

        # Fall back to the first character of the atom name when the element
        # field is empty.
        symbol = atom_name[0] if raw_element == "" else raw_element

        is_first = position == 0
        if is_first:
            start_idx = residue_idx

        if ignore_chain_ids:
            # NOTE(review): 27 is hard-coded here, so this mode only detects
            # the strand change when the previous index is exactly 27.
            strand_switch = prev_idx == 27 and residue_idx == 1
        else:
            strand_switch = prev_chain != chain
        if not is_first and strand_switch:
            in_lower_strand = True
            n = prev_idx + 1 - start_idx

        # Re-index residues of the lower strand.
        if in_lower_strand:
            residue_idx = modify_base_idx(residue_idx, start_idx, n, **kwargs)

        prefix = str(residue_idx).zfill(2)
        # NOTE(review): indexing with [1] raises IndexError for residue names
        # shorter than two characters.
        base_label = prefix + record["residue"][1]  # e.g. 01C
        backbone_label = prefix + "B"  # e.g. 01B

        # A new label means the previous residue is complete: write it out
        # and start collecting afresh.
        if prev_base_label is not None and base_label != prev_base_label:
            write_xyz(directory, prev_base_label, base_elements, base_coords, info=info)
            write_xyz(
                directory, prev_backbone_label, backbone_elements, backbone_coords, info=info
            )
            base_elements, base_coords = [], []
            backbone_elements, backbone_coords = [], []

        on_backbone = any(marker in atom_name for marker in ("'", "P"))
        if on_backbone:
            target_elements, target_coords = backbone_elements, backbone_coords
        else:
            target_elements, target_coords = base_elements, base_coords
        target_elements.append(symbol)
        target_coords.append(xyz)

        prev_idx = residue_idx
        prev_chain = chain
        prev_base_label = base_label
        prev_backbone_label = backbone_label

    # Write out the last residue, if any atom was read at all.
    if prev_base_label is not None:
        write_xyz(directory, prev_base_label, base_elements, base_coords, info=info)
        write_xyz(directory, prev_backbone_label, backbone_elements, backbone_coords, info=info)



def find_pdb(directory):
    """
    List the PDB files found directly inside a directory.

    Parameters
    ----------
    directory : str
        Directory whose entries are listed (no recursion).

    Returns
    -------
    list of str
        The names of all entries ending in ".pdb", with that extension
        removed, in the order reported by ``os.listdir``.
    """
    stems = []
    for entry in os.listdir(directory):
        if not entry.endswith(".pdb"):
            continue
        stem, _extension = os.path.splitext(entry)
        stems.append(stem)
    return stems


# ----------------------------------------------------------------------