Quick Recipes

Short, copy-paste snippets for common workflows. For detailed behavior, conventions, and parameter notes, see the Functionality Guide.


1) Load a structure from local file or PDB ID

from pdb_cpp import Coor

coor_local = Coor("tests/input/1y0m.cif")
coor_remote = Coor(pdb_id="1y0m")

2) Force a specific file format

When the file extension is absent or misleading, pass format= to override auto-detection. This works on both the constructor and read():

from pdb_cpp import Coor

# Constructor
coor = Coor("structure.dat", format="cif")   # mmCIF content, wrong extension
coor = Coor("structure.dat", format="pdb")   # PDB content, wrong extension

# read() on an existing object
coor = Coor()
coor.read("structure.dat", format="pqr")

Accepted values: "pdb", "cif", "pqr", "gro". Omit format (or pass format="") to infer from the file extension as usual.

3) Load an asymmetric unit or biological assembly from RCSB

from pdb_cpp import rcsb

asym_unit = rcsb.load("1y0m", structure="asymmetric_unit")
assembly_1 = rcsb.load("5a9z", structure="biological_assembly", assembly_id=1)

# Download only
path = rcsb.download("5a9z", structure="biological_assembly", assembly_id=1)
print(path)

4) Inspect atom properties

from pdb_cpp import Coor

coor = Coor("tests/input/1y0m.cif")

# Coordinates as a NumPy array
print(coor.xyz.shape)             # (N, 3)

# Atom names, chain IDs, residue names as strings
print(coor.name_str[:5])          # ['N', 'CA', 'C', 'O', 'CB']
print(coor.chain_str[:5])         # ['A', 'A', 'A', 'A', 'A']
print(coor.resname_str[:5])       # ['THR', 'THR', ...]

# B-factors and occupancies
print(coor.beta[:5])
print(coor.occ[:5])

5) Extract an interface selection between two chains

from pdb_cpp import Coor

coor = Coor("tests/input/1rxz.pdb")

# Atoms in chain A within 10 Å of chain B
interface_a = coor.select_atoms("chain A and within 10.0 of chain B")
interface_a.write("interface_A_vs_B.pdb")

6) Select a receptor-ligand complex subset

from pdb_cpp import Coor

coor = Coor("tests/input/1rxz.pdb")

# Keep only chains A and B (example receptor/ligand pair)
complex_ab = coor.select_atoms("chain A B")
complex_ab.write("complex_AB.pdb")

7) Clean up: remove incomplete backbone residues

from pdb_cpp import Coor, select

coor = Coor("tests/input/1y0m.cif")
clean = select.remove_incomplete_backbone_residues(coor)
print(f"Before: {coor.len}, After: {clean.len}")

8) Sequence alignment (two chains)

from pdb_cpp import Coor, alignment

coor_1 = Coor("tests/input/1u85.pdb")
coor_2 = Coor("tests/input/1ubd.pdb")

seq_1 = coor_1.get_aa_seq()["A"]
seq_2 = coor_2.get_aa_seq()["C"]

aln_1, aln_2, score = alignment.align_seq(seq_1, seq_2)
print(f"Score: {score}")
alignment.print_align_seq(aln_1, aln_2)

9) Structural alignment (RMSD-based)

from pdb_cpp import Coor, core, analysis

coor_1 = Coor("tests/input/1u85.pdb")
coor_2 = Coor("tests/input/1ubd.pdb")

idx_1, idx_2 = core.get_common_atoms(coor_1, coor_2, chain_1=["A"], chain_2=["C"])
core.coor_align(coor_1, coor_2, idx_1, idx_2, frame_ref=0)

rmsd_values = analysis.rmsd(coor_1, coor_2, index_list=[idx_1, idx_2])
print(f"RMSD: {rmsd_values[0]:.3f} Å")

10) One-step sequence-based alignment

from pdb_cpp import Coor, core

coor_1 = Coor("tests/input/1u85.pdb")
coor_2 = Coor("tests/input/1ubd.pdb")

rmsd_list, align_idx_1, align_idx_2 = core.align_seq_based(
    coor_1, coor_2, chain_1=["A"], chain_2=["C"]
)
print(f"RMSD: {rmsd_list[0]:.3f} Å")

11) Chain-permutation alignment (unknown chain mapping)

from pdb_cpp import Coor, alignment

coor_1 = Coor("tests/input/1rxz_colabfold_model_1.pdb")
coor_2 = Coor("tests/input/1rxz.pdb")

rmsds, mappings = alignment.align_chain_permutation(coor_1, coor_2)
print(f"Best RMSD: {rmsds[0]:.3f} Å")

12) TM-align and TM-score

from pdb_cpp import Coor, geom
from pdb_cpp.core import tmalign_ca

coor_1 = Coor("tests/input/1y0m.cif")
coor_2 = Coor("tests/input/1ubd.pdb")

tm = tmalign_ca(
    coor_1,
    coor_2,
    chain_1=["A"],
    chain_2=["C"],
    mm=0,
    include_transform=True,
)

R = tm.rotation
t = tm.translation

# Apply x' = R x + t to the mobile structure coordinates
coor_1.xyz = geom.apply_transform(coor_1.xyz, R, t)

print(f"L_ali={tm.L_ali}, RMSD={tm.rmsd:.3f}, TM1={tm.TM1:.4f}, TM2={tm.TM2:.4f}")

13) DockQ with automatic chain-role inference

from pdb_cpp import Coor, analysis

model = Coor("tests/input/1rxz_colabfold_model_1.pdb")
native = Coor("tests/input/1rxz.pdb")

scores = analysis.dockQ(model, native)
print(f"DockQ: {scores['DockQ'][0]:.3f}")
print(f"Fnat: {scores['Fnat'][0]:.3f}, Fnonnat: {scores['Fnonnat'][0]:.3f}")
print(f"LRMS: {scores['LRMS'][0]:.3f}, iRMS: {scores['iRMS'][0]:.3f}")

14) DockQ with explicit receptor/ligand chains

from pdb_cpp import Coor, analysis

model = Coor("tests/input/1rxz_colabfold_model_1.pdb")
native = Coor("tests/input/1rxz.pdb")

scores = analysis.dockQ(
    model,
    native,
    rec_chains=["B"],
    lig_chains=["C"],
    native_rec_chains=["A"],
    native_lig_chains=["B"],
)
print(f"DockQ: {scores['DockQ'][0]:.3f}")

15) DockQ multimer from the command line

The installed pdb_cpp_dockq bin uses analysis.dockQ_multimer() directly, including its automatic chain-map search:

pdb_cpp_dockq tests/input/1a2k_model.pdb tests/input/1a2k.pdb

Provide an explicit native:model map when you want to override the automatic mapping:

pdb_cpp_dockq tests/input/1a2k_model.pdb tests/input/1a2k.pdb --chain-map A:B,B:A,C:C

16) Secondary structure assignment

from pdb_cpp import Coor, TMalign

coor = Coor("tests/input/1y0m.cif")
ss_list = TMalign.compute_secondary_structure(coor)

for chain_id, ss_string in ss_list[0].items():
    print(f"Chain {chain_id}: {ss_string}")

17) Geometry utilities: distance matrix

from pdb_cpp import Coor, geom

coor = Coor("tests/input/1y0m.cif")
ca = coor.select_atoms("name CA")
dmat = geom.distance_matrix(ca, ca)
print(f"Shape: {dmat.shape}")

18) SASA and interface SASA

from pdb_cpp import Coor
from pdb_cpp.analysis import sasa

coor = Coor("tests/input/1a2k.pdb")

interface = sasa.buried_surface_area(
    coor,
    receptor_sel="chain A",
    ligand_sel="chain B",
    by_residue=True,
)[0]

print(f"Buried surface: {interface['buried_surface']:.2f} A^2")
print(f"Interface area: {interface['interface_area']:.2f} A^2")
print(f"Buried polar  : {interface['buried_polar_surface']:.2f} A^2")
print(f"Buried apolar : {interface['buried_apolar_surface']:.2f} A^2")
print(f"Interface polar area : {interface['interface_polar_area']:.2f} A^2")
print(f"Interface apolar area: {interface['interface_apolar_area']:.2f} A^2")

for residue in interface["residue_buried_surface"]:
    print(residue["partner"], residue["chain"], residue["resid"], residue["buried_area"])

19) Estimate protein-protein shape complementarity

from pdb_cpp import Coor
from pdb_cpp.analysis import sasa

coor = Coor("tests/input/1a2k.pdb")

sc = sasa.shape_complementarity(
    coor,
    receptor_sel="chain A",
    ligand_sel="chain B",
    dots_per_sq_angstrom=12.0,
    search_radius=1.5,
)[0]

print(f"Sc: {sc['shape_complementarity']:.3f}")
print(f"Receptor Sc: {sc['receptor_shape_complementarity']:.3f}")
print(f"Ligand Sc: {sc['ligand_shape_complementarity']:.3f}")
print(f"Interface dot pairs: {sc['interface_dot_pairs']}")
print(f"Mean paired distance: {sc['mean_interface_distance']:.3f} A")

20) D/L amino acid and nucleic acid sequences

from pdb_cpp import Coor

coor = Coor("tests/input/1y0m.cif")

# Standard amino acid sequences
seqs = coor.get_aa_seq()
print(seqs)

# D-residues as lowercase
dl_seqs = coor.get_aa_DL_seq()
print(dl_seqs)

# Amino acid + nucleic acid sequences
all_seqs = coor.get_aa_na_seq()
print(all_seqs)

21) Hybrid-36 encoding/decoding

from pdb_cpp.core import hy36encode, hy36decode

# Encode large atom numbers for PDB format
encoded = hy36encode(5, 100000)   # "A0000"
decoded = hy36decode(5, "A0000")  # 100000
print(encoded, decoded)

22) Multi-model: iterate and compute per-model RMSD

from pdb_cpp import Coor, analysis

coor = Coor("tests/input/2rri.pdb")  # NMR ensemble
ref = Coor("tests/input/2rri.pdb")

# RMSD of each model against model 0
rmsd_values = analysis.rmsd(coor, ref, selection="name CA", frame_ref=0)
for i, r in enumerate(rmsd_values):
    print(f"Model {i}: RMSD = {r:.3f} Å")

23) Bond topology (CONECT / _struct_conn)

from pdb_cpp import Coor

# Both PDB (CONECT lines) and mmCIF (_struct_conn) bond records are supported
coor = Coor("tests/input/1u85.pdb")

# Inspect covalent bonds (dict: atom_serial -> list[bonded_serials])
print(f"Atoms with bonds: {len(coor.conect)}")
for atom_num, bonded in list(coor.conect.items())[:5]:
    print(f"  Atom {atom_num} -> {bonded}")

# Convert PDB to mmCIF, preserving bond topology via _struct_conn
coor.write("output_1u85.cif")

# Reload from mmCIF and confirm bonds survived
coor2 = Coor("output_1u85.cif")
print(f"Bonds after mmCIF round-trip: {len(coor2.conect)}")

# Bonds are also preserved after atom selection and renumbering
ligand = coor.select_atoms("not protein")
ligand.write("ligand_only.pdb")

24) Interaction analysis (H-bonds and salt bridges)

from pdb_cpp import Coor
from pdb_cpp.analysis import hbonds, salt_bridge

coor = Coor("tests/input/2rri.cif")

# All protein–protein H-bonds (one list per model)
all_bonds = hbonds.hbonds(coor)
print(f"Model 0: {len(all_bonds[0])} H-bonds")

# Inspect an individual bond
b = all_bonds[0][0]
print(f"{b.donor_chain}{b.donor_resid} {b.donor_heavy_name}"
      f" -> {b.acceptor_chain}{b.acceptor_resid} {b.acceptor_name}"
      f"  d(DA)={b.dist_DA:.2f} Å  angle={b.angle_DHA:.1f}°")

# Filter inter-chain H-bonds
inter = [b for b in all_bonds[0] if b.donor_chain != b.acceptor_chain]
print(f"{len(inter)} inter-chain H-bonds in model 0")

# Protein donors → nucleic-acid acceptors (protein–RNA interface)
rna_bonds = hbonds.hbonds(coor, donor_sel="protein", acceptor_sel="nucleic")

# Protein cations → nucleic phosphate anions (salt bridges)
salt = salt_bridge.salt_bridges(coor, cation_sel="protein", anion_sel="nucleic")
print(f"Model 0: {len(salt[0])} salt bridges")