Quick Recipes
Short, copy-paste snippets for common workflows. For detailed behavior, conventions, and parameter notes, see the Functionality Guide.
1) Load a structure from local file or PDB ID
from pdb_cpp import Coor
coor_local = Coor("tests/input/1y0m.cif")
coor_remote = Coor(pdb_id="1y0m")
2) Force a specific file format
When the file extension is absent or misleading, pass format= to override
auto-detection. This works on both the constructor and read():
from pdb_cpp import Coor
# Constructor
coor = Coor("structure.dat", format="cif") # mmCIF content, wrong extension
coor = Coor("structure.dat", format="pdb") # PDB content, wrong extension
# read() on an existing object
coor = Coor()
coor.read("structure.dat", format="pqr")
Accepted values: "pdb", "cif", "pqr", "gro".
Omit format (or pass format="") to infer from the file extension as usual.
3) Load an asymmetric unit or biological assembly from RCSB
from pdb_cpp import rcsb
asym_unit = rcsb.load("1y0m", structure="asymmetric_unit")
assembly_1 = rcsb.load("5a9z", structure="biological_assembly", assembly_id=1)
# Download only
path = rcsb.download("5a9z", structure="biological_assembly", assembly_id=1)
print(path)
4) Inspect atom properties
from pdb_cpp import Coor
coor = Coor("tests/input/1y0m.cif")
# Coordinates as a NumPy array
print(coor.xyz.shape) # (N, 3)
# Atom names, chain IDs, residue names as strings
print(coor.name_str[:5]) # ['N', 'CA', 'C', 'O', 'CB']
print(coor.chain_str[:5]) # ['A', 'A', 'A', 'A', 'A']
print(coor.resname_str[:5]) # ['THR', 'THR', ...]
# B-factors and occupancies
print(coor.beta[:5])
print(coor.occ[:5])
5) Extract an interface selection between two chains
from pdb_cpp import Coor
coor = Coor("tests/input/1rxz.pdb")
# Atoms in chain A within 10 Å of chain B
interface_a = coor.select_atoms("chain A and within 10.0 of chain B")
interface_a.write("interface_A_vs_B.pdb")
6) Select a receptor-ligand complex subset
from pdb_cpp import Coor
coor = Coor("tests/input/1rxz.pdb")
# Keep only chains A and B (example receptor/ligand pair)
complex_ab = coor.select_atoms("chain A B")
complex_ab.write("complex_AB.pdb")
7) Clean up: remove incomplete backbone residues
from pdb_cpp import Coor, select
coor = Coor("tests/input/1y0m.cif")
clean = select.remove_incomplete_backbone_residues(coor)
print(f"Before: {coor.len}, After: {clean.len}")
8) Sequence alignment (two chains)
from pdb_cpp import Coor, alignment
coor_1 = Coor("tests/input/1u85.pdb")
coor_2 = Coor("tests/input/1ubd.pdb")
seq_1 = coor_1.get_aa_seq()["A"]
seq_2 = coor_2.get_aa_seq()["C"]
aln_1, aln_2, score = alignment.align_seq(seq_1, seq_2)
print(f"Score: {score}")
alignment.print_align_seq(aln_1, aln_2)
9) Structural alignment (RMSD-based)
from pdb_cpp import Coor, core, analysis
coor_1 = Coor("tests/input/1u85.pdb")
coor_2 = Coor("tests/input/1ubd.pdb")
idx_1, idx_2 = core.get_common_atoms(coor_1, coor_2, chain_1=["A"], chain_2=["C"])
core.coor_align(coor_1, coor_2, idx_1, idx_2, frame_ref=0)
rmsd_values = analysis.rmsd(coor_1, coor_2, index_list=[idx_1, idx_2])
print(f"RMSD: {rmsd_values[0]:.3f} Å")
10) One-step sequence-based alignment
from pdb_cpp import Coor, core
coor_1 = Coor("tests/input/1u85.pdb")
coor_2 = Coor("tests/input/1ubd.pdb")
rmsd_list, align_idx_1, align_idx_2 = core.align_seq_based(
coor_1, coor_2, chain_1=["A"], chain_2=["C"]
)
print(f"RMSD: {rmsd_list[0]:.3f} Å")
11) Chain-permutation alignment (unknown chain mapping)
from pdb_cpp import Coor, alignment
coor_1 = Coor("tests/input/1rxz_colabfold_model_1.pdb")
coor_2 = Coor("tests/input/1rxz.pdb")
rmsds, mappings = alignment.align_chain_permutation(coor_1, coor_2)
print(f"Best RMSD: {rmsds[0]:.3f} Å")
12) TM-align and TM-score
from pdb_cpp import Coor, geom
from pdb_cpp.core import tmalign_ca
coor_1 = Coor("tests/input/1y0m.cif")
coor_2 = Coor("tests/input/1ubd.pdb")
tm = tmalign_ca(
coor_1,
coor_2,
chain_1=["A"],
chain_2=["C"],
mm=0,
include_transform=True,
)
R = tm.rotation
t = tm.translation
# Apply x' = R x + t to the mobile structure coordinates
coor_1.xyz = geom.apply_transform(coor_1.xyz, R, t)
print(f"L_ali={tm.L_ali}, RMSD={tm.rmsd:.3f}, TM1={tm.TM1:.4f}, TM2={tm.TM2:.4f}")
13) DockQ with automatic chain-role inference
from pdb_cpp import Coor, analysis
model = Coor("tests/input/1rxz_colabfold_model_1.pdb")
native = Coor("tests/input/1rxz.pdb")
scores = analysis.dockQ(model, native)
print(f"DockQ: {scores['DockQ'][0]:.3f}")
print(f"Fnat: {scores['Fnat'][0]:.3f}, Fnonnat: {scores['Fnonnat'][0]:.3f}")
print(f"LRMS: {scores['LRMS'][0]:.3f}, iRMS: {scores['iRMS'][0]:.3f}")
14) DockQ with explicit receptor/ligand chains
from pdb_cpp import Coor, analysis
model = Coor("tests/input/1rxz_colabfold_model_1.pdb")
native = Coor("tests/input/1rxz.pdb")
scores = analysis.dockQ(
model,
native,
rec_chains=["B"],
lig_chains=["C"],
native_rec_chains=["A"],
native_lig_chains=["B"],
)
print(f"DockQ: {scores['DockQ'][0]:.3f}")
15) DockQ multimer from the command line
The installed pdb_cpp_dockq bin uses analysis.dockQ_multimer() directly,
including its automatic chain-map search:
pdb_cpp_dockq tests/input/1a2k_model.pdb tests/input/1a2k.pdb
Provide an explicit native:model map when you want to override the automatic mapping:
pdb_cpp_dockq tests/input/1a2k_model.pdb tests/input/1a2k.pdb --chain-map A:B,B:A,C:C
16) Secondary structure assignment
from pdb_cpp import Coor, TMalign
coor = Coor("tests/input/1y0m.cif")
ss_list = TMalign.compute_secondary_structure(coor)
for chain_id, ss_string in ss_list[0].items():
print(f"Chain {chain_id}: {ss_string}")
17) Geometry utilities: distance matrix
from pdb_cpp import Coor, geom
coor = Coor("tests/input/1y0m.cif")
ca = coor.select_atoms("name CA")
dmat = geom.distance_matrix(ca, ca)
print(f"Shape: {dmat.shape}")
18) SASA and interface SASA
from pdb_cpp import Coor
from pdb_cpp.analysis import sasa
coor = Coor("tests/input/1a2k.pdb")
interface = sasa.buried_surface_area(
coor,
receptor_sel="chain A",
ligand_sel="chain B",
by_residue=True,
)[0]
print(f"Buried surface: {interface['buried_surface']:.2f} A^2")
print(f"Interface area: {interface['interface_area']:.2f} A^2")
print(f"Buried polar : {interface['buried_polar_surface']:.2f} A^2")
print(f"Buried apolar : {interface['buried_apolar_surface']:.2f} A^2")
print(f"Interface polar area : {interface['interface_polar_area']:.2f} A^2")
print(f"Interface apolar area: {interface['interface_apolar_area']:.2f} A^2")
for residue in interface["residue_buried_surface"]:
print(residue["partner"], residue["chain"], residue["resid"], residue["buried_area"])
19) Estimate protein-protein shape complementarity
from pdb_cpp import Coor
from pdb_cpp.analysis import sasa
coor = Coor("tests/input/1a2k.pdb")
sc = sasa.shape_complementarity(
coor,
receptor_sel="chain A",
ligand_sel="chain B",
dots_per_sq_angstrom=12.0,
search_radius=1.5,
)[0]
print(f"Sc: {sc['shape_complementarity']:.3f}")
print(f"Receptor Sc: {sc['receptor_shape_complementarity']:.3f}")
print(f"Ligand Sc: {sc['ligand_shape_complementarity']:.3f}")
print(f"Interface dot pairs: {sc['interface_dot_pairs']}")
print(f"Mean paired distance: {sc['mean_interface_distance']:.3f} A")
20) D/L amino acid and nucleic acid sequences
from pdb_cpp import Coor
coor = Coor("tests/input/1y0m.cif")
# Standard amino acid sequences
seqs = coor.get_aa_seq()
print(seqs)
# D-residues as lowercase
dl_seqs = coor.get_aa_DL_seq()
print(dl_seqs)
# Amino acid + nucleic acid sequences
all_seqs = coor.get_aa_na_seq()
print(all_seqs)
21) Hybrid-36 encoding/decoding
from pdb_cpp.core import hy36encode, hy36decode
# Encode large atom numbers for PDB format
encoded = hy36encode(5, 100000) # "A0000"
decoded = hy36decode(5, "A0000") # 100000
print(encoded, decoded)
22) Multi-model: iterate and compute per-model RMSD
from pdb_cpp import Coor, analysis
coor = Coor("tests/input/2rri.pdb") # NMR ensemble
ref = Coor("tests/input/2rri.pdb")
# RMSD of each model against model 0
rmsd_values = analysis.rmsd(coor, ref, selection="name CA", frame_ref=0)
for i, r in enumerate(rmsd_values):
print(f"Model {i}: RMSD = {r:.3f} Å")
23) Bond topology (CONECT / _struct_conn)
from pdb_cpp import Coor
# Both PDB (CONECT lines) and mmCIF (_struct_conn) bond records are supported
coor = Coor("tests/input/1u85.pdb")
# Inspect covalent bonds (dict: atom_serial -> list[bonded_serials])
print(f"Atoms with bonds: {len(coor.conect)}")
for atom_num, bonded in list(coor.conect.items())[:5]:
print(f" Atom {atom_num} -> {bonded}")
# Convert PDB to mmCIF, preserving bond topology via _struct_conn
coor.write("output_1u85.cif")
# Reload from mmCIF and confirm bonds survived
coor2 = Coor("output_1u85.cif")
print(f"Bonds after mmCIF round-trip: {len(coor2.conect)}")
# Bonds are also preserved after atom selection and renumbering
ligand = coor.select_atoms("not protein")
ligand.write("ligand_only.pdb")
24) Interaction analysis (H-bonds and salt bridges)
from pdb_cpp import Coor
from pdb_cpp.analysis import hbonds, salt_bridge
coor = Coor("tests/input/2rri.cif")
# All protein–protein H-bonds (one list per model)
all_bonds = hbonds.hbonds(coor)
print(f"Model 0: {len(all_bonds[0])} H-bonds")
# Inspect an individual bond
b = all_bonds[0][0]
print(f"{b.donor_chain}{b.donor_resid} {b.donor_heavy_name}"
f" -> {b.acceptor_chain}{b.acceptor_resid} {b.acceptor_name}"
f" d(DA)={b.dist_DA:.2f} Å angle={b.angle_DHA:.1f}°")
# Filter inter-chain H-bonds
inter = [b for b in all_bonds[0] if b.donor_chain != b.acceptor_chain]
print(f"{len(inter)} inter-chain H-bonds in model 0")
# Protein donors → nucleic-acid acceptors (protein–RNA interface)
rna_bonds = hbonds.hbonds(coor, donor_sel="protein", acceptor_sel="nucleic")
# Protein cations → nucleic phosphate anions (salt bridges)
salt = salt_bridge.salt_bridges(coor, cation_sel="protein", anion_sel="nucleic")
print(f"Model 0: {len(salt[0])} salt bridges")