# Quick Recipes Short, copy-paste snippets for common workflows. For detailed behavior, conventions, and parameter notes, see the [Functionality Guide](functionality.md). --- ## 1) Load a structure from local file or PDB ID ```python from pdb_cpp import Coor coor_local = Coor("tests/input/1y0m.cif") coor_remote = Coor(pdb_id="1y0m") ``` ## 2) Force a specific file format When the file extension is absent or misleading, pass `format=` to override auto-detection. This works on both the constructor and `read()`: ```python from pdb_cpp import Coor # Constructor coor = Coor("structure.dat", format="cif") # mmCIF content, wrong extension coor = Coor("structure.dat", format="pdb") # PDB content, wrong extension # read() on an existing object coor = Coor() coor.read("structure.dat", format="pqr") ``` Accepted values: `"pdb"`, `"cif"`, `"pqr"`, `"gro"`. Omit `format` (or pass `format=""`) to infer from the file extension as usual. ## 3) Load an asymmetric unit or biological assembly from RCSB ```python from pdb_cpp import rcsb asym_unit = rcsb.load("1y0m", structure="asymmetric_unit") assembly_1 = rcsb.load("5a9z", structure="biological_assembly", assembly_id=1) # Download only path = rcsb.download("5a9z", structure="biological_assembly", assembly_id=1) print(path) ``` ## 4) Inspect atom properties ```python from pdb_cpp import Coor coor = Coor("tests/input/1y0m.cif") # Coordinates as a NumPy array print(coor.xyz.shape) # (N, 3) # Atom names, chain IDs, residue names as strings print(coor.name_str[:5]) # ['N', 'CA', 'C', 'O', 'CB'] print(coor.chain_str[:5]) # ['A', 'A', 'A', 'A', 'A'] print(coor.resname_str[:5]) # ['THR', 'THR', ...] 
# B-factors and occupancies print(coor.beta[:5]) print(coor.occ[:5]) ``` ## 5) Extract an interface selection between two chains ```python from pdb_cpp import Coor coor = Coor("tests/input/1rxz.pdb") # Atoms in chain A within 10 Å of chain B interface_a = coor.select_atoms("chain A and within 10.0 of chain B") interface_a.write("interface_A_vs_B.pdb") ``` ## 6) Select a receptor-ligand complex subset ```python from pdb_cpp import Coor coor = Coor("tests/input/1rxz.pdb") # Keep only chains A and B (example receptor/ligand pair) complex_ab = coor.select_atoms("chain A B") complex_ab.write("complex_AB.pdb") ``` ## 7) Clean up: remove incomplete backbone residues ```python from pdb_cpp import Coor, select coor = Coor("tests/input/1y0m.cif") clean = select.remove_incomplete_backbone_residues(coor) print(f"Before: {coor.len}, After: {clean.len}") ``` ## 8) Sequence alignment (two chains) ```python from pdb_cpp import Coor, alignment coor_1 = Coor("tests/input/1u85.pdb") coor_2 = Coor("tests/input/1ubd.pdb") seq_1 = coor_1.get_aa_seq()["A"] seq_2 = coor_2.get_aa_seq()["C"] aln_1, aln_2, score = alignment.align_seq(seq_1, seq_2) print(f"Score: {score}") alignment.print_align_seq(aln_1, aln_2) ``` ## 9) Structural alignment (RMSD-based) ```python from pdb_cpp import Coor, core, analysis coor_1 = Coor("tests/input/1u85.pdb") coor_2 = Coor("tests/input/1ubd.pdb") idx_1, idx_2 = core.get_common_atoms(coor_1, coor_2, chain_1=["A"], chain_2=["C"]) core.coor_align(coor_1, coor_2, idx_1, idx_2, frame_ref=0) rmsd_values = analysis.rmsd(coor_1, coor_2, index_list=[idx_1, idx_2]) print(f"RMSD: {rmsd_values[0]:.3f} Å") ``` ## 10) One-step sequence-based alignment ```python from pdb_cpp import Coor, core coor_1 = Coor("tests/input/1u85.pdb") coor_2 = Coor("tests/input/1ubd.pdb") rmsd_list, align_idx_1, align_idx_2 = core.align_seq_based( coor_1, coor_2, chain_1=["A"], chain_2=["C"] ) print(f"RMSD: {rmsd_list[0]:.3f} Å") ``` ## 11) Chain-permutation alignment (unknown chain mapping) 
```python from pdb_cpp import Coor, alignment coor_1 = Coor("tests/input/1rxz_colabfold_model_1.pdb") coor_2 = Coor("tests/input/1rxz.pdb") rmsds, mappings = alignment.align_chain_permutation(coor_1, coor_2) print(f"Best RMSD: {rmsds[0]:.3f} Å") ``` ## 12) TM-align and TM-score ```python from pdb_cpp import Coor, geom from pdb_cpp.core import tmalign_ca coor_1 = Coor("tests/input/1y0m.cif") coor_2 = Coor("tests/input/1ubd.pdb") tm = tmalign_ca( coor_1, coor_2, chain_1=["A"], chain_2=["C"], mm=0, include_transform=True, ) R = tm.rotation t = tm.translation # Apply x' = R x + t to the mobile structure coordinates coor_1.xyz = geom.apply_transform(coor_1.xyz, R, t) print(f"L_ali={tm.L_ali}, RMSD={tm.rmsd:.3f}, TM1={tm.TM1:.4f}, TM2={tm.TM2:.4f}") ``` ## 13) DockQ with automatic chain-role inference ```python from pdb_cpp import Coor, analysis model = Coor("tests/input/1rxz_colabfold_model_1.pdb") native = Coor("tests/input/1rxz.pdb") scores = analysis.dockQ(model, native) print(f"DockQ: {scores['DockQ'][0]:.3f}") print(f"Fnat: {scores['Fnat'][0]:.3f}, Fnonnat: {scores['Fnonnat'][0]:.3f}") print(f"LRMS: {scores['LRMS'][0]:.3f}, iRMS: {scores['iRMS'][0]:.3f}") ``` ## 14) DockQ with explicit receptor/ligand chains ```python from pdb_cpp import Coor, analysis model = Coor("tests/input/1rxz_colabfold_model_1.pdb") native = Coor("tests/input/1rxz.pdb") scores = analysis.dockQ( model, native, rec_chains=["B"], lig_chains=["C"], native_rec_chains=["A"], native_lig_chains=["B"], ) print(f"DockQ: {scores['DockQ'][0]:.3f}") ``` ## 15) DockQ multimer from the command line The installed `pdb_cpp_dockq` command-line tool calls `analysis.dockQ_multimer()` directly, including its automatic chain-map search: ```bash pdb_cpp_dockq tests/input/1a2k_model.pdb tests/input/1a2k.pdb ``` Provide an explicit native:model map when you want to override the automatic mapping: ```bash pdb_cpp_dockq tests/input/1a2k_model.pdb tests/input/1a2k.pdb --chain-map A:B,B:A,C:C ``` ## 16) Secondary structure assignment 
```python from pdb_cpp import Coor, TMalign coor = Coor("tests/input/1y0m.cif") ss_list = TMalign.compute_secondary_structure(coor) for chain_id, ss_string in ss_list[0].items(): print(f"Chain {chain_id}: {ss_string}") ``` ## 17) Geometry utilities: distance matrix ```python from pdb_cpp import Coor, geom coor = Coor("tests/input/1y0m.cif") ca = coor.select_atoms("name CA") dmat = geom.distance_matrix(ca, ca) print(f"Shape: {dmat.shape}") ``` ## 18) SASA and interface SASA ```python from pdb_cpp import Coor from pdb_cpp.analysis import sasa coor = Coor("tests/input/1a2k.pdb") interface = sasa.buried_surface_area( coor, receptor_sel="chain A", ligand_sel="chain B", by_residue=True, )[0] print(f"Buried surface: {interface['buried_surface']:.2f} A^2") print(f"Interface area: {interface['interface_area']:.2f} A^2") print(f"Buried polar : {interface['buried_polar_surface']:.2f} A^2") print(f"Buried apolar : {interface['buried_apolar_surface']:.2f} A^2") print(f"Interface polar area : {interface['interface_polar_area']:.2f} A^2") print(f"Interface apolar area: {interface['interface_apolar_area']:.2f} A^2") for residue in interface["residue_buried_surface"]: print(residue["partner"], residue["chain"], residue["resid"], residue["buried_area"]) ``` ## 19) Estimate protein-protein shape complementarity ```python from pdb_cpp import Coor from pdb_cpp.analysis import sasa coor = Coor("tests/input/1a2k.pdb") sc = sasa.shape_complementarity( coor, receptor_sel="chain A", ligand_sel="chain B", dots_per_sq_angstrom=12.0, search_radius=1.5, )[0] print(f"Sc: {sc['shape_complementarity']:.3f}") print(f"Receptor Sc: {sc['receptor_shape_complementarity']:.3f}") print(f"Ligand Sc: {sc['ligand_shape_complementarity']:.3f}") print(f"Interface dot pairs: {sc['interface_dot_pairs']}") print(f"Mean paired distance: {sc['mean_interface_distance']:.3f} A") ``` ## 20) D/L amino acid and nucleic acid sequences ```python from pdb_cpp import Coor coor = Coor("tests/input/1y0m.cif") # Standard 
amino acid sequences seqs = coor.get_aa_seq() print(seqs) # D-residues as lowercase dl_seqs = coor.get_aa_DL_seq() print(dl_seqs) # Amino acid + nucleic acid sequences all_seqs = coor.get_aa_na_seq() print(all_seqs) ``` ## 21) Hybrid-36 encoding/decoding ```python from pdb_cpp.core import hy36encode, hy36decode # Encode large atom numbers for PDB format encoded = hy36encode(5, 100000) # "A0000" decoded = hy36decode(5, "A0000") # 100000 print(encoded, decoded) ``` ## 22) Multi-model: iterate and compute per-model RMSD ```python from pdb_cpp import Coor, analysis coor = Coor("tests/input/2rri.pdb") # NMR ensemble ref = Coor("tests/input/2rri.pdb") # RMSD of each model against model 0 rmsd_values = analysis.rmsd(coor, ref, selection="name CA", frame_ref=0) for i, r in enumerate(rmsd_values): print(f"Model {i}: RMSD = {r:.3f} Å") ``` ## 23) Bond topology (CONECT / _struct_conn) ```python from pdb_cpp import Coor # Both PDB (CONECT lines) and mmCIF (_struct_conn) bond records are supported coor = Coor("tests/input/1u85.pdb") # Inspect covalent bonds (dict: atom_serial -> list[bonded_serials]) print(f"Atoms with bonds: {len(coor.conect)}") for atom_num, bonded in list(coor.conect.items())[:5]: print(f" Atom {atom_num} -> {bonded}") # Convert PDB to mmCIF, preserving bond topology via _struct_conn coor.write("output_1u85.cif") # Reload from mmCIF and confirm bonds survived coor2 = Coor("output_1u85.cif") print(f"Bonds after mmCIF round-trip: {len(coor2.conect)}") # Bonds are also preserved after atom selection and renumbering ligand = coor.select_atoms("not protein") ligand.write("ligand_only.pdb") ``` ## 24) Interaction analysis (H-bonds and salt bridges) ```python from pdb_cpp import Coor from pdb_cpp.analysis import hbonds, salt_bridge coor = Coor("tests/input/2rri.cif") # All protein–protein H-bonds (one list per model) all_bonds = hbonds.hbonds(coor) print(f"Model 0: {len(all_bonds[0])} H-bonds") # Inspect an individual bond b = all_bonds[0][0] 
print(f"{b.donor_chain}{b.donor_resid} {b.donor_heavy_name}" f" -> {b.acceptor_chain}{b.acceptor_resid} {b.acceptor_name}" f" d(DA)={b.dist_DA:.2f} Å angle={b.angle_DHA:.1f}°") # Filter inter-chain H-bonds inter = [b for b in all_bonds[0] if b.donor_chain != b.acceptor_chain] print(f"{len(inter)} inter-chain H-bonds in model 0") # Protein donors → nucleic-acid acceptors (protein–RNA interface) rna_bonds = hbonds.hbonds(coor, donor_sel="protein", acceptor_sel="nucleic") # Protein cations → nucleic phosphate anions (salt bridges) salt = salt_bridge.salt_bridges(coor, cation_sel="protein", anion_sel="nucleic") print(f"Model 0: {len(salt[0])} salt bridges") ```