biotite-dev · padix-key · Mar 16, 2026 · Jan 30, 2026
diff --git a/benchmarks/sequence/align/benchmark_kmer.py b/benchmarks/sequence/align/benchmark_kmer.py
@@ -0,0 +1,50 @@
+import numpy as np
+import pytest
+import biotite.sequence as seq
+import biotite.sequence.align as align
+
+SEQ_LENGTH = 10_000  # number of random symbol codes in the benchmark sequence
+K = 3  # k handed to the KmerAlphabet under test
+
+
+@pytest.fixture(scope="module")
+def sequence():
+    # Seed the global NumPy RNG so that each run draws the same symbol codes
+    np.random.seed(0)
+    n_symbols = len(seq.ProteinSequence.alphabet)
+    random_protein = seq.ProteinSequence()
+    random_protein.code = np.random.randint(n_symbols, size=SEQ_LENGTH)
+    return random_protein
+
+
+@pytest.fixture(scope="module")
+def kmer_alphabet():  # k-mer alphabet over the protein alphabet, with k = K
+    return align.KmerAlphabet(seq.ProteinSequence.alphabet, K)
+
+
+@pytest.fixture(scope="module")
+def matrix():  # biotite's standard protein substitution matrix
+    return align.SubstitutionMatrix.std_protein_matrix()
+
+
+@pytest.fixture(scope="module")
+def score_threshold_rule(matrix):
+    return align.ScoreThresholdRule(matrix, 10)  # 10: presumably the score threshold
+
+
+@pytest.mark.benchmark
+def benchmark_create_kmers(kmer_alphabet, sequence):
+    """
+    Create k-mer codes from the symbol codes of the random protein sequence.
+    """
+    kmer_alphabet.create_kmers(sequence.code)
+
+
+@pytest.mark.benchmark
+def benchmark_similar_kmers(score_threshold_rule, kmer_alphabet):
+    """
+    Look up every k-mer that the score threshold rule deems similar to one query.
+    """
+    query_code = 0  # k-mer code 0 serves as the reference k-mer
+
+    score_threshold_rule.similar_kmers(kmer_alphabet, query_code)
diff --git a/benchmarks/sequence/align/benchmark_multiple.py b/benchmarks/sequence/align/benchmark_multiple.py
@@ -0,0 +1,24 @@
+from pathlib import Path
+import pytest
+import biotite.sequence.align as align
+import biotite.sequence.io.fasta as fasta
+from tests.util import data_dir
+
+
+@pytest.fixture(scope="module")
+def sequences():
+    cas9_path = Path(data_dir("sequence")) / "cas9.fasta"
+    return list(fasta.get_sequences(fasta.FastaFile.read(cas9_path)).values())
+
+
+@pytest.fixture(scope="module")
+def matrix():  # biotite's standard protein substitution matrix
+    return align.SubstitutionMatrix.std_protein_matrix()
+
+
+@pytest.mark.benchmark
+def benchmark_align_multiple(sequences, matrix):
+    """
+    Perform progressive multiple sequence alignment of all loaded sequences.
+    """
+    align.align_multiple(sequences, matrix, gap_penalty=(-10, -1))
diff --git a/benchmarks/sequence/align/benchmark_pairwise.py b/benchmarks/sequence/align/benchmark_pairwise.py
@@ -0,0 +1,48 @@
+from functools import partial
+from pathlib import Path
+import pytest
+import biotite.sequence as seq
+import biotite.sequence.align as align
+import biotite.sequence.io.fasta as fasta
+from tests.util import data_dir
+
+GAP_PENALTY = (-10, -1)  # passed as `gap_penalty` to every benchmarked method
+
+
+@pytest.fixture(scope="module")
+def sequences():
+    records = fasta.FastaFile.read(Path(data_dir("sequence")) / "cas9.fasta")
+    return [seq.ProteinSequence(residues) for residues in records.values()]
+
+
+@pytest.fixture(scope="module")
+def matrix():  # biotite's standard protein substitution matrix
+    return align.SubstitutionMatrix.std_protein_matrix()
+
+
+@pytest.fixture(scope="module")
+def seq_pair(sequences):  # the first two entries of `sequences`
+    return sequences[0], sequences[1]
+
+
+@pytest.fixture(scope="module")
+def seed(seq_pair):
+    return tuple(len(seq_pair[i]) // 2 for i in range(2))  # midpoint of each sequence
+
+
+@pytest.mark.benchmark
+@pytest.mark.parametrize(
+    "method",
+    [
+        partial(align.align_optimal, gap_penalty=GAP_PENALTY),
+        partial(align.align_banded, band=(-50, 50), gap_penalty=GAP_PENALTY),
+        partial(align.align_local_gapped, threshold=100, gap_penalty=GAP_PENALTY),
+    ],
+    ids=lambda fn: fn.func.__name__,
+)
+def benchmark_align_pairwise(seq_pair, matrix, seed, method):
+    """
+    Align the sequence pair with each parametrized pairwise alignment function.
+    """
+    seed_kwargs = {"seed": seed} if method.func is align.align_local_gapped else {}
+    method(seq_pair[0], seq_pair[1], matrix, **seed_kwargs)
diff --git a/benchmarks/sequence/align/benchmark_selector.py b/benchmarks/sequence/align/benchmark_selector.py
@@ -0,0 +1,37 @@
+import numpy as np
+import pytest
+import biotite.sequence as seq
+import biotite.sequence.align as align
+
+SEQ_LENGTH = 10_000  # number of random symbol codes in the benchmark sequence
+K = 8  # k of the k-mer alphabet and of the syncmer selectors
+S = 4  # third argument of both syncmer selectors
+WINDOW = 10  # `window` of the minimizer selector
+ALPHABET = seq.NucleotideSequence.alphabet_unamb
+KMER_ALPHABET = align.KmerAlphabet(ALPHABET, K)
+
+
+@pytest.fixture(scope="module")
+def sequence():
+    np.random.seed(0)  # fixed seed: the same random sequence in every run
+    s = seq.NucleotideSequence()
+    s.code = np.random.randint(len(ALPHABET), size=SEQ_LENGTH)
+    return s
+
+
+@pytest.mark.benchmark
+@pytest.mark.parametrize(
+    "selector",
+    [
+        align.MinimizerSelector(KMER_ALPHABET, window=WINDOW),
+        align.SyncmerSelector(ALPHABET, K, S),
+        align.CachedSyncmerSelector(ALPHABET, K, S),
+        align.MincodeSelector(KMER_ALPHABET, compression=4),
+    ],
+    ids=lambda x: x.__class__.__name__,
+)
+def benchmark_select(sequence, selector):
+    """
+    Select k-mers from a sequence using different selection strategies.
+    """
+    selector.select(sequence)
diff --git a/benchmarks/sequence/benchmark_alphabet.py b/benchmarks/sequence/benchmark_alphabet.py
@@ -0,0 +1,35 @@
+import pytest
+import biotite.sequence as seq
+
+SEQ_LENGTH = 10_000  # length of the symbol string that is encoded and decoded
+
+
+@pytest.fixture(scope="module")
+def alphabet():  # the alphabet of `ProteinSequence`
+    return seq.ProteinSequence.alphabet
+
+
+@pytest.fixture(scope="module")
+def symbols():  # the 20-character block below, repeated SEQ_LENGTH // 20 times
+    return "ACDEFGHIKLMNPQRSTVWY" * (SEQ_LENGTH // 20)
+
+
+@pytest.fixture(scope="module")
+def code(alphabet, symbols):  # `symbols` encoded once, as input for decoding
+    return alphabet.encode_multiple(symbols)
+
+
+@pytest.mark.benchmark
+def benchmark_encode(alphabet, symbols):
+    """
+    Encode the symbol string into a sequence code via the protein alphabet.
+    """
+    alphabet.encode_multiple(symbols)
+
+
+@pytest.mark.benchmark
+def benchmark_decode(alphabet, code):
+    """
+    Decode the pre-encoded sequence code back into symbols.
+    """
+    alphabet.decode_multiple(code)
diff --git a/benchmarks/sequence/benchmark_phylo.py b/benchmarks/sequence/benchmark_phylo.py
@@ -0,0 +1,27 @@
+import numpy as np
+import pytest
+import biotite.sequence.phylo as phylo
+
+N = 20  # the random distance matrix has shape (N, N)
+
+
+@pytest.fixture(scope="module")
+def distances():
+    np.random.seed(0)  # fixed seed: identical matrix in every run
+    noise = np.random.rand(N, N).astype(np.float32)
+    symmetric = (noise + noise.T) / 2  # average with the transpose -> symmetric
+    np.fill_diagonal(symmetric, 0)  # zero on the main diagonal
+    return symmetric
+
+
+@pytest.mark.benchmark
+@pytest.mark.parametrize(
+    "method",
+    [phylo.upgma, phylo.neighbor_joining],
+    ids=lambda x: x.__name__,
+)
+def benchmark_clustering(distances, method):
+    """
+    Perform hierarchical clustering with each method on the random distance matrix.
+    """
+    method(distances)
diff --git a/benchmarks/structure/benchmark_alphabet.py b/benchmarks/structure/benchmark_alphabet.py
@@ -7,7 +7,7 @@
 PDB_ID = "1aki"
 
 
-@pytest.fixture
+@pytest.fixture(scope="module")
 def atoms():
     pdbx_file = pdbx.BinaryCIFFile.read(Path(data_dir("structure")) / f"{PDB_ID}.bcif")
     return pdbx.get_structure(pdbx_file, model=1, include_bonds=True)

diff --git a/benchmarks/structure/benchmark_bonds.py b/benchmarks/structure/benchmark_bonds.py
@@ -0,0 +1,107 @@
+from pathlib import Path
+import pytest
+import biotite.structure as struc
+import biotite.structure.info as info
+import biotite.structure.io.pdbx as pdbx
+from tests.util import data_dir
+
+PDB_ID = "1aki"  # selects the `.bcif` file read by the `atoms` fixture
+
+
+@pytest.fixture(autouse=True, scope="session")
+def load_ccd():
+    """
+    Load the CCD up front, so that its loading time does not bias any benchmark.
+    """
+    info.get_ccd()
+
+
+@pytest.fixture(scope="module")
+def atoms():
+    bcif_file = pdbx.BinaryCIFFile.read(Path(data_dir("structure")) / f"{PDB_ID}.bcif")
+    return pdbx.get_structure(bcif_file, model=1, include_bonds=True)  # with bonds
+
+
+@pytest.fixture(scope="module")
+def bond_array(atoms):  # array form of the structure's bonds, as constructor input
+    return atoms.bonds.as_array()
+
+
+@pytest.mark.benchmark
+def benchmark_bond_list_creation(atoms, bond_array):
+    """
+    Create a `BondList` from an array of bonds, which involves sorting and deduplication.
+    """
+    struc.BondList(atoms.array_length(), bond_array)  # atom count, then the bonds
+
+
+@pytest.mark.benchmark
+@pytest.mark.parametrize(
+    "method",
+    [
+        struc.BondList.as_set,
+        struc.BondList.as_graph,
+        struc.BondList.as_array,
+        # `get_all_bonds` is not listed here: `benchmark_get_all_bonds` measures it
+        struc.BondList.adjacency_matrix,
+        struc.BondList.bond_type_matrix,
+    ],
+    ids=lambda x: x.__name__,
+)
+def benchmark_conversion(atoms, method):
+    """
+    Convert the `BondList` to a different representation.
+    """
+    method(atoms.bonds)
+
+
+@pytest.mark.benchmark
+def benchmark_get_bonds(atoms):
+    """
+    Query the bonds of every atom, one `get_bonds()` call per atom index.
+    """
+    for atom_index in range(atoms.array_length()):
+        atoms.bonds.get_bonds(atom_index)
+
+
+@pytest.mark.benchmark
+def benchmark_get_all_bonds(atoms):
+    """
+    Get the bonds for all atom indices with a single `get_all_bonds()` call.
+    """
+    atoms.bonds.get_all_bonds()
+
+
+@pytest.mark.benchmark
+def benchmark_concatenate(atoms):
+    """
+    Concatenate the structure's `BondList` with itself.
+    """
+    atoms.bonds.concatenate([atoms.bonds, atoms.bonds])
+
+
+@pytest.mark.benchmark
+@pytest.mark.parametrize(
+    "connect_fn", [struc.connect_via_distances, struc.connect_via_residue_names]
+)
+def benchmark_connect(atoms, connect_fn):
+    """
+    Find bonds between atoms using each parametrized connect function.
+    """
+    connect_fn(atoms)
+
+
+@pytest.mark.benchmark
+def benchmark_find_connected(atoms):
+    """
+    Find all connected atoms for the atom at index 0.
+    """
+    struc.find_connected(atoms.bonds, 0)
+
+
+@pytest.mark.benchmark
+def benchmark_find_rotatable_bonds(atoms):
+    """
+    Find all rotatable bonds in the `BondList` of the structure.
+    """
+    struc.find_rotatable_bonds(atoms.bonds)
diff --git a/benchmarks/structure/benchmark_celllist.py b/benchmarks/structure/benchmark_celllist.py
@@ -5,15 +5,28 @@
 from tests.util import data_dir
 
 
-@pytest.fixture
+@pytest.fixture(scope="module")
 def atoms():
     pdbx_file = pdbx.BinaryCIFFile.read(Path(data_dir("structure")) / "1gya.bcif")
     return pdbx.get_structure(pdbx_file, model=1)
 
 
-def benchmark_cell_list(atoms):
+@pytest.fixture(scope="module")
+def cell_list(atoms):  # prebuilt so that querying can be measured on its own
+    return struc.CellList(atoms, 5.0)
+
+
+@pytest.mark.benchmark
+def benchmark_cell_list_creation(atoms):
+    """
+    Create a cell list for a structure, measuring the construction alone.
+    """
+    struc.CellList(atoms, 5.0)
+
+
+@pytest.mark.benchmark
+def benchmark_cell_list_compute_contacts(cell_list, atoms):
     """
-    Find all contacts in a structure using a cell list.
+    Find all contacts in a structure using an existing cell list.
     """
-    cell_list = struc.CellList(atoms, 5.0)
     cell_list.get_atoms(atoms.coord, 5.0)
diff --git a/benchmarks/structure/benchmark_charges.py b/benchmarks/structure/benchmark_charges.py
@@ -0,0 +1,16 @@
+import pytest
+import biotite.structure as struc
+import biotite.structure.info as info
+
+
+@pytest.fixture(scope="module")
+def atoms():  # the residue "PNN" as provided by `biotite.structure.info`
+    return info.residue("PNN")
+
+
+@pytest.mark.benchmark
+def benchmark_partial_charges(atoms):
+    """
+    Compute the partial charges of each atom in the "PNN" residue.
+    """
+    struc.partial_charges(atoms)