Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions benchmarks/sequence/align/benchmark_kmer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import numpy as np
import pytest
import biotite.sequence as seq
import biotite.sequence.align as align

# Number of symbol codes drawn for the random sequence in the `sequence` fixture
SEQ_LENGTH = 10_000
# Second argument given to `KmerAlphabet` in the `kmer_alphabet` fixture
K = 3


@pytest.fixture(scope="module")
def sequence():
    """
    Build a protein sequence of ``SEQ_LENGTH`` random symbol codes.

    The global NumPy random state is seeded with 0 first, so the same
    code array is generated on every run.
    """
    np.random.seed(0)
    protein = seq.ProteinSequence()
    alphabet_size = len(seq.ProteinSequence.alphabet)
    protein.code = np.random.randint(alphabet_size, size=SEQ_LENGTH)
    return protein


@pytest.fixture(scope="module")
def kmer_alphabet():
    """
    Provide a `KmerAlphabet` over the protein alphabet, built with ``K``.
    """
    base_alphabet = seq.ProteinSequence.alphabet
    return align.KmerAlphabet(base_alphabet, K)


@pytest.fixture(scope="module")
def matrix():
    """
    Provide the matrix returned by `SubstitutionMatrix.std_protein_matrix()`.
    """
    std_matrix = align.SubstitutionMatrix.std_protein_matrix()
    return std_matrix


@pytest.fixture(scope="module")
def score_threshold_rule(matrix):
    """
    Provide a `ScoreThresholdRule` built from the substitution matrix and
    the value 10.
    """
    threshold = 10
    return align.ScoreThresholdRule(matrix, threshold)


@pytest.mark.benchmark
def benchmark_create_kmers(kmer_alphabet, sequence):
    """
    Convert the symbol code of a sequence into k-mer codes.
    """
    symbol_code = sequence.code
    kmer_alphabet.create_kmers(symbol_code)


@pytest.mark.benchmark
def benchmark_similar_kmers(score_threshold_rule, kmer_alphabet):
    """
    Look up the k-mers that the score threshold rule considers similar
    to one reference k-mer.
    """
    # k-mer code 0 serves as the fixed reference
    reference_kmer_code = 0
    score_threshold_rule.similar_kmers(kmer_alphabet, reference_kmer_code)
24 changes: 24 additions & 0 deletions benchmarks/sequence/align/benchmark_multiple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from pathlib import Path
import pytest
import biotite.sequence.align as align
import biotite.sequence.io.fasta as fasta
from tests.util import data_dir


@pytest.fixture(scope="module")
def sequences():
    """
    Read ``cas9.fasta`` from the sequence data directory and return its
    sequences as a list.
    """
    fasta_path = Path(data_dir("sequence")) / "cas9.fasta"
    parsed_file = fasta.FastaFile.read(fasta_path)
    sequence_map = fasta.get_sequences(parsed_file)
    return list(sequence_map.values())


@pytest.fixture(scope="module")
def matrix():
    """
    Provide the matrix returned by `SubstitutionMatrix.std_protein_matrix()`.
    """
    std_matrix = align.SubstitutionMatrix.std_protein_matrix()
    return std_matrix


@pytest.mark.benchmark
def benchmark_align_multiple(sequences, matrix):
    """
    Run `align_multiple()` on all loaded sequences.
    """
    gap_penalty = (-10, -1)
    align.align_multiple(sequences, matrix, gap_penalty=gap_penalty)
48 changes: 48 additions & 0 deletions benchmarks/sequence/align/benchmark_pairwise.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from functools import partial
from pathlib import Path
import pytest
import biotite.sequence as seq
import biotite.sequence.align as align
import biotite.sequence.io.fasta as fasta
from tests.util import data_dir

# Passed as `gap_penalty` to every benchmarked alignment function
# (presumably (gap opening, gap extension) - confirm against the Biotite API)
GAP_PENALTY = (-10, -1)


@pytest.fixture(scope="module")
def sequences():
    """
    Read ``cas9.fasta`` from the sequence data directory and wrap each
    entry value in a `ProteinSequence`.
    """
    fasta_path = Path(data_dir("sequence")) / "cas9.fasta"
    parsed_file = fasta.FastaFile.read(fasta_path)
    return list(map(seq.ProteinSequence, parsed_file.values()))


@pytest.fixture(scope="module")
def matrix():
    """
    Provide the matrix returned by `SubstitutionMatrix.std_protein_matrix()`.
    """
    std_matrix = align.SubstitutionMatrix.std_protein_matrix()
    return std_matrix


@pytest.fixture(scope="module")
def seq_pair(sequences):
    """
    Pick the first two loaded sequences as the pair to be aligned.
    """
    first = sequences[0]
    second = sequences[1]
    return (first, second)


@pytest.fixture(scope="module")
def seed(seq_pair):
    """
    Compute the midpoint index of each sequence in the pair.

    The resulting tuple is handed to `align_local_gapped()` as its
    ``seed`` argument.
    """
    midpoint_first = len(seq_pair[0]) // 2
    midpoint_second = len(seq_pair[1]) // 2
    return midpoint_first, midpoint_second


@pytest.mark.benchmark
@pytest.mark.parametrize(
    "method",
    [
        partial(align.align_optimal, gap_penalty=GAP_PENALTY),
        partial(align.align_banded, band=(-50, 50), gap_penalty=GAP_PENALTY),
        partial(align.align_local_gapped, threshold=100, gap_penalty=GAP_PENALTY),
    ],
    ids=lambda aligner: aligner.func.__name__,
)
def benchmark_align_pairwise(seq_pair, matrix, seed, method):
    """
    Align the sequence pair with each of the parametrized alignment
    functions.
    """
    first_seq = seq_pair[0]
    second_seq = seq_pair[1]
    # Only `align_local_gapped()` is given the seed position
    if method.func is align.align_local_gapped:
        method(first_seq, second_seq, matrix, seed=seed)
    else:
        method(first_seq, second_seq, matrix)
36 changes: 36 additions & 0 deletions benchmarks/sequence/align/benchmark_selector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import numpy as np
import pytest
import biotite.sequence as seq
import biotite.sequence.align as align

# Number of symbol codes drawn for the random sequence in the `sequence` fixture
SEQ_LENGTH = 10_000
# Given to `KmerAlphabet` and to both syncmer selectors
K = 8
# Third positional argument of both syncmer selectors
# (presumably the s-mer length - confirm against the Biotite API)
S = 4
# `window` argument of `MinimizerSelector`
WINDOW = 10
# Base alphabet used for the random sequence and all selectors
ALPHABET = seq.NucleotideSequence.alphabet_unamb
# Built once at import time and shared by the parametrized selectors
KMER_ALPHABET = align.KmerAlphabet(ALPHABET, K)


@pytest.fixture(scope="module")
def sequence():
    """
    Build a nucleotide sequence of ``SEQ_LENGTH`` random symbol codes.

    The global NumPy random state is seeded with 0 first, so the same
    code array is generated on every run.
    """
    np.random.seed(0)
    nucleotides = seq.NucleotideSequence()
    alphabet_size = len(ALPHABET)
    nucleotides.code = np.random.randint(alphabet_size, size=SEQ_LENGTH)
    return nucleotides


# Marked like every other benchmark in this changeset, so that marker-based
# selection does not silently skip the selector benchmarks
@pytest.mark.benchmark
@pytest.mark.parametrize(
    "selector",
    [
        align.MinimizerSelector(KMER_ALPHABET, window=WINDOW),
        align.SyncmerSelector(ALPHABET, K, S),
        align.CachedSyncmerSelector(ALPHABET, K, S),
        align.MincodeSelector(KMER_ALPHABET, compression=4),
    ],
    ids=lambda x: x.__class__.__name__,
)
def benchmark_select(sequence, selector):
    """
    Select k-mers from a sequence using different selection strategies.

    The selector objects are constructed once when the module is imported,
    hence only the call to ``select()`` is part of the benchmark.
    """
    selector.select(sequence)
35 changes: 35 additions & 0 deletions benchmarks/sequence/benchmark_alphabet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import pytest
import biotite.sequence as seq

# Target number of symbols in the string built by the `symbols` fixture
SEQ_LENGTH = 10_000


@pytest.fixture(scope="module")
def alphabet():
    """
    Provide the alphabet of `ProteinSequence`.
    """
    protein_alphabet = seq.ProteinSequence.alphabet
    return protein_alphabet


@pytest.fixture(scope="module")
def symbols():
    """
    Build a symbol string by repeating a fixed 20-letter string
    ``SEQ_LENGTH // 20`` times.
    """
    repeats = SEQ_LENGTH // 20
    return "ACDEFGHIKLMNPQRSTVWY" * repeats


@pytest.fixture(scope="module")
def code(alphabet, symbols):
    """
    Provide the encoded form of the symbols as input for the decoding
    benchmark.
    """
    encoded = alphabet.encode_multiple(symbols)
    return encoded


@pytest.mark.benchmark
def benchmark_encode(alphabet, symbols):
    """
    Run `encode_multiple()` of the alphabet on the symbols.
    """
    alphabet.encode_multiple(symbols)


@pytest.mark.benchmark
def benchmark_decode(alphabet, code):
    """
    Run `decode_multiple()` of the alphabet on the sequence code.
    """
    alphabet.decode_multiple(code)
27 changes: 27 additions & 0 deletions benchmarks/sequence/benchmark_phylo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import numpy as np
import pytest
import biotite.sequence.phylo as phylo

# Number of rows and columns of the random distance matrix
N = 20


@pytest.fixture(scope="module")
def distances():
    """
    Build a random, symmetric ``N x N`` float32 matrix with a zero diagonal.

    The global NumPy random state is seeded with 0 first, so the same
    matrix is generated on every run.
    """
    np.random.seed(0)
    raw = np.random.rand(N, N).astype(np.float32)
    # Averaging the matrix with its transpose makes it symmetric
    symmetric = (raw + raw.T) / 2
    np.fill_diagonal(symmetric, 0)
    return symmetric


@pytest.mark.benchmark
@pytest.mark.parametrize(
    "method",
    [phylo.upgma, phylo.neighbor_joining],
    ids=lambda clustering_fn: clustering_fn.__name__,
)
def benchmark_clustering(distances, method):
    """
    Run each parametrized clustering function on the distance matrix.
    """
    method(distances)
2 changes: 1 addition & 1 deletion benchmarks/structure/benchmark_alphabet.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# File stem of the BinaryCIF file loaded by the `atoms` fixture
PDB_ID = "1aki"


@pytest.fixture(scope="module")
def atoms():
    """
    Load model 1 of the ``PDB_ID`` structure, including bonds, from its
    BinaryCIF file.

    The fixture is module-scoped, so the file is read once per module
    instead of once per benchmark.
    """
    pdbx_file = pdbx.BinaryCIFFile.read(Path(data_dir("structure")) / f"{PDB_ID}.bcif")
    return pdbx.get_structure(pdbx_file, model=1, include_bonds=True)
Expand Down
107 changes: 107 additions & 0 deletions benchmarks/structure/benchmark_bonds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from pathlib import Path
import pytest
import biotite.structure as struc
import biotite.structure.info as info
import biotite.structure.io.pdbx as pdbx
from tests.util import data_dir

# File stem of the BinaryCIF file loaded by the `atoms` fixture
PDB_ID = "1aki"


@pytest.fixture(autouse=True, scope="session")
def load_ccd():
    """
    Call `info.get_ccd()` up front, so that the time for loading the CCD
    is not attributed to whichever benchmark would trigger it first.
    """
    info.get_ccd()


@pytest.fixture(scope="module")
def atoms():
    """
    Load model 1 of the ``PDB_ID`` structure, including bonds, from its
    BinaryCIF file.
    """
    bcif_path = Path(data_dir("structure")) / f"{PDB_ID}.bcif"
    parsed_file = pdbx.BinaryCIFFile.read(bcif_path)
    return pdbx.get_structure(parsed_file, model=1, include_bonds=True)


@pytest.fixture(scope="module")
def bond_array(atoms):
    """
    Provide the bonds of the structure in the form returned by
    `BondList.as_array()`.
    """
    bond_list = atoms.bonds
    return bond_list.as_array()


@pytest.mark.benchmark
def benchmark_bond_list_creation(atoms, bond_array):
    """
    Construct a `BondList` from a bond array, which includes sorting and
    deduplicating the bonds.
    """
    atom_count = atoms.array_length()
    struc.BondList(atom_count, bond_array)


@pytest.mark.benchmark
@pytest.mark.parametrize(
    "method",
    [
        struc.BondList.as_set,
        struc.BondList.as_graph,
        struc.BondList.as_array,
        struc.BondList.get_all_bonds,
        struc.BondList.adjacency_matrix,
        struc.BondList.bond_type_matrix,
    ],
    ids=lambda converter: converter.__name__,
)
def benchmark_conversion(atoms, method):
    """
    Call each parametrized `BondList` method on the bonds of the structure.
    """
    bond_list = atoms.bonds
    method(bond_list)


@pytest.mark.benchmark
def benchmark_get_bonds(atoms):
    """
    Query the bonds of every atom index, one `get_bonds()` call at a time.
    """
    atom_count = atoms.array_length()
    for atom_index in range(atom_count):
        atoms.bonds.get_bonds(atom_index)


@pytest.mark.benchmark
def benchmark_get_all_bonds(atoms):
    """
    Query the bonds of all atom indices with a single `get_all_bonds()` call.
    """
    bond_list = atoms.bonds
    bond_list.get_all_bonds()


@pytest.mark.benchmark
def benchmark_concatenate(atoms):
    """
    Concatenate the `BondList` of the structure with itself.
    """
    operands = [atoms.bonds, atoms.bonds]
    atoms.bonds.concatenate(operands)


@pytest.mark.benchmark
@pytest.mark.parametrize(
    "connect_fn",
    [
        struc.connect_via_distances,
        struc.connect_via_residue_names,
    ],
)
def benchmark_connect(atoms, connect_fn):
    """
    Run each parametrized bond-finding function on the structure.
    """
    connect_fn(atoms)


@pytest.mark.benchmark
def benchmark_find_connected(atoms):
    """
    Run `find_connected()` on the bonds of the structure for atom index 0.
    """
    root_index = 0
    struc.find_connected(atoms.bonds, root_index)


@pytest.mark.benchmark
def benchmark_find_rotatable_bonds(atoms):
    """
    Run `find_rotatable_bonds()` on the bonds of the structure.
    """
    bond_list = atoms.bonds
    struc.find_rotatable_bonds(bond_list)
21 changes: 17 additions & 4 deletions benchmarks/structure/benchmark_celllist.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,28 @@
from tests.util import data_dir


@pytest.fixture(scope="module")
def atoms():
    """
    Load model 1 of the ``1gya`` structure from its BinaryCIF file.

    The fixture is module-scoped, so the file is read once per module
    instead of once per benchmark.
    """
    pdbx_file = pdbx.BinaryCIFFile.read(Path(data_dir("structure")) / "1gya.bcif")
    return pdbx.get_structure(pdbx_file, model=1)


@pytest.fixture(scope="module")
def cell_list(atoms):
    """
    Provide a `CellList` of the structure, constructed with the value 5.0.

    Benchmarks that only query the cell list request this fixture, which
    keeps the construction out of their measurement.
    """
    return struc.CellList(atoms, 5.0)


@pytest.mark.benchmark
def benchmark_cell_list_creation(atoms):
    """
    Construct a `CellList` for the structure.
    """
    cell_size = 5.0
    struc.CellList(atoms, cell_size)


@pytest.mark.benchmark
def benchmark_cell_list_compute_contacts(cell_list, atoms):
    """
    Find all contacts in a structure using an existing cell list.
    """
    # The cell list is taken from the fixture and deliberately not rebuilt
    # here: its construction has a dedicated benchmark
    cell_list.get_atoms(atoms.coord, 5.0)
16 changes: 16 additions & 0 deletions benchmarks/structure/benchmark_charges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import pytest
import biotite.structure as struc
import biotite.structure.info as info


@pytest.fixture(scope="module")
def atoms():
    """
    Provide the structure returned by `info.residue()` for ``"PNN"``.
    """
    residue_name = "PNN"
    return info.residue(residue_name)


@pytest.mark.benchmark
def benchmark_partial_charges(atoms):
    """
    Run `partial_charges()` on the atoms of the structure.
    """
    struc.partial_charges(atoms)
Loading
Loading