Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
a memory knob for constrained systems and a way to validate that an
encoded stream stays within a given window.

Both configs gained `with_*` builders (`EncoderConfig::default().with_level(9)
.with_max_distance(4096)`, `DecoderConfig::default().with_window_size(4096)`).

### Changed

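- **Breaking:** `deflate::EncoderConfig` and `deflate::DecoderConfig` are now
`#[non_exhaustive]`. Construct them via `default()` (or `new()`) plus the
`with_*` builders instead of a struct literal — this includes the
`Config { field, ..Default::default() }` update form, which no longer compiles
outside the crate. In return, future tuning knobs can be added without
breaking downstream code.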

- **Format auto-detection** (`factory::detect`): sniff a stream's leading
bytes and return the matching codec name by magic signature (gzip, zlib,
xz, zstd, bzip2, lz4-frame, RAR, StuffIt/StuffIt 5), feature-gated so only
Expand Down
28 changes: 28 additions & 0 deletions src/deflate/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@ use crate::traits::{RawDecoder, RawProgress};
/// If `dictionary` is longer than 32 KiB only the trailing 32 KiB is
/// retained (the rest is unreachable from any back-reference). An empty
/// dictionary — the default — is equivalent to the older configless API.
///
/// This struct is `#[non_exhaustive]`: construct it via
/// [`DecoderConfig::default`] and the `with_*` builders rather than a struct
/// literal, so new options can be added in future without breaking downstream
/// code.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct DecoderConfig {
/// Bytes to load into the sliding window before decoding. Up to the
/// last `window_size` bytes are retained.
Expand All @@ -46,6 +50,30 @@ impl Default for DecoderConfig {
}
}

impl DecoderConfig {
    /// Builds the default configuration (no preset dictionary, full 32 KiB
    /// window). Equivalent to [`DecoderConfig::default`].
    pub fn new() -> Self {
        <Self as Default>::default()
    }

    /// Replaces the preset dictionary that seeds the sliding window before
    /// decoding; only the last `window_size` bytes of it are retained.
    ///
    /// Consumes the config and returns the updated value so calls can be
    /// chained.
    #[must_use]
    pub fn with_dictionary(self, dictionary: Vec<u8>) -> Self {
        // Every other field is carried over untouched.
        Self { dictionary, ..self }
    }

    /// Sets the sliding-window size in bytes. The effective size is clamped
    /// to `1..=WINDOW_SIZE`; this builder itself records the value unchanged.
    ///
    /// A smaller window decodes streams produced for a small-window decoder
    /// and uses less memory; back-references beyond it are rejected.
    #[must_use]
    pub fn with_window_size(self, window_size: usize) -> Self {
        // Every other field is carried over untouched.
        Self { window_size, ..self }
    }
}

use super::tables::{
CODE_LENGTH_ORDER, DIST_BASE, DIST_EXTRA, END_OF_BLOCK, FIXED_DIST_LENGTHS, FIXED_LIT_LENGTHS,
LENGTH_BASE, LENGTH_EXTRA, WINDOW_SIZE,
Expand Down
27 changes: 27 additions & 0 deletions src/deflate/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ const WINDOW_MAX: usize = 2 * WINDOW_SIZE;
/// back-reference farther than 4096 bytes, so set `max_distance: 4096` to
/// target it. The value is clamped to `1..=WINDOW_SIZE`; it only constrains
/// the encoder (a decoder accepts the full window unless its own
/// `DecoderConfig::window_size` is set lower).
///
/// This struct is `#[non_exhaustive]`: construct it via
/// [`EncoderConfig::default`] and the `with_*` builders rather than a struct
/// literal, so new tuning knobs can be added in future without breaking
/// downstream code.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct EncoderConfig {
/// Compression level in `1..=9`.
pub level: u8,
Expand All @@ -71,6 +75,29 @@ impl Default for EncoderConfig {
}
}

impl EncoderConfig {
    /// Builds the default configuration (level 6, full 32 KiB window).
    /// Equivalent to [`EncoderConfig::default`].
    pub fn new() -> Self {
        <Self as Default>::default()
    }

    /// Sets the compression level. The builder records the value unchanged;
    /// it is clamped to `1..=9` at encoder build time.
    #[must_use]
    pub fn with_level(self, level: u8) -> Self {
        // Every other field is carried over untouched.
        Self { level, ..self }
    }

    /// Caps the LZ77 match distance (clamped to `1..=WINDOW_SIZE`; this
    /// builder itself records the value unchanged).
    ///
    /// Lower it to target a decoder with a smaller sliding window, e.g.
    /// `4096` for qemu/qcow2's 4 KiB inflate window.
    #[must_use]
    pub fn with_max_distance(self, max_distance: usize) -> Self {
        // Every other field is carried over untouched.
        Self { max_distance, ..self }
    }
}

/// Internal expansion of [`EncoderConfig::level`] into the match-finder
/// tuning knobs the LZ77 pass actually consults. The table mirrors zlib's
/// `configuration_table`: higher levels widen the chain budget and raise
Expand Down
109 changes: 31 additions & 78 deletions tests/deflate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -371,10 +371,7 @@ fn round_trip_mixed_corpus_default_level() {
// ─── level-specific tests ───────────────────────────────────────────────

fn encode_at_level(input: &[u8], level: u8) -> Vec<u8> {
let mut enc = Encoder::with_config(EncoderConfig {
level,
..Default::default()
});
let mut enc = Encoder::with_config(EncoderConfig::default().with_level(level));
encode_chunked(&mut enc, input, 4096, 4096)
}

Expand All @@ -387,10 +384,7 @@ fn round_trip_level_1() {
b"hello world",
&b"abcabcabcabcabc".repeat(100)[..],
] {
let mut enc = Encoder::with_config(EncoderConfig {
level: 1,
..Default::default()
});
let mut enc = Encoder::with_config(EncoderConfig::default().with_level(1));
let encoded = encode_chunked(&mut enc, input, 4096, 4096);
let decoded = decode_chunked(&encoded, 4096, 4096).unwrap();
assert_eq!(decoded, input);
Expand All @@ -404,10 +398,7 @@ fn round_trip_level_9() {
b"hello world",
&b"abcabcabcabcabc".repeat(100)[..],
] {
let mut enc = Encoder::with_config(EncoderConfig {
level: 9,
..Default::default()
});
let mut enc = Encoder::with_config(EncoderConfig::default().with_level(9));
let encoded = encode_chunked(&mut enc, input, 4096, 4096);
let decoded = decode_chunked(&encoded, 4096, 4096).unwrap();
assert_eq!(decoded, input);
Expand Down Expand Up @@ -437,16 +428,10 @@ fn out_of_range_level_is_clamped() {
// Level 0 and level 250 should both produce valid streams (clamped to
// 1 and 9 respectively) — we don't expose a fallible constructor.
let input = b"the rain in spain falls mainly on the plain";
let mut enc_lo = Encoder::with_config(EncoderConfig {
level: 0,
..Default::default()
});
let mut enc_lo = Encoder::with_config(EncoderConfig::default().with_level(0));
let enc_lo_out = encode_chunked(&mut enc_lo, input, 4096, 4096);
assert_eq!(decode_chunked(&enc_lo_out, 4096, 4096).unwrap(), input);
let mut enc_hi = Encoder::with_config(EncoderConfig {
level: 250,
..Default::default()
});
let mut enc_hi = Encoder::with_config(EncoderConfig::default().with_level(250));
let enc_hi_out = encode_chunked(&mut enc_hi, input, 4096, 4096);
assert_eq!(decode_chunked(&enc_hi_out, 4096, 4096).unwrap(), input);
}
Expand Down Expand Up @@ -484,10 +469,7 @@ fn reset_preserves_level_and_allows_reuse() {
let input_a = b"alpha alpha alpha alpha alpha".as_slice();
let input_b = b"bravo bravo bravo bravo bravo".as_slice();

let mut enc = Encoder::with_config(EncoderConfig {
level: 9,
..Default::default()
});
let mut enc = Encoder::with_config(EncoderConfig::default().with_level(9));
let encoded_a = encode_chunked(&mut enc, input_a, 4096, 4096);
enc.reset();
let encoded_b = encode_chunked(&mut enc, input_b, 4096, 4096);
Expand All @@ -497,10 +479,7 @@ fn reset_preserves_level_and_allows_reuse() {

// After reset, an encoder configured at level 9 should still be at
// level 9. Compare with a fresh level-9 encoder on the same input.
let mut fresh = Encoder::with_config(EncoderConfig {
level: 9,
..Default::default()
});
let mut fresh = Encoder::with_config(EncoderConfig::default().with_level(9));
let fresh_b = encode_chunked(&mut fresh, input_b, 4096, 4096);
assert_eq!(encoded_b, fresh_b, "reset must preserve compression level");
}
Expand Down Expand Up @@ -603,14 +582,8 @@ fn algorithm_encoder_decoder_round_trip() {
#[test]
fn algorithm_encoder_with_uses_config() {
let input = b"abcabcabcabcabcabc".repeat(100);
let mut enc_lo = <Deflate as Algorithm>::encoder_with(EncoderConfig {
level: 1,
..Default::default()
});
let mut enc_hi = <Deflate as Algorithm>::encoder_with(EncoderConfig {
level: 9,
..Default::default()
});
let mut enc_lo = <Deflate as Algorithm>::encoder_with(EncoderConfig::default().with_level(1));
let mut enc_hi = <Deflate as Algorithm>::encoder_with(EncoderConfig::default().with_level(9));
let lo = encode_chunked(&mut enc_lo, &input, 4096, 4096);
let hi = encode_chunked(&mut enc_hi, &input, 4096, 4096);
assert!(
Expand Down Expand Up @@ -742,19 +715,15 @@ fn deflate_decoder_preset_dictionary_decodes_cross_block_backref() {
);

// With dictionary: decoding succeeds and yields the original payload.
let with_dict = Decoder::with_config(DecoderConfig {
dictionary: dictionary.clone(),
..Default::default()
});
let with_dict =
Decoder::with_config(DecoderConfig::default().with_dictionary(dictionary.clone()));
let out = drain_full(with_dict, &encoded).unwrap();
assert_eq!(out, expected);

// Same fixture via Algorithm::decoder_with — confirms the wiring up
// through the public type-associated config type is sound.
let from_algo: Decoder = Deflate::decoder_with(DecoderConfig {
dictionary,
..Default::default()
});
let from_algo: Decoder =
Deflate::decoder_with(DecoderConfig::default().with_dictionary(dictionary));
let out = drain_full(from_algo, &encoded).unwrap();
assert_eq!(out, expected);
}
Expand All @@ -779,10 +748,8 @@ fn deflate_decoder_reset_keep_window_preserves_history_for_mszip() {
// a single-block empty deflate stream (BFINAL=1 BTYPE=01 EOB).
// That advances the decoder to Done while leaving the window full
// of the dictionary text.
let mut dec = Decoder::with_config(DecoderConfig {
dictionary: dictionary.clone(),
..Default::default()
});
let mut dec =
Decoder::with_config(DecoderConfig::default().with_dictionary(dictionary.clone()));
// Empty fixed-Huffman block: BFINAL=1, BTYPE=01, then EOB (code 256
// = 7-bit 0b0000000). Packed LSB-first: 0b00000011 = 0x03, 0x00.
let empty_block = [0x03u8, 0x00];
Expand All @@ -807,10 +774,8 @@ fn deflate_decoder_full_reset_drops_dictionary() {
let dictionary: Vec<u8> = b"the quick brown fox jumps over the lazy dog. ".to_vec();
let encoded = hex("2bc1a238b3182204569c9887a2431100");

let mut dec = Decoder::with_config(DecoderConfig {
dictionary: dictionary.clone(),
..Default::default()
});
let mut dec =
Decoder::with_config(DecoderConfig::default().with_dictionary(dictionary.clone()));
let empty_block = [0x03u8, 0x00];
let mut buf = vec![0u8; 64];
let (_, _) = dec.decode(&empty_block, &mut buf).unwrap();
Expand All @@ -833,10 +798,7 @@ fn deflate_decoder_full_reset_drops_dictionary() {
#[test]
fn deflate_decoder_preset_dictionary_long_is_truncated() {
let huge = vec![0xAAu8; 48 * 1024]; // 48 KiB of one byte
let dec = Decoder::with_config(DecoderConfig {
dictionary: huge.clone(),
..Default::default()
});
let dec = Decoder::with_config(DecoderConfig::default().with_dictionary(huge.clone()));
// Decode an empty block — just smoke-test that construction worked.
let empty_block = [0x03u8, 0x00];
let mut buf = vec![0u8; 64];
Expand Down Expand Up @@ -872,17 +834,15 @@ fn max_distance_cap_suppresses_far_matches_and_round_trips() {
input.extend_from_slice(&marker);

// Full 32 KiB window (default): can reference the marker ~8 KiB back.
let mut enc_full = Encoder::with_config(EncoderConfig {
level: 9,
..Default::default()
});
let mut enc_full = Encoder::with_config(EncoderConfig::default().with_level(9));
let full = encode_chunked(&mut enc_full, &input, input.len(), 4096);

// 4 KiB cap: the far marker match is out of range.
let mut enc_cap = Encoder::with_config(EncoderConfig {
level: 9,
max_distance: 4096,
});
let mut enc_cap = Encoder::with_config(
EncoderConfig::default()
.with_level(9)
.with_max_distance(4096),
);
let capped = encode_chunked(&mut enc_cap, &input, input.len(), 4096);

// Both must decode back to the original.
Expand Down Expand Up @@ -918,18 +878,14 @@ fn small_window_decoder_accepts_capped_and_rejects_far_refs() {
}
input.extend_from_slice(&marker);

let win = || {
Deflate::decoder_with(DecoderConfig {
window_size: 4096,
..Default::default()
})
};
let win = || Deflate::decoder_with(DecoderConfig::default().with_window_size(4096));

// Capped encoder → a 4 KiB-window decoder reads it correctly.
let mut enc_cap = Encoder::with_config(EncoderConfig {
level: 9,
max_distance: 4096,
});
let mut enc_cap = Encoder::with_config(
EncoderConfig::default()
.with_level(9)
.with_max_distance(4096),
);
let capped = encode_chunked(&mut enc_cap, &input, input.len(), 4096);
assert_eq!(
decode_with_decoder(win(), &capped, capped.len(), 4096).unwrap(),
Expand All @@ -938,10 +894,7 @@ fn small_window_decoder_accepts_capped_and_rejects_far_refs() {

// Full-window encoder uses the far (~8 KiB) match → the 4 KiB-window
// decoder rejects it, just like qemu would.
let mut enc_full = Encoder::with_config(EncoderConfig {
level: 9,
..Default::default()
});
let mut enc_full = Encoder::with_config(EncoderConfig::default().with_level(9));
let full = encode_chunked(&mut enc_full, &input, input.len(), 4096);
let res = decode_with_decoder(win(), &full, full.len(), 4096);
assert!(
Expand Down
Loading