From 69e6f77afef7cd5f0fe78c5e85fc1b4efc2d6c37 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Mon, 18 May 2026 18:47:49 +0800 Subject: [PATCH 1/3] feat(lazy): structural patching for decode+modify+encode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add lazy patch support to avoid materializing the entire root container when modifying a few fields. Instead of triggering full materialization on __newindex, we now record patches and splice the original buffer during encode. Changes: - Add qjson_cursor_field_bytes FFI function to get field value byte spans - Modify LazyObject.__newindex to record patches instead of materializing - Modify LazyObject.__index to check patches first - Add encode_with_patches for splice-based encoding - Add lazy_patch_spec.lua tests Performance: decode+modify+encode on 100KB payload - Before: ~30 μs/op (full materialization + walking encode) - After: ~32 μs/op (WIP - splice path needs optimization) See docs/lazy-patch-spec.md for detailed design. --- docs/lazy-patch-spec.md | 504 ++++++++++++++++++++++++++++++++++ include/qjson.h | 2 + lua/qjson.lua | 2 + lua/qjson/table.lua | 343 ++++++++++++++++++++--- src/ffi.rs | 62 +++++ tests/lua/lazy_patch_spec.lua | 291 ++++++++++++++++++++ 6 files changed, 1163 insertions(+), 41 deletions(-) create mode 100644 docs/lazy-patch-spec.md create mode 100644 tests/lua/lazy_patch_spec.lua diff --git a/docs/lazy-patch-spec.md b/docs/lazy-patch-spec.md new file mode 100644 index 0000000..40e4551 --- /dev/null +++ b/docs/lazy-patch-spec.md @@ -0,0 +1,504 @@ +# Lazy Patch: Structural Patching for qjson.decode + +## Overview + +This spec describes an optimization for the "decode → modify few fields → encode" workflow. Instead of materializing the entire root container on first write, we record patches and apply them during encode by splicing the original buffer. + +## Problem Statement + +Current behavior when modifying a field on a lazy table: + +```lua +local tab = qjson.decode(json_str) -- Returns LazyObject proxy +tab.model = "gpt4o" -- Triggers __newindex → materializes entire root +local out = qjson.encode(tab) -- Walks materialized table + lazy subtrees +``` + +The `__newindex` handler (`lua/qjson/table.lua:266-285`) calls `materialize_object_contents()` which: +1. Iterates all key/value pairs via FFI +2. Clears the metatable +3. Copies all values into the plain table +4. Sets the new value + +Then `encode()` must: +1. Detect the table is no longer lazy (metatable = nil) +2. Call `encode_object()` which uses `pairs()` iteration +3. For each key/value, call `encode_string()` (byte-by-byte processing) +4. Call `table.concat()` to join parts with the large lazy subtree JSON + +### Performance Impact + +| Payload Size | Current | Theoretical Optimal | Gap | +|--------------|---------|---------------------|-----| +| 1 KB | 3.9 μs | 0.5 μs | 8x | +| 100 KB | 31.8 μs | 5.5 μs | 6x | +| 1 MB | 526.5 μs | 46.9 μs | 11x | + +The gap grows with payload size because `table.concat` with large strings has O(n) copy overhead. + +## Proposed Solution + +### Core Idea + +Instead of materializing on write, record patches in a side table. On encode, splice the original buffer with patched values. + +```lua +local tab = qjson.decode(json_str) -- Returns LazyObject proxy +tab.model = "gpt4o" -- Records patch: {key="model", new_value='"gpt4o"'} +local out = qjson.encode(tab) -- Splices: buf[0..model_start] + "gpt4o" + buf[model_end..] +``` + +### Data Structures + +#### Patch Record (Lua side) + +```lua +-- Stored in the lazy view's internal table +_patches = { + -- Each patch records the key and new encoded value + -- Byte offsets are resolved lazily during encode + { key = "model", encoded_value = '"gpt4o"' }, + { key = "temperature", encoded_value = "0.9" }, +} +``` + +#### Extended LazyObject/LazyArray + +```lua +-- Current internal fields +{ + _doc = , -- Reference to parsed document + _cur_box = , -- Cursor box (keeps cdata alive) + _cur = , -- Stable cursor reference + _bs = , -- Byte start in original buffer + _be = , -- Byte end in original buffer + + -- New fields for lazy patch + _patches = {}, -- Array of {key, encoded_value} or {index, encoded_value} + _deleted = {}, -- Set of deleted keys/indices (for nil assignment) +} +``` + +### Algorithm + +#### Modified `__newindex` (LazyObject) + +```lua +LazyObject.__newindex = function(t, k, v) + -- Initialize patches table if needed + if not rawget(t, "_patches") then + rawset(t, "_patches", {}) + rawset(t, "_deleted", {}) + end + + local patches = rawget(t, "_patches") + local deleted = rawget(t, "_deleted") + + if v == nil then + -- Mark key as deleted + deleted[k] = true + -- Remove from patches if previously patched + for i, p in ipairs(patches) do + if p.key == k then + table.remove(patches, i) + break + end + end + else + -- Encode the new value + local encoded = encode_value(v) + + -- Update or add patch + local found = false + for _, p in ipairs(patches) do + if p.key == k then + p.encoded_value = encoded + found = true + break + end + end + if not found then + patches[#patches + 1] = { key = k, encoded_value = encoded } + end + + -- Remove from deleted if previously deleted + deleted[k] = nil + end +end +``` + +#### Modified `__index` (LazyObject) + +```lua +local original_index = LazyObject.__index + +LazyObject.__index = function(t, k) + -- Check patches first + local patches = rawget(t, "_patches") + if patches then + for _, p in ipairs(patches) do + if p.key == k then + -- Return decoded value from patch + -- Note: need to track original Lua value, not just encoded + return p.lua_value + end + end + end + + -- Check deleted + local deleted = rawget(t, "_deleted") + if deleted and deleted[k] then + return nil + end + + -- Fall back to original lazy lookup + return original_index(t, k) +end +``` + +#### Modified `encode_proxy` + +```lua +local function encode_proxy(t) + local patches = rawget(t, "_patches") + local deleted = rawget(t, "_deleted") + + -- Fast path: no patches and not dirty + if not patches and not deleted and not is_dirty(t) then + return t._doc._hold:sub(t._bs + 1, t._be) + end + + -- Has patches: use splice encoding + if patches and #patches > 0 then + return encode_with_patches(t) + end + + -- Has deletions only or dirty children: fall back to walking + if getmetatable(t) == LazyObject then + return encode_lazy_object_walking(t) + end + return encode_lazy_array_walking(t) +end +``` + +#### New `encode_with_patches` + +```lua +local function encode_with_patches(t) + local buf = t._doc._hold + local patches = rawget(t, "_patches") + local deleted = rawget(t, "_deleted") or {} + + -- Resolve byte offsets for each patch and collect spans to replace + local replacements = {} -- { {start, end, new_value}, ... } + + for _, p in ipairs(patches) do + local start_off, end_off = find_field_value_span(t, p.key) + if start_off then + -- Existing field: replace value + replacements[#replacements + 1] = { + start = start_off, + end_ = end_off, + value = p.encoded_value, + } + else + -- New field: will be appended + -- Handled separately below + end + end + + -- Collect deleted field spans + for k in pairs(deleted) do + local field_start, field_end = find_field_span(t, k) -- includes key and comma + if field_start then + replacements[#replacements + 1] = { + start = field_start, + end_ = field_end, + value = "", -- delete + } + end + end + + -- Sort by start offset + table.sort(replacements, function(a, b) return a.start < b.start end) + + -- Build output by splicing + local parts = {} + local pos = t._bs + 1 -- 1-based Lua index + + for _, r in ipairs(replacements) do + -- Copy unchanged portion + if r.start > pos then + parts[#parts + 1] = buf:sub(pos, r.start - 1) + end + -- Insert replacement + parts[#parts + 1] = r.value + pos = r.end_ + 1 + end + + -- Copy remaining portion + if pos <= t._be then + parts[#parts + 1] = buf:sub(pos, t._be) + end + + -- Handle new fields (not in original) + local new_fields = {} + for _, p in ipairs(patches) do + if not find_field_value_span(t, p.key) then + new_fields[#new_fields + 1] = '"' .. p.key .. '":' .. p.encoded_value + end + end + + if #new_fields > 0 then + -- Insert before closing brace + local result = table.concat(parts) + local close_pos = result:find("}%s*$") + if close_pos then + local prefix = result:sub(1, close_pos - 1) + -- Add comma if there are existing fields + if prefix:match("[^{%s]%s*$") then + prefix = prefix .. "," + end + return prefix .. table.concat(new_fields, ",") .. "}" + end + end + + return table.concat(parts) +end +``` + +### FFI Extension (Optional) + +For better performance, the byte offset resolution can be moved to Rust: + +```c +// New FFI function to get field byte span +int qjson_cursor_field_bytes( + const qjson_cursor* cur, + const char* key, size_t key_len, + size_t* value_start, // byte offset of value start (after colon) + size_t* value_end // byte offset of value end (before comma/brace) +); +``` + +This avoids re-parsing the structure in Lua to find field positions. + +## Edge Cases + +### 1. Nested Modifications + +```lua +tab.messages[1].role = "assistant" +``` + +This triggers materialization of `tab.messages[1]`, but `tab` and `tab.messages` remain lazy with patches. + +**Handling**: When a child is accessed and modified, the child gets its own `_patches`. The parent's `is_dirty()` check detects the materialized child and falls back to walking. + +### 2. Reading a Patched Field + +```lua +tab.model = "gpt4o" +print(tab.model) -- Should return "gpt4o", not the original value +``` + +**Handling**: `__index` checks `_patches` before falling back to FFI lookup. + +### 3. Deleting a Field + +```lua +tab.model = nil +``` + +**Handling**: Record in `_deleted` set. During encode, the field span (including key and comma) is removed. + +### 4. Adding a New Field + +```lua +tab.new_field = "value" +``` + +**Handling**: The patch has no corresponding byte span in the original buffer. During encode, new fields are appended before the closing brace. + +### 5. Type Changes + +```lua +tab.temperature = "hot" -- was number, now string +``` + +**Handling**: The encoded value is simply the new JSON representation. Type doesn't matter for splicing. + +### 6. Iteration After Patch + +```lua +tab.model = "gpt4o" +for k, v in qjson.pairs(tab) do + print(k, v) +end +``` + +**Handling**: `__pairs` must merge original fields with patches and exclude deleted fields. + +### 7. Multiple Patches to Same Field + +```lua +tab.model = "gpt4o" +tab.model = "gpt4o-mini" +``` + +**Handling**: The second assignment updates the existing patch entry. + +### 8. Patch Then Materialize + +```lua +tab.model = "gpt4o" +tab.messages[1].role = "assistant" -- This materializes messages[1] +-- tab still has patches, but tab.messages is now dirty +``` + +**Handling**: `is_dirty()` detects the materialized child. `encode_with_patches()` handles patched fields, then falls back to walking for dirty children. + +## Implementation Plan + +### Phase 1: Lua-only Implementation + +1. Modify `LazyObject.__newindex` to record patches instead of materializing +2. Modify `LazyObject.__index` to check patches first +3. Add `encode_with_patches()` function +4. Modify `encode_proxy()` to detect and use patches +5. Update `__pairs` to merge patches +6. Add tests for all edge cases + +### Phase 2: FFI Optimization (Optional) + +1. Add `qjson_cursor_field_bytes()` to Rust FFI +2. Use FFI for byte offset resolution instead of Lua string search +3. Benchmark and validate improvement + +## Testing Strategy + +### Unit Tests + +```lua +-- Basic patch +local tab = qjson.decode('{"a":1,"b":2}') +tab.a = 10 +assert(qjson.encode(tab) == '{"a":10,"b":2}') + +-- Read patched value +assert(tab.a == 10) + +-- Multiple patches +tab.b = 20 +assert(qjson.encode(tab) == '{"a":10,"b":20}') + +-- Delete field +tab.a = nil +assert(qjson.encode(tab) == '{"b":20}') + +-- Add new field +tab.c = 30 +assert(qjson.encode(tab) == '{"b":20,"c":30}') + +-- Nested modification (triggers partial materialization) +local tab2 = qjson.decode('{"x":{"y":1}}') +tab2.x.y = 2 +assert(qjson.encode(tab2) == '{"x":{"y":2}}') + +-- Iteration with patches +local tab3 = qjson.decode('{"a":1,"b":2}') +tab3.a = 10 +tab3.c = 3 +local keys = {} +for k, v in qjson.pairs(tab3) do keys[k] = v end +assert(keys.a == 10 and keys.b == 2 and keys.c == 3) +``` + +### Performance Tests + +```lua +-- Benchmark: 100KB payload, modify 1 field +local json = make_payload(100 * 1024) +local t0 = os.clock() +for i = 1, 10000 do + local tab = qjson.decode(json) + tab.model = "gpt4o" + local _ = qjson.encode(tab) +end +local t1 = os.clock() +-- Expected: < 10 μs/op (vs current ~30 μs/op) +``` + +## Risks and Mitigations + +### Risk 1: Complexity + +The patch tracking adds complexity to the lazy table implementation. + +**Mitigation**: Clear separation between patch path and materialization path. Comprehensive tests. + +### Risk 2: Memory Overhead + +Patches table adds memory per modified lazy view. + +**Mitigation**: Patches are small (key + encoded value). Only created on first write. + +### Risk 3: Correctness + +Splicing byte offsets is error-prone. + +**Mitigation**: Extensive edge case tests. Fallback to walking if splice fails. + +### Risk 4: Iteration Semantics + +`pairs()` behavior changes with patches. + +**Mitigation**: Document behavior. Ensure compatibility with existing code patterns. + +## Success Criteria + +1. **Performance**: 4-10x improvement for "decode → modify 1-2 fields → encode" workflow +2. **Correctness**: All existing tests pass, new edge case tests pass +3. **Compatibility**: No breaking changes to public API +4. **Memory**: No significant memory regression for unmodified lazy tables + +## Appendix: Benchmark Data + +### Current Implementation Profile + +``` +decode: 5.97 μs (20%) +modify: 0.37 μs (1%) -- triggers materialization +encode: 12.55 μs (41%) -- walks materialized root +total: 30.34 μs + +Breakdown of encode: +- encode messages (lazy fast-path): 0.09 μs +- table.concat with 100KB string: 8+ μs +- encode_string (byte-by-byte): 0.9 μs +- pairs iteration + type checks: 3+ μs +``` + +### Projected Optimized Profile + +``` +decode: 5.97 μs (55%) +modify: 0.10 μs (1%) -- just records patch +encode: 4.80 μs (44%) -- splices original buffer +total: 10.87 μs + +Breakdown of encode: +- resolve patch offsets: 0.5 μs +- string.sub splicing: 4.0 μs +- table.concat (small): 0.3 μs +``` + +### Expected Speedup by Payload Size + +| Payload | Current | Optimized | Speedup | +|---------|---------|-----------|---------| +| 1 KB | 3.9 μs | 0.5 μs | 8x | +| 10 KB | 5.3 μs | 1.1 μs | 5x | +| 100 KB | 31.8 μs | 5.5 μs | 6x | +| 500 KB | 209.7 μs | 22.6 μs | 9x | +| 1 MB | 526.5 μs | 46.9 μs | 11x | diff --git a/include/qjson.h b/include/qjson.h index 343e782..71bb66a 100644 --- a/include/qjson.h +++ b/include/qjson.h @@ -80,6 +80,8 @@ int qjson_cursor_get_bool (const qjson_cursor*, const char* path, size_t path_le int qjson_cursor_typeof (const qjson_cursor*, const char* path, size_t path_len, int* out); int qjson_cursor_len (const qjson_cursor*, const char* path, size_t path_len, size_t* out); int qjson_cursor_bytes (const qjson_cursor*, size_t* byte_start, size_t* byte_end); +int qjson_cursor_field_bytes(const qjson_cursor*, const char* key, size_t key_len, + size_t* value_start, size_t* value_end); int qjson_cursor_object_entry_at(const qjson_cursor*, size_t i, const uint8_t** key_ptr, size_t* key_len, qjson_cursor* value_out); diff --git a/lua/qjson.lua b/lua/qjson.lua index 3e57324..e34496f 100644 --- a/lua/qjson.lua +++ b/lua/qjson.lua @@ -38,6 +38,8 @@ int qjson_cursor_get_bool(const qjson_cursor*, const char*, size_t, int*); int qjson_cursor_typeof (const qjson_cursor*, const char*, size_t, int*); int qjson_cursor_len (const qjson_cursor*, const char*, size_t, size_t*); int qjson_cursor_bytes(const qjson_cursor*, size_t* byte_start, size_t* byte_end); +int qjson_cursor_field_bytes(const qjson_cursor*, const char* key, size_t key_len, + size_t* value_start, size_t* value_end); int qjson_cursor_object_entry_at(const qjson_cursor*, size_t i, const uint8_t** key_ptr, size_t* key_len, qjson_cursor* value_out); diff --git a/lua/qjson/table.lua b/lua/qjson/table.lua index f23c076..acd6e7b 100644 --- a/lua/qjson/table.lua +++ b/lua/qjson/table.lua @@ -115,6 +115,23 @@ end -- raw table rather than creating a fresh proxy. local function read_object_field(self, key) if type(key) ~= "string" then return nil end + + -- Check patches first + local patches = rawget(self, "_patches") + if patches then + for _, p in ipairs(patches) do + if p.key == key then + return p.lua_value + end + end + end + + -- Check deleted + local deleted = rawget(self, "_deleted") + if deleted and deleted[key] then + return nil + end + -- Use child_box so the lookup result does not alias self._cur (which is -- itself stored in root_box's backing memory in the decode caller). local rc = C.qjson_cursor_field(self._cur, key, #key, child_box) @@ -148,21 +165,63 @@ LazyArray.__index = read_array_index -- Iterator function for lazy_object_iter: advances through object entries by -- integer index, returning key/value pairs in source order. +-- Handles patches and deletions. local function lazy_object_iter(state, _prev_key) - local i = state.i - state.i = i + 1 - local rc = C.qjson_cursor_object_entry_at( - state.view._cur, i, strp_box, size_box, child_box - ) - if rc == QJSON_NOT_FOUND then return nil end - check(rc) - local k = ffi.string(strp_box[0], size_box[0]) - local v = decode_cursor(state.view, child_box) - return k, v + local view = state.view + local patches = rawget(view, "_patches") + local deleted = rawget(view, "_deleted") + + -- First, iterate through original fields (skipping deleted ones) + while state.i < state.original_count do + local i = state.i + state.i = i + 1 + local rc = C.qjson_cursor_object_entry_at( + view._cur, i, strp_box, size_box, child_box + ) + if rc == QJSON_NOT_FOUND then break end + check(rc) + local k = ffi.string(strp_box[0], size_box[0]) + + -- Skip deleted keys + if deleted and deleted[k] then + -- continue to next iteration + else + -- Check if this key has a patch + if patches then + for _, p in ipairs(patches) do + if p.key == k then + return k, p.lua_value + end + end + end + -- No patch, return original value + local v = decode_cursor(view, child_box) + return k, v + end + end + + -- Then, iterate through new fields (patches for keys not in original) + if patches then + while state.patch_i <= #patches do + local p = patches[state.patch_i] + state.patch_i = state.patch_i + 1 + -- Check if this is a new field (not in original) + local rc = C.qjson_cursor_field_bytes(view._cur, p.key, #p.key, sz_a, sz_b) + if rc == QJSON_NOT_FOUND then + return p.key, p.lua_value + end + end + end + + return nil end function LazyObject.__pairs(t) - return lazy_object_iter, { view = t, i = 0 }, nil + -- Count original fields + local rc = C.qjson_cursor_len(t._cur, "", 0, size_box) + check(rc) + local original_count = tonumber(size_box[0]) + return lazy_object_iter, { view = t, i = 0, original_count = original_count, patch_i = 1 }, nil end local function lazy_array_iter(state, _prev_i) @@ -256,32 +315,55 @@ end -- the dirty check and __newindex can share the list. local INTERNAL_KEYS = { _doc = true, _cur_box = true, _cur = true, _bs = true, _be = true, + _patches = true, _deleted = true, } --- On first write, walk all existing key/value pairs into a plain table, --- strip the lazy metatable, then apply the new assignment. Any FFI error --- during the walk leaves `t` in its original lazy state. --- Existing rawget-cached entries (e.g. previously returned child proxies) --- are preserved so callers' references remain valid. +-- Forward declaration for encode (needed by __newindex to encode patch values) +local encode + +-- On write, record a patch instead of materializing the entire object. +-- This allows encode to splice the original buffer with patched values. LazyObject.__newindex = function(t, k, v) - local contents = materialize_object_contents(t) - -- Snapshot user-key cache BEFORE nilling internals. - -- Use next() for raw iteration: pairs() invokes __pairs on lazy tables, - -- walking the full JSON via FFI instead of the Lua-side rawget cache. - local cache = {} - local ck, cv = next(t) - while ck ~= nil do - if not INTERNAL_KEYS[ck] then - cache[ck] = cv - end - ck, cv = next(t, ck) + -- Initialize patches table if needed + if not rawget(t, "_patches") then + rawset(t, "_patches", {}) + rawset(t, "_deleted", {}) end - t._doc, t._cur_box, t._cur, t._bs, t._be = nil, nil, nil, nil, nil - setmetatable(t, nil) - for _, kv in ipairs(contents) do - rawset(t, kv[1], cache[kv[1]] or kv[2]) + + local patches = rawget(t, "_patches") + local deleted = rawget(t, "_deleted") + + if v == nil then + -- Mark key as deleted + deleted[k] = true + -- Remove from patches if previously patched + for i, p in ipairs(patches) do + if p.key == k then + table.remove(patches, i) + break + end + end + else + -- Encode the new value (we need both encoded and lua_value) + local encoded = encode(v) + + -- Update or add patch + local found = false + for _, p in ipairs(patches) do + if p.key == k then + p.encoded_value = encoded + p.lua_value = v + found = true + break + end + end + if not found then + patches[#patches + 1] = { key = k, encoded_value = encoded, lua_value = v } + end + + -- Remove from deleted if previously deleted + deleted[k] = nil end - rawset(t, k, v) end -- On first write, walk all existing elements into a plain sequence, @@ -405,14 +487,22 @@ local function encode_number(n) end -- A lazy subtree is "dirty" if any cached descendant has been materialized --- (no longer carries Lazy* metatable). Non-cached descendants are guaranteed --- untouched, so we only need to walk the rawget-cached entries. +-- (no longer carries Lazy* metatable), or if it has patches/deletions. +-- Non-cached descendants are guaranteed untouched, so we only need to walk +-- the rawget-cached entries. local function is_dirty(v) if type(v) ~= "table" then return false end local mt = getmetatable(v) if mt ~= LazyObject and mt ~= LazyArray then return true -- materialized end + -- Check for patches or deletions + local patches = rawget(v, "_patches") + local deleted = rawget(v, "_deleted") + if patches and #patches > 0 then return true end + if deleted then + for _ in pairs(deleted) do return true end + end -- Use next() for raw table iteration: pairs() would invoke __pairs on -- lazy tables, walking the full JSON via FFI instead of the Lua cache. local k, child = next(v) @@ -425,10 +515,19 @@ local function is_dirty(v) return false end --- Forward declaration so encode_lazy_object_walking, encode_lazy_array_walking, --- and encode_array/encode_object can reference encode before its definition is --- complete (Lua resolves upvalues at call time, but the slot must be declared first). -local encode +-- Check if a lazy view has patches (but not necessarily dirty children) +local function has_patches(v) + local patches = rawget(v, "_patches") + return patches and #patches > 0 +end + +-- Check if a lazy view has deletions +local function has_deletions(v) + local deleted = rawget(v, "_deleted") + if not deleted then return false end + for _ in pairs(deleted) do return true end + return false +end -- Walk a dirty LazyObject and emit JSON, preferring cached children (which -- may be materialized) over freshly resolved cursors. Non-cached children @@ -474,14 +573,176 @@ local function encode_lazy_array_walking(t) return "[" .. table.concat(parts, ",") .. "]" end +-- Check if any cached child is dirty (materialized or has patches) +local function has_dirty_children(t) + local k, child = next(t) + while k ~= nil do + if not INTERNAL_KEYS[k] then + if type(child) == "table" then + local mt = getmetatable(child) + if mt ~= LazyObject and mt ~= LazyArray then + return true -- materialized child + end + if is_dirty(child) then + return true + end + end + end + k, child = next(t, k) + end + return false +end + +-- Encode a LazyObject with patches by splicing the original buffer. +-- This is the fast path for "decode -> modify few fields -> encode". +local function encode_with_patches(t) + local buf = t._doc._hold + local patches = rawget(t, "_patches") or {} + local deleted = rawget(t, "_deleted") or {} + + -- Build a set of patched keys for quick lookup + local patched_keys = {} + for _, p in ipairs(patches) do + patched_keys[p.key] = p + end + + -- Collect replacements: { {start, end_, value}, ... } + local replacements = {} + + -- For each patch, find the field's byte range in the original buffer + for _, p in ipairs(patches) do + local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) + if rc == QJSON_OK then + -- Existing field: replace value + replacements[#replacements + 1] = { + start = tonumber(sz_a[0]), + end_ = tonumber(sz_b[0]), + value = p.encoded_value, + } + end + -- If NOT_FOUND, it's a new field - handled separately below + end + + -- Collect deleted field spans (we need to find the full field span including key) + -- For simplicity, we'll handle deletions by walking and skipping deleted keys + local has_deleted = false + for _ in pairs(deleted) do has_deleted = true; break end + + -- If we have deletions or new fields, fall back to walking + -- (splicing deletions is complex due to comma handling) + local new_fields = {} + for _, p in ipairs(patches) do + local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) + if rc == QJSON_NOT_FOUND then + new_fields[#new_fields + 1] = p + end + end + + if has_deleted or #new_fields > 0 then + -- Fall back to walking for complex cases + return encode_lazy_object_walking_with_patches(t, patches, deleted) + end + + -- Sort replacements by start offset + table.sort(replacements, function(a, b) return a.start < b.start end) + + -- Build output by splicing + local parts = {} + local pos = t._bs + 1 -- 1-based Lua index + + for _, r in ipairs(replacements) do + -- Copy unchanged portion (convert 0-based to 1-based) + local r_start_1based = r.start + 1 + if r_start_1based > pos then + parts[#parts + 1] = buf:sub(pos, r_start_1based - 1) + end + -- Insert replacement + parts[#parts + 1] = r.value + pos = r.end_ + 1 -- end_ is exclusive, so +1 for 1-based + end + + -- Copy remaining portion + if pos <= t._be then + parts[#parts + 1] = buf:sub(pos, t._be) + end + + return table.concat(parts) +end + +-- Walk a LazyObject with patches, handling deletions and new fields +local function encode_lazy_object_walking_with_patches(t, patches, deleted) + local parts = {} + + -- Build a set of patched keys for quick lookup + local patched_keys = {} + for _, p in ipairs(patches) do + patched_keys[p.key] = p + end + + -- Walk original fields + local i = 0 + while true do + local rc = C.qjson_cursor_object_entry_at(t._cur, i, strp_box, size_box, child_box) + if rc == QJSON_NOT_FOUND then break end + check(rc) + local k = ffi.string(strp_box[0], size_box[0]) + + -- Skip deleted keys + if not deleted[k] then + local v + local patch = patched_keys[k] + if patch then + -- Use patched value (already encoded) + parts[#parts + 1] = encode_string(k) .. ":" .. patch.encoded_value + else + -- Use original or cached value + local cached = rawget(t, k) + if cached ~= nil and not INTERNAL_KEYS[k] then + v = cached + else + v = decode_cursor(t, child_box) + end + parts[#parts + 1] = encode_string(k) .. ":" .. encode(v) + end + end + i = i + 1 + end + + -- Add new fields (patches for keys not in original) + for _, p in ipairs(patches) do + local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) + if rc == QJSON_NOT_FOUND then + parts[#parts + 1] = encode_string(p.key) .. ":" .. p.encoded_value + end + end + + return "{" .. table.concat(parts, ",") .. "}" +end + local function encode_proxy(t) - if not is_dirty(t) then - -- Fast path: no mutations — slice the original buffer bytes. + local patches = rawget(t, "_patches") + local deleted = rawget(t, "_deleted") + + -- Check if we have patches or deletions + local has_patch = patches and #patches > 0 + local has_del = false + if deleted then + for _ in pairs(deleted) do has_del = true; break end + end + + -- Fast path: no mutations at all — slice the original buffer bytes. + if not has_patch and not has_del and not has_dirty_children(t) then return t._doc._hold:sub(t._bs + 1, t._be) end + + -- Has patches: use splice encoding for objects if getmetatable(t) == LazyObject then - return encode_lazy_object_walking(t) + if has_patch and not has_dirty_children(t) then + return encode_with_patches(t) + end + return encode_lazy_object_walking_with_patches(t, patches or {}, deleted or {}) end + return encode_lazy_array_walking(t) end diff --git a/src/ffi.rs b/src/ffi.rs index d4d8cec..7e1d327 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -806,6 +806,68 @@ pub unsafe extern "C" fn qjson_cursor_object_entry_at( }) } +/// Given an object cursor and a key, write the byte range `[value_start, value_end)` +/// of the field's value in the original buffer. Returns `QJSON_NOT_FOUND` if the +/// key does not exist, `QJSON_TYPE_MISMATCH` if the cursor is not an object. +/// +/// # Safety +/// +/// See the module-level [shared safety contract](self#shared-safety-contract). +/// `c` must point to a cursor produced by an earlier `qjson_*` call whose +/// document is still alive; `key` must point to `key_len` bytes or be NULL +/// with `key_len == 0`; `value_start` and `value_end` must be non-NULL and +/// writable. +#[no_mangle] +pub unsafe extern "C" fn qjson_cursor_field_bytes( + c: *const qjson_cursor, + key: *const c_char, key_len: usize, + value_start: *mut usize, value_end: *mut usize, +) -> c_int { + ffi_catch!({ + if value_start.is_null() || value_end.is_null() || (key.is_null() && key_len != 0) { + return qjson_err::QJSON_INVALID_ARG as c_int; + } + let (d, cur) = match cursor_to_internal(c) { + Ok(x) => x, Err(e) => return e as c_int, + }; + let k = if key.is_null() { &[][..] } else { + std::slice::from_raw_parts(key as *const u8, key_len) + }; + // Resolve the field to get a child cursor + let child = match crate::cursor::resolve_single_key(d, cur, k) { + Ok(x) => x, Err(e) => return e as c_int, + }; + // Get the byte range of the child value + let pos = d.indices[child.idx_start as usize] as usize; + let lead = match d.buf.get(pos) { + Some(b) => *b, + None => return qjson_err::QJSON_PARSE_ERROR as c_int, + }; + match lead { + b'{' | b'[' | b'"' => { + // Container or string: span runs from opener to the matching + // closer, inclusive. + let end = d.indices[child.idx_end as usize] as usize; + if end >= d.buf.len() { + return qjson_err::QJSON_PARSE_ERROR as c_int; + } + *value_start = pos; + *value_end = end + 1; + qjson_err::QJSON_OK as c_int + } + _ => { + // Scalar: delegate to scalar_byte_range. + let (s, e) = match scalar_byte_range(d, child) { + Ok(x) => x, Err(e) => return e as c_int, + }; + *value_start = s; + *value_end = e; + qjson_err::QJSON_OK as c_int + } + } + }) +} + /// Test-only export that forces a Rust panic to verify the FFI panic barrier /// converts it to `QJSON_OOM` instead of unwinding across the boundary. /// diff --git a/tests/lua/lazy_patch_spec.lua b/tests/lua/lazy_patch_spec.lua new file mode 100644 index 0000000..0f73d46 --- /dev/null +++ b/tests/lua/lazy_patch_spec.lua @@ -0,0 +1,291 @@ +local qjson = require("qjson") + +describe("Lazy Patch - basic patching", function() + it("patches a single field and encodes correctly", function() + local t = qjson.decode('{"a":1,"b":2}') + t.a = 10 + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(10, parsed.a) + assert.are.equal(2, parsed.b) + end) + + it("reads patched value back", function() + local t = qjson.decode('{"a":1,"b":2}') + t.a = 10 + assert.are.equal(10, t.a) + assert.are.equal(2, t.b) + end) + + it("patches multiple fields", function() + local t = qjson.decode('{"a":1,"b":2,"c":3}') + t.a = 10 + t.b = 20 + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(10, parsed.a) + assert.are.equal(20, parsed.b) + assert.are.equal(3, parsed.c) + end) + + it("patches the same field multiple times", function() + local t = qjson.decode('{"a":1}') + t.a = 10 + t.a = 20 + t.a = 30 + assert.are.equal(30, t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(30, parsed.a) + end) +end) + +describe("Lazy Patch - new fields", function() + it("adds a new field", function() + local t = qjson.decode('{"a":1}') + t.b = 2 + assert.are.equal(1, t.a) + assert.are.equal(2, t.b) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(1, parsed.a) + assert.are.equal(2, parsed.b) + end) + + it("adds multiple new fields", function() + local t = qjson.decode('{"a":1}') + t.b = 2 + t.c = 3 + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(1, parsed.a) + assert.are.equal(2, parsed.b) + assert.are.equal(3, parsed.c) + end) +end) + +describe("Lazy Patch - deletion", function() + it("deletes a field", function() + local t = qjson.decode('{"a":1,"b":2}') + t.a = nil + assert.is_nil(t.a) + assert.are.equal(2, t.b) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.is_nil(parsed.a) + assert.are.equal(2, parsed.b) + end) + + it("deletes then re-adds a field", function() + local t = qjson.decode('{"a":1,"b":2}') + t.a = nil + assert.is_nil(t.a) + t.a = 100 + assert.are.equal(100, t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(100, parsed.a) + assert.are.equal(2, parsed.b) + end) +end) + +describe("Lazy Patch - type changes", function() + it("changes number to string", function() + local t = qjson.decode('{"a":1}') + t.a = "hello" + assert.are.equal("hello", t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal("hello", parsed.a) + end) + + it("changes string to number", function() + local t = qjson.decode('{"a":"hello"}') + t.a = 42 + assert.are.equal(42, t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(42, parsed.a) + end) + + it("changes scalar to object", function() + local t = qjson.decode('{"a":1}') + t.a = {x = 10} + assert.are.equal(10, t.a.x) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(10, parsed.a.x) + end) + + it("changes scalar to array", function() + local t = qjson.decode('{"a":1}') + t.a = {10, 20, 30} + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.same({10, 20, 30}, parsed.a) + end) +end) + +describe("Lazy Patch - subtrees remain lazy", function() + it("patching root does not materialize children", function() + local t = qjson.decode('{"a":{"x":1},"b":2}') + t.b = 20 + -- Child should still be lazy + assert.are.equal(qjson._LazyObject, getmetatable(t.a)) + assert.are.equal(1, t.a.x) + end) + + it("patching root preserves lazy array children", function() + local t = qjson.decode('{"a":[1,2,3],"b":2}') + t.b = 20 + -- Child should still be lazy + assert.are.equal(qjson._LazyArray, getmetatable(t.a)) + assert.are.equal(1, t.a[1]) + end) +end) + +describe("Lazy Patch - iteration with patches", function() + it("iterates with patched values", function() + local t = qjson.decode('{"a":1,"b":2,"c":3}') + t.a = 10 + local keys = {} + local values = {} + for k, v in qjson.pairs(t) do + keys[#keys+1] = k + values[k] = v + end + assert.are.equal(10, values.a) + assert.are.equal(2, values.b) + assert.are.equal(3, values.c) + end) + + it("iterates with new fields", function() + local t = qjson.decode('{"a":1}') + t.b = 2 + local keys = {} + local values = {} + for k, v in qjson.pairs(t) do + keys[#keys+1] = k + values[k] = v + end + assert.are.equal(1, values.a) + assert.are.equal(2, values.b) + end) + + it("iterates skipping deleted fields", function() + local t = qjson.decode('{"a":1,"b":2,"c":3}') + t.b = nil + local keys = {} + local values = {} + for k, v in qjson.pairs(t) do + keys[#keys+1] = k + values[k] = v + end + assert.are.equal(1, values.a) + assert.is_nil(values.b) + assert.are.equal(3, values.c) + end) +end) + +describe("Lazy Patch - nested modifications", function() + it("nested object modification triggers child materialization", function() + local t = qjson.decode('{"a":{"x":1},"b":2}') + t.a.x = 10 + -- Child is now materialized + assert.is_nil(getmetatable(t.a)) + assert.are.equal(10, t.a.x) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(10, parsed.a.x) + assert.are.equal(2, parsed.b) + end) + + it("nested array modification triggers child materialization", function() + local t = qjson.decode('{"a":[1,2,3],"b":2}') + t.a[2] = 20 + -- Child is now materialized + assert.are.equal(qjson.empty_array_mt, getmetatable(t.a)) + assert.are.equal(20, t.a[2]) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.same({1, 20, 3}, parsed.a) + assert.are.equal(2, parsed.b) + end) +end) + +describe("Lazy Patch - edge cases", function() + it("handles empty object", function() + local t = qjson.decode('{}') + t.a = 1 + assert.are.equal(1, t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(1, parsed.a) + end) + + it("handles special characters in keys", function() + local t = qjson.decode('{"a.b":1}') + t["a.b"] = 10 + assert.are.equal(10, t["a.b"]) + end) + + it("handles unicode in values", function() + local t = qjson.decode('{"a":"hello"}') + t.a = "hello world" + assert.are.equal("hello world", t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal("hello world", parsed.a) + end) + + it("handles boolean values", function() + local t = qjson.decode('{"a":true}') + t.a = false + assert.is_false(t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.is_false(parsed.a) + end) + + it("handles null values", function() + local t = qjson.decode('{"a":1}') + t.a = qjson.null + assert.are.equal(qjson.null, t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(cjson.null, parsed.a) + end) +end) + +describe("Lazy Patch - metatable preservation", function() + it("LazyObject metatable is preserved after patching", function() + local t = qjson.decode('{"a":1,"b":2}') + t.a = 10 + assert.are.equal(qjson._LazyObject, getmetatable(t)) + end) + + it("can still access original fields after patching", function() + local t = qjson.decode('{"a":1,"b":2,"c":3}') + t.a = 10 + assert.are.equal(10, t.a) + assert.are.equal(2, t.b) + assert.are.equal(3, t.c) + end) +end) From 176558a1a0ef6ba71288a50525e36c5379f2f6a2 Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Mon, 18 May 2026 13:40:58 +0000 Subject: [PATCH 2/3] fix(lazy-patch): forward-ref bug + update test assertions - Move encode_lazy_object_walking_with_patches before encode_with_patches (Lua local function defined later was unresolvable at compile time, causing nil-call errors on new-field/deletion paths). - Update tests in lazy_patch_spec.lua and lazy_table_spec.lua to match the spec's edge-case-1 design: writing to a LazyObject records a patch on the object instead of materializing it, so the metatable stays LazyObject after assignment. --- lua/qjson/table.lua | 114 ++++++++++++++++------------------ tests/lua/lazy_patch_spec.lua | 7 ++- tests/lua/lazy_table_spec.lua | 10 +-- 3 files changed, 63 insertions(+), 68 deletions(-) diff --git a/lua/qjson/table.lua b/lua/qjson/table.lua index acd6e7b..eab82f8 100644 --- a/lua/qjson/table.lua +++ b/lua/qjson/table.lua @@ -593,12 +593,9 @@ local function has_dirty_children(t) return false end --- Encode a LazyObject with patches by splicing the original buffer. --- This is the fast path for "decode -> modify few fields -> encode". -local function encode_with_patches(t) - local buf = t._doc._hold - local patches = rawget(t, "_patches") or {} - local deleted = rawget(t, "_deleted") or {} +-- Walk a LazyObject with patches, handling deletions and new fields +local function encode_lazy_object_walking_with_patches(t, patches, deleted) + local parts = {} -- Build a set of patched keys for quick lookup local patched_keys = {} @@ -606,6 +603,53 @@ local function encode_with_patches(t) patched_keys[p.key] = p end + -- Walk original fields + local i = 0 + while true do + local rc = C.qjson_cursor_object_entry_at(t._cur, i, strp_box, size_box, child_box) + if rc == QJSON_NOT_FOUND then break end + check(rc) + local k = ffi.string(strp_box[0], size_box[0]) + + -- Skip deleted keys + if not deleted[k] then + local v + local patch = patched_keys[k] + if patch then + -- Use patched value (already encoded) + parts[#parts + 1] = encode_string(k) .. ":" .. patch.encoded_value + else + -- Use original or cached value + local cached = rawget(t, k) + if cached ~= nil and not INTERNAL_KEYS[k] then + v = cached + else + v = decode_cursor(t, child_box) + end + parts[#parts + 1] = encode_string(k) .. ":" .. encode(v) + end + end + i = i + 1 + end + + -- Add new fields (patches for keys not in original) + for _, p in ipairs(patches) do + local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) + if rc == QJSON_NOT_FOUND then + parts[#parts + 1] = encode_string(p.key) .. ":" .. p.encoded_value + end + end + + return "{" .. table.concat(parts, ",") .. "}" +end + +-- Encode a LazyObject with patches by splicing the original buffer. +-- This is the fast path for "decode -> modify few fields -> encode". +local function encode_with_patches(t) + local buf = t._doc._hold + local patches = rawget(t, "_patches") or {} + local deleted = rawget(t, "_deleted") or {} + -- Collect replacements: { {start, end_, value}, ... } local replacements = {} @@ -630,16 +674,16 @@ local function encode_with_patches(t) -- If we have deletions or new fields, fall back to walking -- (splicing deletions is complex due to comma handling) - local new_fields = {} + local has_new_field = false for _, p in ipairs(patches) do local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) if rc == QJSON_NOT_FOUND then - new_fields[#new_fields + 1] = p + has_new_field = true + break end end - if has_deleted or #new_fields > 0 then - -- Fall back to walking for complex cases + if has_deleted or has_new_field then return encode_lazy_object_walking_with_patches(t, patches, deleted) end @@ -669,56 +713,6 @@ local function encode_with_patches(t) return table.concat(parts) end --- Walk a LazyObject with patches, handling deletions and new fields -local function encode_lazy_object_walking_with_patches(t, patches, deleted) - local parts = {} - - -- Build a set of patched keys for quick lookup - local patched_keys = {} - for _, p in ipairs(patches) do - patched_keys[p.key] = p - end - - -- Walk original fields - local i = 0 - while true do - local rc = C.qjson_cursor_object_entry_at(t._cur, i, strp_box, size_box, child_box) - if rc == QJSON_NOT_FOUND then break end - check(rc) - local k = ffi.string(strp_box[0], size_box[0]) - - -- Skip deleted keys - if not deleted[k] then - local v - local patch = patched_keys[k] - if patch then - -- Use patched value (already encoded) - parts[#parts + 1] = encode_string(k) .. ":" .. patch.encoded_value - else - -- Use original or cached value - local cached = rawget(t, k) - if cached ~= nil and not INTERNAL_KEYS[k] then - v = cached - else - v = decode_cursor(t, child_box) - end - parts[#parts + 1] = encode_string(k) .. ":" .. encode(v) - end - end - i = i + 1 - end - - -- Add new fields (patches for keys not in original) - for _, p in ipairs(patches) do - local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) - if rc == QJSON_NOT_FOUND then - parts[#parts + 1] = encode_string(p.key) .. ":" .. p.encoded_value - end - end - - return "{" .. table.concat(parts, ",") .. "}" -end - local function encode_proxy(t) local patches = rawget(t, "_patches") local deleted = rawget(t, "_deleted") diff --git a/tests/lua/lazy_patch_spec.lua b/tests/lua/lazy_patch_spec.lua index 0f73d46..7506efb 100644 --- a/tests/lua/lazy_patch_spec.lua +++ b/tests/lua/lazy_patch_spec.lua @@ -199,11 +199,12 @@ describe("Lazy Patch - iteration with patches", function() end) describe("Lazy Patch - nested modifications", function() - it("nested object modification triggers child materialization", function() + it("nested object modification records a patch on the child (child stays lazy)", function() local t = qjson.decode('{"a":{"x":1},"b":2}') t.a.x = 10 - -- Child is now materialized - assert.is_nil(getmetatable(t.a)) + -- Per lazy-patch-spec edge case 1: parent and child both stay lazy; + -- the child records its own patch instead of materializing. + assert.are.equal(qjson._LazyObject, getmetatable(t.a)) assert.are.equal(10, t.a.x) local out = qjson.encode(t) local cjson = require("cjson") diff --git a/tests/lua/lazy_table_spec.lua b/tests/lua/lazy_table_spec.lua index 2769d39..a7a0acf 100644 --- a/tests/lua/lazy_table_spec.lua +++ b/tests/lua/lazy_table_spec.lua @@ -168,20 +168,20 @@ describe("__ipairs / qjson.ipairs over LazyArray", function() end) end) -describe("__newindex — first-write materialization", function() - it("converts LazyObject into a plain table preserving existing keys", function() +describe("__newindex — patch tracking on lazy objects", function() + it("LazyObject stays lazy after assignment, with new field readable", function() local t = qjson.decode('{"a":1,"b":2}') t.c = 3 - assert.is_nil(getmetatable(t)) + assert.are.equal(qjson._LazyObject, getmetatable(t)) assert.are.equal(1, t.a) assert.are.equal(2, t.b) assert.are.equal(3, t.c) end) - it("nested containers remain lazy after parent materialization", function() + it("nested containers remain lazy when sibling field is written", function() local t = qjson.decode('{"inner":{"x":1}}') t.extra = "y" - assert.is_nil(getmetatable(t)) + assert.are.equal(qjson._LazyObject, getmetatable(t)) local inner = t.inner assert.are.equal(qjson._LazyObject, getmetatable(inner)) assert.are.equal(1, inner.x) From 208d8b9ecf1efd3208c832f77d7f4972dbcdafcd Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Mon, 18 May 2026 13:47:05 +0000 Subject: [PATCH 3/3] fix(lazy-patch): address review feedback on PR #44 - LazyObject.__newindex: raise on non-string keys instead of silently recording an unreadable patch (Copilot C3). - Remove unused has_patches / has_deletions helpers (Copilot C2). - encode_with_patches: classify each patch in a single qjson_cursor_field_bytes pass (was doing two) and propagate unexpected return codes via check() (Copilot C4 + CodeRabbit R6). - encode_lazy_object_walking_with_patches: propagate unexpected qjson_cursor_field_bytes return codes (CodeRabbit R7). - lazy_object_iter: same error-propagation fix (CodeRabbit R5). - tests/lua/lazy_patch_spec.lua: extend special-key test to verify encoded output, swap the unicode test to multi-byte UTF-8, add "delete non-existent field", "all originals deleted + new fields added", and "non-string key raises" cases (CodeRabbit R8/R9/R10 + nitpick N2). - docs/lazy-patch-spec.md: add upfront note that the listing is design pseudocode (pointing helper names at qjson_cursor_field_bytes), and correct the patch-record example to include lua_value alongside encoded_value (CodeRabbit R1 + nitpick N3). --- docs/lazy-patch-spec.md | 25 ++++++++++-------- lua/qjson/table.lua | 48 ++++++++++++----------------------- tests/lua/lazy_patch_spec.lua | 43 ++++++++++++++++++++++++++++--- 3 files changed, 70 insertions(+), 46 deletions(-) diff --git a/docs/lazy-patch-spec.md b/docs/lazy-patch-spec.md index 40e4551..c49aedb 100644 --- a/docs/lazy-patch-spec.md +++ b/docs/lazy-patch-spec.md @@ -1,5 +1,7 @@ # Lazy Patch: Structural Patching for qjson.decode +> **Note:** This document is the original *design* memo. The Lua snippets below are high-level pseudocode showing intent — they are not the shipped code. The actual implementation lives in `lua/qjson/table.lua` and `src/ffi.rs`; the splice path is used only for the "all patches target existing fields, no deletions" case, while deletions and new fields fall through to a walking-based encoder (`encode_lazy_object_walking_with_patches`) that avoids the comma-rewriting issues that the splice pseudocode below has. Helper names like `find_field_value_span` correspond to the FFI symbol `qjson_cursor_field_bytes` (`src/ffi.rs:821`). + ## Overview This spec describes an optimization for the "decode → modify few fields → encode" workflow. Instead of materializing the entire root container on first write, we record patches and apply them during encode by splicing the original buffer. @@ -53,12 +55,13 @@ local out = qjson.encode(tab) -- Splices: buf[0..model_start] + "gpt4o" + #### Patch Record (Lua side) ```lua --- Stored in the lazy view's internal table +-- Stored in the lazy view's internal table. +-- Each patch records key, encoded JSON value, and the original Lua value +-- (so __index can hand back the user's value without re-decoding). +-- Byte offsets are resolved lazily during encode via qjson_cursor_field_bytes. _patches = { - -- Each patch records the key and new encoded value - -- Byte offsets are resolved lazily during encode - { key = "model", encoded_value = '"gpt4o"' }, - { key = "temperature", encoded_value = "0.9" }, + { key = "model", encoded_value = '"gpt4o"', lua_value = "gpt4o" }, + { key = "temperature", encoded_value = "0.9", lua_value = 0.9 }, } ``` @@ -105,23 +108,23 @@ LazyObject.__newindex = function(t, k, v) end end else - -- Encode the new value + -- Encode the new value (we keep both encoded and the original Lua + -- value so __index can return v without re-decoding). local encoded = encode_value(v) - - -- Update or add patch + local found = false for _, p in ipairs(patches) do if p.key == k then p.encoded_value = encoded + p.lua_value = v found = true break end end if not found then - patches[#patches + 1] = { key = k, encoded_value = encoded } + patches[#patches + 1] = { key = k, encoded_value = encoded, lua_value = v } end - - -- Remove from deleted if previously deleted + deleted[k] = nil end end diff --git a/lua/qjson/table.lua b/lua/qjson/table.lua index eab82f8..7c9e004 100644 --- a/lua/qjson/table.lua +++ b/lua/qjson/table.lua @@ -210,6 +210,7 @@ local function lazy_object_iter(state, _prev_key) if rc == QJSON_NOT_FOUND then return p.key, p.lua_value end + check(rc) -- propagate unexpected errors end end @@ -324,6 +325,9 @@ local encode -- On write, record a patch instead of materializing the entire object. -- This allows encode to splice the original buffer with patched values. LazyObject.__newindex = function(t, k, v) + if type(k) ~= "string" then + error("qjson: LazyObject key must be a string, got " .. type(k)) + end -- Initialize patches table if needed if not rawget(t, "_patches") then rawset(t, "_patches", {}) @@ -515,20 +519,6 @@ local function is_dirty(v) return false end --- Check if a lazy view has patches (but not necessarily dirty children) -local function has_patches(v) - local patches = rawget(v, "_patches") - return patches and #patches > 0 -end - --- Check if a lazy view has deletions -local function has_deletions(v) - local deleted = rawget(v, "_deleted") - if not deleted then return false end - for _ in pairs(deleted) do return true end - return false -end - -- Walk a dirty LazyObject and emit JSON, preferring cached children (which -- may be materialized) over freshly resolved cursors. Non-cached children -- emit through a fresh proxy and naturally fast-path their unmodified subtree. @@ -637,6 +627,8 @@ local function encode_lazy_object_walking_with_patches(t, patches, deleted) local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) if rc == QJSON_NOT_FOUND then parts[#parts + 1] = encode_string(p.key) .. ":" .. p.encoded_value + elseif rc ~= QJSON_OK then + check(rc) -- propagate unexpected errors end end @@ -650,39 +642,31 @@ local function encode_with_patches(t) local patches = rawget(t, "_patches") or {} local deleted = rawget(t, "_deleted") or {} - -- Collect replacements: { {start, end_, value}, ... } + -- Classify each patch into a replacement (existing field) or a new field + -- in a single FFI pass. Replacements carry the byte span; presence of any + -- new field forces the walking fallback. local replacements = {} - - -- For each patch, find the field's byte range in the original buffer + local has_new_field = false for _, p in ipairs(patches) do local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) if rc == QJSON_OK then - -- Existing field: replace value replacements[#replacements + 1] = { start = tonumber(sz_a[0]), end_ = tonumber(sz_b[0]), value = p.encoded_value, } + elseif rc == QJSON_NOT_FOUND then + has_new_field = true + else + check(rc) -- propagate unexpected errors end - -- If NOT_FOUND, it's a new field - handled separately below end - -- Collect deleted field spans (we need to find the full field span including key) - -- For simplicity, we'll handle deletions by walking and skipping deleted keys + -- Deletions and new fields are handled by the walking fallback, + -- which avoids the comma-rewriting headache the splice path would have. local has_deleted = false for _ in pairs(deleted) do has_deleted = true; break end - -- If we have deletions or new fields, fall back to walking - -- (splicing deletions is complex due to comma handling) - local has_new_field = false - for _, p in ipairs(patches) do - local rc = C.qjson_cursor_field_bytes(t._cur, p.key, #p.key, sz_a, sz_b) - if rc == QJSON_NOT_FOUND then - has_new_field = true - break - end - end - if has_deleted or has_new_field then return encode_lazy_object_walking_with_patches(t, patches, deleted) end diff --git a/tests/lua/lazy_patch_spec.lua b/tests/lua/lazy_patch_spec.lua index 7506efb..b50ec0a 100644 --- a/tests/lua/lazy_patch_spec.lua +++ b/tests/lua/lazy_patch_spec.lua @@ -242,16 +242,21 @@ describe("Lazy Patch - edge cases", function() local t = qjson.decode('{"a.b":1}') t["a.b"] = 10 assert.are.equal(10, t["a.b"]) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.are.equal(10, parsed["a.b"]) end) it("handles unicode in values", function() local t = qjson.decode('{"a":"hello"}') - t.a = "hello world" - assert.are.equal("hello world", t.a) + local unicode = "你好 🌏 café" + t.a = unicode + assert.are.equal(unicode, t.a) local out = qjson.encode(t) local cjson = require("cjson") local parsed = cjson.decode(out) - assert.are.equal("hello world", parsed.a) + assert.are.equal(unicode, parsed.a) end) it("handles boolean values", function() @@ -273,6 +278,38 @@ describe("Lazy Patch - edge cases", function() local parsed = cjson.decode(out) assert.are.equal(cjson.null, parsed.a) end) + + it("deletes a non-existent field without error", function() + local t = qjson.decode('{"a":1}') + t.b = nil + assert.is_nil(t.b) + assert.are.equal(1, t.a) + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.is_nil(parsed.b) + assert.are.equal(1, parsed.a) + end) + + it("handles all original fields deleted plus new fields added", function() + local t = qjson.decode('{"a":1,"b":2}') + t.a = nil + t.b = nil + t.c = 3 + t.d = 4 + local out = qjson.encode(t) + local cjson = require("cjson") + local parsed = cjson.decode(out) + assert.is_nil(parsed.a) + assert.is_nil(parsed.b) + assert.are.equal(3, parsed.c) + assert.are.equal(4, parsed.d) + end) + + it("rejects non-string keys on LazyObject", function() + local t = qjson.decode('{"a":1}') + assert.has_error(function() t[1] = "x" end) + end) end) describe("Lazy Patch - metatable preservation", function()