diff --git a/.gitignore b/.gitignore index 04e481ee9..18d3ae2da 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,6 @@ callgrind.out.* # beacon node oapi json file beacon-node-oapi.json flamegraphs/ + +# benchmark data +/bench/data/ diff --git a/.iex.exs b/.iex.exs index af5fa1e37..b1a9bb4bd 100644 --- a/.iex.exs +++ b/.iex.exs @@ -19,3 +19,15 @@ block_info = fn "0x"<>root -> root |> Base.decode16(case: :lower) |> elem(1) |> blocks_by_status = fn status -> Blocks.get_blocks_with_status(status) |> elem(1) end blocks_by_status_count = fn status -> blocks_by_status.(status) |> Enum.count() end + +# Memory introspection (see lib/utils/mem.ex) +alias LambdaEthereumConsensus.Mem +# Quick access: +# Mem.report() — full memory report +# Mem.ets_tables() — all ETS tables ranked by memory +# Mem.top_processes(10) — top 10 processes by heap +# Mem.state_cache_detail() — per-entry BlockStates breakdown +# Mem.checkpoint_detail() — checkpoint states table +# Mem.binary_stats() — binary/refc binary pressure +# Mem.cache_tables() — StateTransition cache sizes +# snap = Mem.snapshot(); ...; Mem.diff_snapshot(snap) — delta tracking diff --git a/docs/perf/benchmarking.md b/docs/perf/benchmarking.md new file mode 100644 index 000000000..2b8ea428d --- /dev/null +++ b/docs/perf/benchmarking.md @@ -0,0 +1,127 @@ +# Block Processing Benchmarks + +Reproducible benchmarks for measuring block processing performance on real mainnet/testnet data. The workflow has two steps: download data from a Beacon API node, then replay blocks through the `ForkChoice.process_block` pipeline offline. + +## Quick Start + +```bash +# 1. Download 2 epochs (64 slots) from a Fulu-compatible node +mix bench.download \ + --url http://localhost:5052 \ + --start-slot 9649056 \ + --count 64 + +# 2. 
Run the benchmark +mix bench.blocks --data-dir bench/data/slot_9649056_64 +``` + +## Step 1: Download Data + +`mix bench.download` fetches state, blocks, and blob sidecars from a Beacon API, converts blobs to data columns (via KZG cell computation), and saves everything to disk. + +### Options + +| Flag | Required | Default | Description | +|------|----------|---------|-------------| +| `--url` | yes | | Beacon API base URL (e.g. `http://localhost:5052`) | +| `--start-slot` | yes | | Slot to anchor from (should be an epoch boundary) | +| `--count` | yes | | Number of slots after start to fetch | +| `--data-dir` | no | `bench/data` | Base directory for output | +| `--network` | no | `mainnet` | Network config (mainnet, sepolia, holesky, etc.) | + +### Choosing a Start Slot + +Pick a slot that is an **epoch boundary** (divisible by 32). This ensures the anchor state is at the start of an epoch, which is the natural checkpoint alignment for the forkchoice store. The task warns if the slot is not aligned. + +To find a recent finalized epoch boundary: + +```bash +# Query finalized slot from your beacon node +curl -s http://localhost:5052/eth/v1/beacon/headers/finalized | jq '.data.header.message.slot' +# Round down to epoch boundary: slot - (slot % 32) +``` + +### Output Structure + +``` +bench/data/slot_<start-slot>_<count>/ + metadata.json # Download parameters + timestamp + network + state.ssz_snappy # Anchor state (BeaconState) at start-slot + block_<slot>.ssz_snappy # Anchor block + all non-empty blocks in range + columns_<slot>/ # Data columns per block (Fulu, only if block has blobs) + column_<index>.ssz_snappy +``` + +Missing block files mean the slot was empty (no block proposed). This is normal; mainnet typically has ~1-3% empty slots. 
+ +### Requirements + +The Beacon API node must: +- Serve the `/eth/v2/debug/beacon/states/{slot}` endpoint (SSZ) +- Serve the `/eth/v2/beacon/blocks/{slot}` endpoint (SSZ) +- Serve the `/eth/v1/beacon/blob_sidecars/{slot}` endpoint (JSON) +- Have state and blocks available for the requested slot range (not pruned) +- Be on the same fork as the compiled `.fork_version` (currently Fulu) + +## Step 2: Process Blocks + +`mix bench.blocks` loads cached data from disk, boots the necessary infrastructure (LevelDB, ETS caches, mocked execution engine), and replays blocks through the full `ForkChoice.process_block` pipeline. + +### Options + +| Flag | Required | Default | Description | +|------|----------|---------|-------------| +| `--data-dir` | yes | | Path to a downloaded dataset directory | +| `--log-level` | no | `info` | Logger level (`debug`, `info`, `warning`, `error`) | + +### What Gets Booted + +The task starts a minimal subset of the supervision tree, matching the `db` operation mode: + +- LevelDB (temporary directory, discarded after run) +- ETS caches (Blocks, BlockStates, CheckpointStates) +- StateTransition cache +- Task supervisors (for async state storage and pruning) +- Mocked Engine API (always returns `VALID` for execution payloads) + +No networking, no Beacon API, no validator logic. + +### Example Output + +``` +=== Block Processing Benchmark === +Slots: 9649056 -> 9649120 +Blocks: 61 / 64 (3 empty slots) +Epochs: 2 boundaries crossed + +Total time: 18.7s +Avg per block: 306ms +Epoch blocks: [slot 9649088: 8.2s] +Non-epoch avg: 14ms +``` + +At `info` log level, each block also emits per-step timings from the state transition: + +``` +[on_block] slot=9649088 root=A1B2C3D4 epoch=true epoch.justification_and_finalization=1200ms epoch.rewards_and_penalties=3400ms ... +``` + +Use `--log-level warning` to suppress per-block logs and see only the summary. 
+ +## Typical Ranges for Benchmarking + +| Goal | Suggested `--count` | Notes | +|------|-------------------|-------| +| Quick sanity check | 32 (1 epoch) | Fast, but only 1 epoch boundary | +| Standard benchmark | 64-128 (2-4 epochs) | Good balance of data and runtime | +| Full performance profile | 200+ (6+ epochs) | Multiple epoch boundaries, better averages | +| Epoch-only analysis | 32 | Start at slot N-1 of epoch boundary to isolate epoch cost | + +## Reusing Downloaded Data + +Downloaded datasets are self-contained (state + blocks + columns + metadata) and can be: +- Shared between team members (copy the directory) +- Rerun after code changes to compare before/after +- Stored long-term as regression baselines + +The `bench/data/` directory is gitignored. diff --git a/lib/lambda_ethereum_consensus/beacon/pending_blocks.ex b/lib/lambda_ethereum_consensus/beacon/pending_blocks.ex index fb2edc9a9..717973167 100644 --- a/lib/lambda_ethereum_consensus/beacon/pending_blocks.ex +++ b/lib/lambda_ethereum_consensus/beacon/pending_blocks.ex @@ -14,6 +14,7 @@ defmodule LambdaEthereumConsensus.Beacon.PendingBlocks do alias LambdaEthereumConsensus.StateTransition.DasCore alias LambdaEthereumConsensus.Store.Blobs alias LambdaEthereumConsensus.Store.Blocks + alias LambdaEthereumConsensus.Store.DataColumnDb alias LambdaEthereumConsensus.Store.DataColumns alias LambdaEthereumConsensus.Utils alias Types.BlockInfo @@ -34,6 +35,19 @@ defmodule LambdaEthereumConsensus.Beacon.PendingBlocks do @type state :: nil @download_retries 100 + # Max blocks to process per retry_download_columns invocation. + # Keeps memory bounded by yielding the GenServer between batches, + # allowing GC to reclaim BeaconState objects (~300MB each). + @retry_batch_size 5 + # Max blocks to process per process_blocks invocation. + # Yielding the GenServer between batches allows load shedding and + # GC to run, preventing unbounded message queue growth during catch-up. 
+ @process_batch_size 5 + # Max retries for "parent state not found" errors before marking invalid. + # Each retry is delayed by 5 seconds. This gives the async LevelDB write + # time to complete (~15 seconds total) while preventing infinite spin loops + # when the state is truly lost (e.g., processed during catch-up mode). + @max_state_retries 3 @doc """ If the block is not present, it will be stored as pending. @@ -51,11 +65,12 @@ defmodule LambdaEthereumConsensus.Beacon.PendingBlocks do @spec add_block(Store.t(), SignedBeaconBlock.t()) :: Store.t() def add_block(store, signed_block) do block_info = BlockInfo.from_block(signed_block) - loaded_block = Blocks.get_block_info(block_info.root) + loaded_block = Blocks.get_block_info_cached(block_info.root) log_md = [slot: signed_block.message.slot, root: block_info.root] - # If the block is new or was to be downloaded, we store it. - if is_nil(loaded_block) or loaded_block.status == :download do + # If the block is new, was to be downloaded, or was previously marked invalid + # (e.g. due to transient data availability failures), we (re-)process it. + if is_nil(loaded_block) or loaded_block.status in [:download, :invalid] do if HardForkAliasInjection.fulu?() do add_block_fulu(store, block_info, log_md) else @@ -112,7 +127,7 @@ defmodule LambdaEthereumConsensus.Beacon.PendingBlocks do # Ensure the retry heartbeat is running so partial/empty responses # or transient errors don't leave this block permanently stuck. - Process.send_after(self(), :retry_download_columns, 60_000) + Process.send_after(self(), :retry_download_columns, 12_000) block_info |> BlockInfo.change_status(:download_columns) @@ -122,6 +137,40 @@ defmodule LambdaEthereumConsensus.Beacon.PendingBlocks do end end + @doc """ + On startup, resets blocks that were marked :invalid due to transient failures + (e.g. data not available during catch-up sync). Blocks with signed_block data + are moved back to :download_columns (Fulu) or :download_blobs (pre-Fulu) so they can be re-evaluated. 
+ Blocks without signed_block data (download markers) remain :invalid. + """ + @spec recover_invalid_blocks() :: :ok | :recovered + def recover_invalid_blocks() do + case Blocks.get_blocks_with_status(:invalid) do + {:ok, blocks} -> + blocks + |> Enum.filter(fn %BlockInfo{signed_block: sb} -> not is_nil(sb) end) + |> recover_blocks() + + {:error, reason} -> + Logger.warning("[PendingBlocks] Failed to get invalid blocks for recovery: #{reason}") + :ok + end + end + + defp recover_blocks([]), do: :ok + + defp recover_blocks(recoverable) do + Logger.info( + "[PendingBlocks] Recovering #{length(recoverable)} previously-invalid blocks on startup" + ) + + target_status = + if HardForkAliasInjection.fulu?(), do: :download_columns, else: :download_blobs + + Enum.each(recoverable, &Blocks.change_status(&1, target_status)) + :recovered + end + @doc """ Sends any blocks that are ready to block processing. This should usually be called only by this module after receiving a new block, but there are some other cases like at node startup, as there @@ -131,13 +180,46 @@ defmodule LambdaEthereumConsensus.Beacon.PendingBlocks do def process_blocks(store) do case Blocks.get_blocks_with_status(:pending) do {:ok, blocks} -> - blocks - |> Enum.sort_by(fn %BlockInfo{} = block_info -> block_info.signed_block.message.slot end) - # Could we process just one/a small amount of blocks at a time? would it make more sense? - |> Enum.reduce(store, fn block_info, store -> - {store, _state} = process_block(store, block_info) - store - end) + # Defensive filter: a :pending block should always carry its + # signed_block payload (status transitions to :pending via + # change_status from :download_blobs/:download_columns, never from + # :download placeholders). But the 2026-04-20 22:30 crash loop left + # the store with at least one :pending entry whose signed_block was + # nil, causing BadMapError here. 
Skipping such entries lets the + # remaining pending blocks progress; logging lets us investigate the + # upstream corruption separately. + {valid, broken} = + Enum.split_with(blocks, fn %BlockInfo{signed_block: sb} -> not is_nil(sb) end) + + if broken != [] do + Logger.warning( + "[PendingBlocks] Skipping #{length(broken)} :pending block(s) with nil signed_block" <> + " (roots: #{Enum.map_join(broken, ",", fn b -> Base.encode16(b.root) |> String.slice(0, 8) end)})" + ) + end + + sorted = + Enum.sort_by(valid, fn %BlockInfo{} = block_info -> + block_info.signed_block.message.slot + end) + + # Process blocks in small batches, yielding the GenServer between + # batches so load shedding, GC, and other handlers can run. + # Without batching, processing 60+ blocks in one callback kept + # the GenServer busy for 3-5 minutes, causing mailbox overflow. + {batch, rest} = Enum.split(sorted, @process_batch_size) + + store = + Enum.reduce(batch, store, fn block_info, store -> + {store, _state} = process_block(store, block_info) + store + end) + + if rest != [] do + Process.send_after(self(), :retry_pending_blocks, 100) + end + + store {:error, reason} -> Logger.error( @@ -153,20 +235,22 @@ defmodule LambdaEthereumConsensus.Beacon.PendingBlocks do """ @spec process_blobs(Store.t(), {:ok, [Types.BlobSidecar.t()]}) :: {:ok, Store.t()} def process_blobs(store, {:ok, blobs}) do - blobs - |> Blobs.add_blobs() - |> Enum.reduce(store, fn root, store -> - with %BlockInfo{status: :download_blobs} = block_info <- Blocks.get_block_info(root), - [] <- Blobs.missing_for_block(block_info) do - block_info - |> Blocks.change_status(:pending) - |> then(&process_block_and_check_children(store, &1)) - - {:ok, store} - else - _ -> {:ok, store} - end - end) + new_store = + blobs + |> Blobs.add_blobs() + |> Enum.reduce(store, fn root, store -> + with %BlockInfo{status: :download_blobs} = block_info <- + Blocks.get_block_info_cached(root), + [] <- Blobs.missing_for_block(block_info) do + block_info + 
|> Blocks.change_status(:pending) + |> then(&process_block_and_check_children(store, &1)) + else + _ -> store + end + end) + + {:ok, new_store} end @spec process_blobs(Store.t(), {:error, any()}) :: {:ok, Store.t()} @@ -182,37 +266,51 @@ defmodule LambdaEthereumConsensus.Beacon.PendingBlocks do """ @spec process_data_columns(Store.t(), {:ok, [Types.DataColumnSidecar.t()]}) :: {:ok, Store.t()} def process_data_columns(store, {:ok, sidecars}) do - sidecars - |> DataColumns.add_columns() - |> Enum.reduce(store, fn root, store -> - with %BlockInfo{status: :download_columns} = block_info <- Blocks.get_block_info(root), - [] <- - DataColumns.missing_columns_for_block( - block_info, - DasCore.get_local_custody_columns() - ) do - block_info - |> Blocks.change_status(:pending) - |> then(&process_block_and_check_children(store, &1)) - - {:ok, store} - else - _ -> {:ok, store} - end - end) + custody_cols = DasCore.get_local_custody_columns() + + new_store = + sidecars + |> DataColumns.add_columns() + |> Enum.reduce(store, fn root, store -> + with %BlockInfo{status: :download_columns} = block_info <- + Blocks.get_block_info_cached(root), + [] <- + DataColumns.missing_columns_for_block(block_info, custody_cols) do + block_info + |> Blocks.change_status(:pending) + |> then(&process_block_and_check_children(store, &1)) + else + # Partial response: some columns received but others still missing. + # Immediately re-request the remaining columns instead of waiting + # 5-12s for the retry timer. This is the most common case on mainnet + # where a peer custodies some but not all of our required columns. + still_missing when is_list(still_missing) and still_missing != [] -> + Logger.debug( + "[PendingBlocks] Partial column response, #{length(still_missing)} still missing. Re-requesting immediately." 
+ ) + + request_missing_columns(Blocks.get_block_info_cached(root), custody_cols) + store + + _ -> + store + end + end) + + {:ok, new_store} end @spec process_data_columns(Store.t(), {:error, :no_peers}) :: {:ok, Store.t()} def process_data_columns(store, {:error, :no_peers}) do Logger.warning("[PendingBlocks] No peers for data column download, scheduling retry") - Process.send_after(self(), :retry_download_columns, 30_000) + Process.send_after(self(), :retry_download_columns, 5_000) {:ok, store} end @spec process_data_columns(Store.t(), {:error, any()}) :: {:ok, Store.t()} def process_data_columns(store, {:error, reason}) do Logger.error("[PendingBlocks] Error downloading data columns: #{inspect(reason)}") - Process.send_after(self(), :retry_download_columns, 30_000) + Process.send_after(self(), :retry_download_columns, 5_000) {:ok, store} end @@ -225,13 +323,54 @@ defmodule LambdaEthereumConsensus.Beacon.PendingBlocks do case Blocks.get_blocks_with_status(:download_columns) do {:ok, blocks} -> custody_cols = DasCore.get_local_custody_columns() - Enum.each(blocks, &request_missing_columns(&1, custody_cols)) + + # Defensive filter: a :download_columns block should always carry its + # signed_block (it got to this status after a successful block arrival + # via `add_block_fulu`). But the 2026-04-20 22:30 crash-loop left + # corrupted entries with nil signed_block, which crash + # `DataColumns.missing_columns_for_block` (it does + # `block.message.body.blob_kzg_commitments`). Skip those; upstream + # corruption will be addressed separately. Same pattern as + # `process_blocks/1`. + blocks = Enum.filter(blocks, fn %BlockInfo{signed_block: sb} -> not is_nil(sb) end) + + {ready, need_download} = + Enum.split_with(blocks, fn block_info -> + DataColumns.missing_columns_for_block(block_info, custody_cols) == [] + end) + + # Process only a small batch to prevent OOM from accumulating + # BeaconStates (~300MB each) in memory. 
Yielding the GenServer + # between batches allows GC and prevents message queue buildup. + {batch, rest} = Enum.split(ready, @retry_batch_size) + + if batch != [] do + Logger.info( + "[PendingBlocks] Processing #{length(batch)} of #{length(ready)} ready blocks" <> + " (#{length(need_download)} still downloading)" + ) + end + + store = + Enum.reduce(batch, store, fn block_info, acc -> + block_info + |> Blocks.change_status(:pending) + |> then(&process_block_and_check_children(acc, &1)) + end) + + # Schedule a quick follow-up for remaining ready blocks. + if rest != [] do + Process.send_after(self(), :retry_download_columns, 1_000) + end + + # Blocks still missing columns: re-request downloads. + Enum.each(need_download, &request_missing_columns(&1, custody_cols)) + store {:error, reason} -> Logger.error("[PendingBlocks] Failed to get :download_columns blocks: #{reason}") + store end - - store end defp request_missing_columns(block_info, custody_cols) do @@ -276,7 +415,7 @@ defmodule LambdaEthereumConsensus.Beacon.PendingBlocks do log_md ) - case Blocks.get_block_info(parent_root) do + case Blocks.get_block_info_cached(parent_root) do nil -> Logger.debug( "[PendingBlocks] Add parent with root: #{Utils.format_shorten_binary(parent_root)} to download", @@ -308,41 +447,141 @@ defmodule LambdaEthereumConsensus.Beacon.PendingBlocks do {store, :invalid} %BlockInfo{status: :transitioned} -> - case ForkChoice.on_block(store, block_info) do - {:ok, store} -> - Logger.debug("[PendingBlocks] Block transitioned after ForkChoice.on_block/2", log_md) - Blocks.change_status(block_info, :transitioned) - {store, :transitioned} - - {:error, reason, store} -> - handle_on_block_error(store, block_info, reason, log_md) - end + process_transitioned_parent(store, block_info, message, log_md) _other -> {store, :ok} end end - defp handle_on_block_error(store, block_info, reason, log_md) do - if execution_layer_error?(reason) do - # Transient EL error (connectivity, auth, etc.) 
— keep block as :pending. - # process_blocks is only triggered by :transitioned/:invalid events, so we - # schedule a delayed retry message to the calling GenServer (Libp2pPort). - Logger.warning( - "[PendingBlocks] Transient EL error, scheduling retry: #{reason}", + defp process_transitioned_parent(store, block_info, message, log_md) do + # Skip blocks that are far behind the current head. During catch-up, + # sync batches download blocks that may already be superseded by the + # canonical chain. Processing them triggers expensive epoch processing + # (10+ minutes for rewards_and_penalties + committee computation with + # 2.2M validators) while blocking the Libp2pPort GenServer, causing + # massive message queue buildup (50K-100K+). + if message.slot + 2 < store.head_slot do + Logger.info( + "[PendingBlocks] Skipping block behind head (slot #{message.slot} vs head #{store.head_slot})", log_md ) - Process.send_after(self(), :retry_pending_blocks, 10_000) - {store, :ok} + Blocks.change_status(block_info, :transitioned) + {store, :transitioned} else - Logger.error( - "[PendingBlocks] Saving block as invalid after ForkChoice.on_block/2 error: #{reason}", - log_md - ) + case ForkChoice.on_block(store, block_info) do + {:ok, store} -> + Logger.debug( + "[PendingBlocks] Block transitioned after ForkChoice.on_block/2", + log_md + ) + + Blocks.change_status(block_info, :transitioned) + {store, :transitioned} + + {:error, reason, store} -> + handle_on_block_error(store, block_info, reason, log_md) + end + end + end + + defp handle_on_block_error(store, block_info, reason, log_md) do + cond do + execution_layer_error?(reason) -> + # Transient EL error (connectivity, auth, etc.) — keep block as :pending. + # process_blocks is only triggered by :transitioned/:invalid events, so we + # schedule a delayed retry message to the calling GenServer (Libp2pPort). 
+ Logger.warning( + "[PendingBlocks] Transient EL error, scheduling retry: #{reason}", + log_md + ) + + Process.send_after(self(), :retry_pending_blocks, 10_000) + {store, :ok} + + data_availability_error?(reason) -> + # Check whether columns are genuinely missing (transient — retry download) + # or all present but verification failed (likely corrupted download). + custody_cols = DasCore.get_local_custody_columns() + missing = DataColumns.missing_columns_for_block(block_info, custody_cols) + + if missing != [] do + Logger.warning( + "[PendingBlocks] Data not available (#{length(missing)} columns missing)," <> + " moving back to download_columns for retry", + log_md + ) + else + # All columns present but KZG verification failed — purge stored columns + # so they get re-downloaded fresh. Without this, retry_download_columns + # would see "no missing columns", move the block to :pending, and loop. + Logger.warning( + "[PendingBlocks] Data not available but all #{length(custody_cols)} custody" <> + " columns present — purging columns for re-download", + log_md + ) + + DataColumnDb.delete_columns_for_block(block_info.root, custody_cols) + end - Blocks.change_status(block_info, :invalid) - {store, :invalid} + Blocks.change_status(block_info, :download_columns) + request_missing_columns(block_info, custody_cols) + Process.send_after(self(), :retry_download_columns, 5_000) + {store, :ok} + + timing_error?(reason) -> + # "block is from the future" happens after GenServer restart when the + # store's time hasn't caught up via on_tick yet. Keep block as :pending + # and retry after a delay — the time will advance and the block will pass. 
+ Logger.warning( + "[PendingBlocks] Transient timing error, scheduling retry: #{reason}", + log_md + ) + + Process.send_after(self(), :retry_pending_blocks, 12_000) + {store, :ok} + + parent_state_missing_error?(reason) -> + # Parent state not found can be transient: the async LevelDB write may + # not have completed yet, or the state was evicted from the 16-entry ETS + # cache during expensive checkpoint state computation (epoch boundaries). + # Retry a few times to let the async write complete, but give up after + # @max_state_retries to avoid spinning forever when the state is truly lost + # (e.g., processed during catch-up mode where ETS/LevelDB writes are skipped). + retry_key = {:state_retry, block_info.root} + retries = Process.get(retry_key, 0) + + if retries < @max_state_retries do + Process.put(retry_key, retries + 1) + + Logger.warning( + "[PendingBlocks] Parent state not found (attempt #{retries + 1}/#{@max_state_retries}), scheduling retry: #{reason}", + log_md + ) + + Process.send_after(self(), :retry_pending_blocks, 5_000) + {store, :ok} + else + Process.delete(retry_key) + + Logger.error( + "[PendingBlocks] Parent state permanently unavailable after #{@max_state_retries} retries, marking invalid: #{reason}", + log_md + ) + + Blocks.change_status(block_info, :invalid) + {store, :invalid} + end + + true -> + Logger.error( + "[PendingBlocks] Saving block as invalid after ForkChoice.on_block/2 error: #{reason}", + log_md + ) + + Blocks.change_status(block_info, :invalid) + {store, :invalid} end end @@ -353,6 +592,28 @@ defmodule LambdaEthereumConsensus.Beacon.PendingBlocks do String.starts_with?(reason, "Error when calling execution client:") end + # Data availability failures are transient during catch-up sync — custody columns + # may not have been downloaded yet. The block should be retried, not invalidated. 
+ defp data_availability_error?(reason) do + reason == "data not available" + end + + # Timing errors happen after GenServer restart when the store's time hasn't + # been advanced by on_tick yet. The block is valid but appears to be "from + # the future" relative to the stale store time. + defp timing_error?(reason) do + reason == "block is from the future" + end + + # Parent state missing errors are transient: they occur when the ETS LRU + # cache (16 entries) evicts the parent state during expensive checkpoint + # state computation, and the async LevelDB write hasn't completed yet. + # After a short delay, the LevelDB write should finish and the state + # becomes retrievable. + defp parent_state_missing_error?(reason) do + String.contains?(reason, "not found in store") + end + defp process_downloaded_block(store, {:ok, [block]}) do {:ok, add_block(store, block)} end diff --git a/lib/lambda_ethereum_consensus/fork_choice/fork_choice.ex b/lib/lambda_ethereum_consensus/fork_choice/fork_choice.ex index 4e925a1b2..8d2ddf22d 100644 --- a/lib/lambda_ethereum_consensus/fork_choice/fork_choice.ex +++ b/lib/lambda_ethereum_consensus/fork_choice/fork_choice.ex @@ -17,6 +17,7 @@ defmodule LambdaEthereumConsensus.ForkChoice do alias LambdaEthereumConsensus.Store.BlobDb alias LambdaEthereumConsensus.Store.BlockDb alias LambdaEthereumConsensus.Store.Blocks + alias LambdaEthereumConsensus.Store.BlockStates alias LambdaEthereumConsensus.Store.StateDb alias LambdaEthereumConsensus.Store.StoreDb alias Types.Attestation @@ -27,6 +28,44 @@ defmodule LambdaEthereumConsensus.ForkChoice do ### Public API ########################## + # Persist the store asynchronously to avoid blocking the Libp2pPort GenServer. + # On mainnet, :erlang.term_to_binary + eleveldb.write can stall for minutes + # during LevelDB compaction, causing message queue explosion (observed 54K+ msgs). + # + # During catch-up sync (head_slot far behind wall clock), persist is skipped + # entirely because: + # 1. 
Deep-copying the Store struct (1.2M latest_messages) to a new process + # takes 1-9 seconds and can cause OOM on 62 GB systems + # 2. LevelDB is already under heavy write pressure from state/block writes + # 3. The store can be recovered from checkpoint + replay if the node crashes + # + # Once caught up (<= 2 slots behind), persists once per epoch at mid-epoch + # (slot mod 32 == 16). We must avoid slots near epoch boundaries because: + # - slot mod 32 == 0: epoch processing uses peak memory (rewards, merkleization) + # - slot mod 32 == 1: epoch memory hasn't been GC'd yet + # Mid-epoch gives maximum time for GC to reclaim epoch processing memory. + @slots_per_epoch 32 + @max_behind_slots 2 + defp async_persist_store(store) do + current_slot = compute_current_slot(store.time, store.genesis_time) + head_slot = store.head_slot || 0 + catching_up? = current_slot - head_slot > @max_behind_slots + + cond do + catching_up? -> + # Skip persist during catch-up to avoid OOM and reduce memory pressure + :skip + + rem(head_slot, @slots_per_epoch) == 16 -> + # Persist at mid-epoch. Serializes in-process (avoids Store deep-copy + # which takes 15s + 3-5 GB), then spawns only the LevelDB write. 
+ StoreDb.persist_store_async(store) + + true -> + :skip + end + end + @spec init_store(Store.t(), Types.uint64()) :: Store.t() def init_store(%Store{head_slot: head_slot, head_root: head_root} = store, time) do Logger.info("[Fork choice] Initialized store.", slot: head_slot) @@ -60,14 +99,14 @@ defmodule LambdaEthereumConsensus.ForkChoice do {:ok, new_store, timings} -> {new_store, timings} = StateTransition.timed(:recompute_head, timings, fn -> - recompute_head(new_store) + recompute_head(new_store, block_root, slot) end) new_store = prune_old_states(new_store, last_finalized_checkpoint.epoch) {_, timings} = StateTransition.timed(:store_persist, timings, fn -> - StoreDb.persist_store(new_store) + async_persist_store(new_store) end) total = System.monotonic_time(:millisecond) - total_start @@ -107,7 +146,7 @@ defmodule LambdaEthereumConsensus.ForkChoice do _ -> store end - tap(store, &StoreDb.persist_store/1) + tap(store, &async_persist_store/1) end @spec on_attester_slashing(Store.t(), Types.AttesterSlashing.t()) :: Store.t() @@ -116,7 +155,7 @@ defmodule LambdaEthereumConsensus.ForkChoice do case Handlers.on_attester_slashing(store, attester_slashing) do {:ok, new_store} -> - tap(new_store, &StoreDb.persist_store/1) + tap(new_store, &async_persist_store/1) _ -> Logger.error("[Fork choice] Failed to add attester slashing to the store") @@ -130,7 +169,7 @@ defmodule LambdaEthereumConsensus.ForkChoice do Handlers.on_tick(store, time) |> prune_old_states(last_finalized_checkpoint.epoch) - |> tap(&StoreDb.persist_store/1) + |> tap(&async_persist_store/1) end @spec get_current_slot(Types.Store.t()) :: Types.slot() @@ -224,13 +263,14 @@ defmodule LambdaEthereumConsensus.ForkChoice do end @spec get_current_status_message() :: Types.StatusMessage.t() - def get_current_status_message() do - %{ - head_root: head_root, - head_slot: head_slot, - finalized_checkpoint: %{root: finalized_root, epoch: finalized_epoch} - } = fetch_store!() - + def get_current_status_message(), 
do: get_current_status_message(fetch_store!()) + + @spec get_current_status_message(Store.t()) :: Types.StatusMessage.t() + def get_current_status_message(%{ + head_root: head_root, + head_slot: head_slot, + finalized_checkpoint: %{root: finalized_root, epoch: finalized_epoch} + }) do %Types.StatusMessage{ fork_digest: compute_fork_digest(head_slot, ChainSpec.get_genesis_validators_root()), finalized_root: finalized_root, @@ -241,16 +281,14 @@ defmodule LambdaEthereumConsensus.ForkChoice do end @spec get_current_status_message_v2() :: Types.StatusMessageV2.t() - def get_current_status_message_v2() do - %{ - head_root: head_root, - head_slot: head_slot, - finalized_checkpoint: %{root: finalized_root, epoch: finalized_epoch} - } = fetch_store!() - - # Conservatively report the start of the finalized epoch as the earliest - # available slot. TODO: track the checkpoint sync start slot explicitly for - # a more accurate value. + def get_current_status_message_v2(), do: get_current_status_message_v2(fetch_store!()) + + @spec get_current_status_message_v2(Store.t()) :: Types.StatusMessageV2.t() + def get_current_status_message_v2(%{ + head_root: head_root, + head_slot: head_slot, + finalized_checkpoint: %{root: finalized_root, epoch: finalized_epoch} + }) do earliest_available_slot = finalized_epoch * ChainSpec.get("SLOTS_PER_EPOCH") %Types.StatusMessageV2{ @@ -349,7 +387,65 @@ defmodule LambdaEthereumConsensus.ForkChoice do def process_block(%BlockInfo{signed_block: signed_block} = block_info, store) do attestations = signed_block.message.body.attestations attester_slashings = signed_block.message.body.attester_slashings + block_slot = signed_block.message.slot + wall_slot = get_current_chain_slot(store.genesis_time) + + # During catch-up (>4 slots behind), skip expensive prefetch_states and + # attestation processing. Prefetching checkpoint states from LevelDB takes + # 28-35s per block (300MB BeaconState deserialization), and committee + # computation takes 10s. 
Attestation processing has no value during catch-up + # since LMD-GHOST is already skipped. Using a small threshold (4 slots) + # instead of SLOTS_PER_EPOCH prevents the 25-35s prefetch_states cost at + # every epoch boundary during the transition from catch-up to normal mode. + # + # Check BOTH the arriving block's distance from wall clock AND our store's + # head distance from wall clock. If our head is far behind but a fresh + # gossip block arrives at tip (block_slot ≈ wall_slot), processing its + # attestations via prefetch_states still costs 30-45 s each — observed + # 2026-04-15 causing gap growth from 11 → 65 slots in 30 min. Treat + # "store head is far behind" as catching_up so we skip the expensive + # prefetch on every block until head catches up. + catching_up? = + wall_slot - block_slot > 4 or + wall_slot - store.head_slot > 4 + + {states, timings} = + if catching_up? do + {[], %{}} + else + prefetch_states_and_committees(store, attestations) + end + + # Re-touch the parent state in ETS so its TTL is fresh. This prevents + # eviction of the parent state during both prefetch_states (which can take + # seconds) and catch-up mode (where rapid sequential block processing can + # fill the 10-entry LRU cache, evicting the parent before the next block + # needs it). Without this, cache misses fall through to LevelDB reads + # that take 30s-10min+ on mainnet (775MB state deserialization + compaction). + BlockStates.touch(signed_block.message.parent_root) + + new_store = update_in(store.checkpoint_states, fn cs -> Map.merge(cs, Map.new(states)) end) + on_block_opts = if catching_up?, do: [skip_pulled_up_tip: true], else: [] + + with {:ok, new_store, handler_timings} <- apply_on_block(new_store, block_info, on_block_opts) do + timings = Map.merge(timings, handler_timings) + + if catching_up? do + # Skip attestation processing during catch-up — attestations from old + # blocks don't contribute to fork choice when LMD-GHOST is skipped. 
+ {:ok, new_store, timings} + else + with {:ok, new_store, timings} <- process_attestations(new_store, attestations, timings), + {:ok, new_store, timings} <- + process_attester_slashings(new_store, attester_slashings, timings) do + {:ok, new_store, timings} + end + end + end + end + + defp prefetch_states_and_committees(store, attestations) do # Prefetch relevant states. {states, timings} = StateTransition.timed(:prefetch_states, %{}, fn -> @@ -367,28 +463,22 @@ defmodule LambdaEthereumConsensus.ForkChoice do end end) - new_store = update_in(store.checkpoint_states, fn cs -> Map.merge(cs, Map.new(states)) end) - - with {:ok, new_store, handler_timings} <- apply_on_block(new_store, block_info) do - timings = Map.merge(timings, handler_timings) - - with {:ok, new_store, timings} <- process_attestations(new_store, attestations, timings), - {:ok, new_store, timings} <- - process_attester_slashings(new_store, attester_slashings, timings) do - {:ok, new_store, timings} - end - end + {states, timings} end def fetch_checkpoint_state(store, checkpoint) do - case Store.get_checkpoint_state(store, checkpoint) do + # Use cached-only fetch to avoid blocking the ForkChoice GenServer + # with 28-85s LevelDB reads for 775MB mainnet BeaconStates. + # If the state isn't in memory/ETS, we skip this checkpoint's attestations + # rather than stalling block processing for up to 85 seconds. + case Store.get_checkpoint_state_cached(store, checkpoint) do {_store, nil} -> [] {_store, state} -> [{checkpoint, state}] end end - defp apply_on_block(store, block_info) do - Handlers.on_block(store, block_info) + defp apply_on_block(store, block_info, opts \\ []) do + Handlers.on_block(store, block_info, opts) end defp process_attester_slashings(store, attester_slashings, timings) do @@ -421,18 +511,35 @@ defmodule LambdaEthereumConsensus.ForkChoice do # Recomputes the head in the store and sends the new head to others (libP2P, # operations collector db, execution chain db). 
- @spec recompute_head(Store.t()) :: Store.t() - defp recompute_head(store) do - {:ok, head_root} = Head.get_head(store) - head_block = Blocks.get_block!(head_root) + @spec recompute_head(Store.t(), Types.root(), Types.slot()) :: Store.t() + defp recompute_head(store, block_root, block_slot) do + wall_slot = get_current_chain_slot(store.genesis_time) + + head_root = + if wall_slot - block_slot > 1 do + # When behind the chain tip (>1 slot), head is the latest processed + # block. Skip expensive LMD-GHOST (~3-4s) since during catch-up there + # are no competing forks — we only have the canonical chain from peers. + block_root + else + {:ok, root} = Head.get_head(store) + root + end + + # Cache-only — avoid blocking Libp2pPort on LevelDB reads. + head_block = Blocks.get_block_cached(head_root) - Handlers.notify_forkchoice_update(store, head_block) + if head_block do + Handlers.notify_forkchoice_update(store, head_block) - %{slot: slot, body: body} = head_block + %{slot: slot, body: body} = head_block + + OperationsCollector.notify_new_block(head_block) + Libp2pPort.notify_new_head(slot, head_root) + ExecutionChain.notify_new_block(slot, body.eth1_data, body.execution_payload) + end - OperationsCollector.notify_new_block(head_block) - Libp2pPort.notify_new_head(slot, head_root) - ExecutionChain.notify_new_block(slot, body.eth1_data, body.execution_payload) + slot = if head_block, do: head_block.slot, else: store.head_slot || 0 Logger.debug("[Fork choice] Updated fork choice cache", slot: slot) diff --git a/lib/lambda_ethereum_consensus/fork_choice/handlers.ex b/lib/lambda_ethereum_consensus/fork_choice/handlers.ex index ce4779892..90c622c42 100644 --- a/lib/lambda_ethereum_consensus/fork_choice/handlers.ex +++ b/lib/lambda_ethereum_consensus/fork_choice/handlers.ex @@ -58,17 +58,24 @@ defmodule LambdaEthereumConsensus.ForkChoice.Handlers do A block that is asserted as invalid due to unavailable PoW block may be valid at a later time, consider scheduling it for later 
processing in such case. """ - @spec on_block(Store.t(), BlockInfo.t()) :: + @spec on_block(Store.t(), BlockInfo.t(), keyword()) :: {:ok, Store.t(), StateTransition.timings()} | {:error, String.t()} - def on_block(%Store{} = store, %BlockInfo{} = block_info) do + def on_block(%Store{} = store, %BlockInfo{} = block_info, opts \\ []) do block = block_info.signed_block.message %{epoch: finalized_epoch, root: finalized_root} = store.finalized_checkpoint finalized_slot = Misc.compute_start_slot_at_epoch(finalized_epoch) - base_state = Store.get_state(store, block.parent_root) + # Use cache-only lookup to avoid blocking Libp2pPort on LevelDB reads. + # On ETS cache miss, we drop the block (returning an error). Optimistic + # sync will re-pull blocks in sequence, at which point each parent is + # freshly cached from the previous block's processing. This prevents + # 10+ minute stalls from eleveldb.get/3 NIF calls of 775MB mainnet + # BeaconStates that block the scheduler. + base_state = Store.get_state_cached(store, block.parent_root) cond do - # Parent block must be known + # Parent block must be known (or parent state evicted from cache — + # drop block, optimistic sync will recover) base_state |> is_nil() -> {:error, "parent state (block root = #{Base.encode16(block.parent_root)}) not found in store"} @@ -94,7 +101,7 @@ defmodule LambdaEthereumConsensus.ForkChoice.Handlers do end) if da_ok? 
do - compute_post_state(store, block_info, base_state, timings) + compute_post_state(store, block_info, base_state, timings, opts) else {:error, "data not available"} end @@ -111,10 +118,14 @@ defmodule LambdaEthereumConsensus.ForkChoice.Handlers do def data_available?(_beacon_block_root, []), do: true def data_available?(beacon_block_root, blob_kzg_commitments) do - if HardForkAliasInjection.fulu?() do - columns_data_available?(beacon_block_root, blob_kzg_commitments) + if Application.get_env(:lambda_ethereum_consensus, :skip_data_availability, false) do + true else - blobs_data_available?(beacon_block_root, blob_kzg_commitments) + if HardForkAliasInjection.fulu?() do + columns_data_available?(beacon_block_root, blob_kzg_commitments) + else + blobs_data_available?(beacon_block_root, blob_kzg_commitments) + end end end @@ -177,12 +188,20 @@ defmodule LambdaEthereumConsensus.ForkChoice.Handlers do is_from_block ) do with :ok <- check_attestation_valid(store, attestation, is_from_block), - # Get state at the `target` to fully validate attestation + # Get state at the `target` to fully validate attestation. + # Use cache-only lookup to avoid blocking Libp2pPort on LevelDB reads. + # Existing nil handling (below) skips the attestation if state isn't + # cached — attestations are best-effort for fork choice. {new_store, target_state} when not is_nil(target_state) <- - Store.get_checkpoint_state(store, attestation.data.target), + Store.get_checkpoint_state_cached(store, attestation.data.target), {:ok, indexed_attestation} <- Accessors.get_indexed_attestation(target_state, attestation), - :ok <- check_valid_indexed_attestation(target_state, indexed_attestation) do + # Block attestations were already BLS-verified during state transition. 
+ :ok <- + if(is_from_block, + do: :ok, + else: check_valid_indexed_attestation(target_state, indexed_attestation) + ) do # Update latest messages for attesting indices update_latest_messages(new_store, indexed_attestation.attesting_indices, attestation) else @@ -230,8 +249,18 @@ defmodule LambdaEthereumConsensus.ForkChoice.Handlers do attestation_2: %IndexedAttestation{} = attestation_2 } ) do - state = Store.get_state!(store, store.justified_checkpoint.root).beacon_state + # Cache-only lookup — avoid blocking on LevelDB read of 775MB state. + # If justified checkpoint state isn't cached, skip this slashing (best-effort). + case Store.get_state_cached(store, store.justified_checkpoint.root) do + nil -> + {:error, "justified checkpoint state not cached, skipping slashing"} + + %{beacon_state: state} -> + check_attester_slashing(store, state, attestation_1, attestation_2) + end + end + defp check_attester_slashing(store, state, attestation_1, attestation_2) do cond do not Predicates.slashable_attestation_data?(attestation_1.data, attestation_2.data) -> {:error, "attestation is not slashable"} @@ -257,7 +286,8 @@ defmodule LambdaEthereumConsensus.ForkChoice.Handlers do %Store{} = store, %BlockInfo{} = block_info, %StateInfo{} = state_info, - timings + timings, + opts \\ [] ) do block = block_info.signed_block.message @@ -300,14 +330,34 @@ defmodule LambdaEthereumConsensus.ForkChoice.Handlers do time_into_slot = rem(store.time - store.genesis_time, seconds_per_slot) is_before_attesting_interval = time_into_slot < div(seconds_per_slot, intervals_per_slot) - # Add new block and state to the store + # Add new block and state to the in-memory store map (O(1)). new_store = Store.store_state(store, new_state_info.block_root, new_state_info) - BlockStates.store_state_info(new_state_info) + catching_up? = Keyword.get(opts, :skip_pulled_up_tip, false) + + # Always write to ETS cache so the state is available for fork choice + # lookups even after catch-up transitions. 
The ETS insert takes ~160ms + # which is acceptable even during catch-up (blocks process in 1-2s). + # Without this, states processed during catch-up are only in store.states + # (in-memory map) which gets pruned after finalization, permanently losing + # the state and causing cascade invalid block failures. + {_, timings} = + StateTransition.timed(:store_state, timings, fn -> + BlockStates.store_state_info(new_state_info) + end) - Task.Supervisor.start_child( - StoreStatesSupervisor, - fn -> StateDb.store_state_info(new_state_info) end - ) + # LevelDB write is expensive (~30-60s for serialization) and even + # infrequent writes cause compaction of 448MB SST tables that block + # concurrent reads for 5-10+ minutes on mainnet. Only persist at epoch + # boundaries (~every 6.4 min) and only when at head. This gives ~1 + # LevelDB write per epoch instead of 8 (every 4th block) or 32 (every + # block). The ETS LRU cache (10 entries) is the primary storage; + # LevelDB is only for crash recovery to the nearest epoch boundary. + if not catching_up? 
and rem(block.slot, ChainSpec.get("SLOTS_PER_EPOCH")) == 0 do + Task.Supervisor.start_child( + StoreStatesSupervisor, + fn -> StateDb.store_state_info(new_state_info) end + ) + end is_first_block = new_store.proposer_boost_root == <<0::256>> @@ -317,19 +367,39 @@ defmodule LambdaEthereumConsensus.ForkChoice.Handlers do state = new_state_info.beacon_state - new_store - |> Store.store_block_info(block_info) - |> if_then_update( - is_timely and is_first_block, - &%{&1 | proposer_boost_root: block_info.root} - ) - # Update checkpoints in store if necessary - |> update_checkpoints(state.current_justified_checkpoint, state.finalized_checkpoint) - # Eagerly compute unrealized justification and finality - |> compute_pulled_up_tip(block_info.root, block_info.signed_block.message, state) - |> case do - {:ok, store} -> {:ok, store, timings} - err -> err + {new_store, timings} = + StateTransition.timed(:store_block, timings, fn -> + new_store + |> Store.store_block_info(block_info) + |> if_then_update( + is_timely and is_first_block, + &%{&1 | proposer_boost_root: block_info.root} + ) + # Update checkpoints in store if necessary + |> update_checkpoints(state.current_justified_checkpoint, state.finalized_checkpoint) + end) + + # Eagerly compute unrealized justification and finality. + # Skip during catch-up: unrealized checkpoints are only needed for + # fork choice head computation, which doesn't run during catch-up sync. + # Each call scans 2.2M validators twice (~210ms per block). 
+ if Keyword.get(opts, :skip_pulled_up_tip, false) do + {:ok, new_store, timings} + else + {result, timings} = + StateTransition.timed(:pulled_up_tip, timings, fn -> + compute_pulled_up_tip( + new_store, + block_info.root, + block_info.signed_block.message, + state + ) + end) + + case result do + {:ok, store} -> {:ok, store, timings} + err -> err + end end end end @@ -337,14 +407,20 @@ defmodule LambdaEthereumConsensus.ForkChoice.Handlers do @spec notify_forkchoice_update(Store.t(), BeaconBlock.t()) :: {:ok, any()} | {:error, any()} def notify_forkchoice_update(store, head_block) do - finalized_block = Blocks.get_block!(store.finalized_checkpoint.root) - - # TODO: do someting with the result from the execution client - ExecutionClient.notify_forkchoice_updated(%{ - finalized_block_hash: finalized_block.body.execution_payload.block_hash, - head_block_hash: head_block.body.execution_payload.block_hash, - safe_block_hash: Store.get_safe_execution_payload_hash(store) - }) + # Cache-only — avoid blocking Libp2pPort on LevelDB reads. 
+ finalized_block = Blocks.get_block_cached(store.finalized_checkpoint.root) + safe_block = Blocks.get_block_cached(store.justified_checkpoint.root) + # Safe block = justified checkpoint block (spec get_safe_beacon_block_root); both must be cached. + if is_nil(finalized_block) or is_nil(safe_block) do + {:error, "finalized/safe block not cached"} + else + # TODO: do something with the result from the execution client + ExecutionClient.notify_forkchoice_updated(%{ + finalized_block_hash: finalized_block.body.execution_payload.block_hash, + head_block_hash: head_block.body.execution_payload.block_hash, + safe_block_hash: safe_block.body.execution_payload.block_hash + }) + end end ### Private functions ### @@ -454,7 +530,9 @@ defmodule LambdaEthereumConsensus.ForkChoice.Handlers do defp check_attestation_valid(%Store{} = store, %Attestation{} = attestation, true) do target = attestation.data.target block_root = attestation.data.beacon_block_root - head_block = Blocks.get_block(block_root) + # Cache-only lookups — avoid blocking Libp2pPort on eleveldb.get/3. + # If block data isn't in the 512-entry LRU, treat as unknown and skip. + head_block = Blocks.get_block_cached(block_root) # NOTE: we use cond instead of an `and` chain for better formatting cond do @@ -465,7 +543,7 @@ defmodule LambdaEthereumConsensus.ForkChoice.Handlers do # Attestation target must be for a known block. # If target block is unknown, delay consideration until block is found # TODO: delay consideration until block is found - Blocks.get_block(target.root) |> is_nil() -> + Blocks.get_block_cached(target.root) |> is_nil() -> {:unknown_block, target.root} # Attestations must be for a known block.
If block is unknown, delay consideration until the block is found diff --git a/lib/lambda_ethereum_consensus/fork_choice/head.ex b/lib/lambda_ethereum_consensus/fork_choice/head.ex index 0d14d1a27..b9382a7ca 100644 --- a/lib/lambda_ethereum_consensus/fork_choice/head.ex +++ b/lib/lambda_ethereum_consensus/fork_choice/head.ex @@ -15,11 +15,19 @@ defmodule LambdaEthereumConsensus.ForkChoice.Head do # Execute the LMD-GHOST fork choice head = store.justified_checkpoint.root - {_store, %BeaconState{} = justified_state} = - Store.get_checkpoint_state(store, store.justified_checkpoint) - - head = compute_head(store, filtered_blocks, head, justified_state) - {:ok, head} + # Cache-only checkpoint state lookup to avoid Libp2pPort stalling on + # eleveldb.get/3 (10+ min NIF blocks). If justified state isn't cached, + # keep the store's current head root (falling back to the justified + # checkpoint root when no head is set) and skip the LMD-GHOST weight + # computation. Conservative: the next block re-attempts with a warm cache. + case Store.get_checkpoint_state_cached(store, store.justified_checkpoint) do + {_store, %BeaconState{} = justified_state} -> + head = compute_head(store, filtered_blocks, head, justified_state) + {:ok, head} + + {_store, nil} -> + {:ok, store.head_root || store.justified_checkpoint.root} + end end defp compute_head(store, blocks, current_root, justified_state) do @@ -46,8 +54,18 @@ defmodule LambdaEthereumConsensus.ForkChoice.Head do end defp get_weight(%Store{} = store, root, state) do - block = Blocks.get_block!(root) + # Cache-only — avoid blocking Libp2pPort on LevelDB reads. + block = Blocks.get_block_cached(root) + # If block isn't cached, return 0 weight (conservative — favors cached branches).
+ if is_nil(block) do + 0 + else + get_weight_for_block(store, root, block, state) + end + end + + defp get_weight_for_block(store, root, block, state) do # PERF: use ``Aja.Vector.foldl`` {attestation_score, _} = Accessors.get_active_validator_indices(state, Accessors.get_current_epoch(state)) @@ -93,9 +111,16 @@ defmodule LambdaEthereumConsensus.ForkChoice.Head do # Only return the roots and their parent roots. defp get_filtered_block_tree(%Store{} = store) do base = store.justified_checkpoint.root - block = Blocks.get_block!(base) - {_, blocks} = filter_block_tree(store, base, block, %{}) - Enum.map(blocks, fn {root, block} -> {root, block.parent_root} end) + # Cache-only — justified root should almost always be cached. + block = Blocks.get_block_cached(base) + + if is_nil(block) do + # Return empty tree — head defaults to justified root. + [] + else + {_, blocks} = filter_block_tree(store, base, block, %{}) + Enum.map(blocks, fn {root, block} -> {root, block.parent_root} end) + end end defp filter_block_tree(%Store{} = store, block_root, block, blocks) do @@ -121,58 +146,86 @@ defmodule LambdaEthereumConsensus.ForkChoice.Head do end defp filter_leaf_block(%Store{} = store, block_root, block, blocks) do + correct_justified = justified_check(store, block_root) + correct_finalized = finalized_check(store, block_root) + + # If expected finalized/justified, add to viable block-tree and signal viability to parent. 
+ if correct_justified and correct_finalized do + {true, Map.put(blocks, block_root, block)} + else + {false, blocks} + end + end + + defp justified_check(%Store{} = store, block_root) do current_epoch = Store.get_current_epoch(store) voting_source = get_voting_source(store, block_root) - # The voting source should be at the same height as the store's justified checkpoint - correct_justified = + correct = store.justified_checkpoint.epoch == Constants.genesis_epoch() or voting_source.epoch == store.justified_checkpoint.epoch or voting_source.epoch + 2 >= current_epoch - # If the previous epoch is justified, the block should be pulled-up. In this case, check that unrealized - # justification is higher than the store and that the voting source is not more than two epochs ago - correct_justified = - if not correct_justified and previous_epoch_justified?(store) do - store.unrealized_justifications[block_root].epoch >= store.justified_checkpoint.epoch and - voting_source.epoch + 2 >= current_epoch - else - correct_justified - end + if not correct and previous_epoch_justified?(store) do + pulled_up_check(store, block_root, voting_source, current_epoch) + else + correct + end + end - finalized_checkpoint_block = - Store.get_checkpoint_block( - store, - block_root, - store.finalized_checkpoint.epoch - ) + defp pulled_up_check(store, block_root, voting_source, current_epoch) do + unrealized = store.unrealized_justifications[block_root] - correct_finalized = - store.finalized_checkpoint.epoch == Constants.genesis_epoch() or - store.finalized_checkpoint.root == finalized_checkpoint_block + unrealized != nil and + unrealized.epoch >= store.justified_checkpoint.epoch and + voting_source.epoch + 2 >= current_epoch + end - # If expected finalized/justified, add to viable block-tree and signal viability to parent. 
- if correct_justified and correct_finalized do - {true, Map.put(blocks, block_root, block)} - else - # Otherwise, branch not viable - {false, blocks} - end + defp finalized_check(%Store{} = store, block_root) do + store.finalized_checkpoint.epoch == Constants.genesis_epoch() or + store.finalized_checkpoint.root == + Store.get_checkpoint_block(store, block_root, store.finalized_checkpoint.epoch) end # Compute the voting source checkpoint in event that block with root ``block_root`` is the head block defp get_voting_source(%Store{} = store, block_root) do - block = Blocks.get_block!(block_root) + # Cache-only — avoid blocking Libp2pPort on LevelDB reads. + case Blocks.get_block_cached(block_root) do + nil -> + # Block not cached — fall back to justified checkpoint. + store.justified_checkpoint + + block -> + get_voting_source_for_block(store, block_root, block) + end + end + + defp get_voting_source_for_block(store, block_root, block) do current_epoch = Store.get_current_epoch(store) block_epoch = Misc.compute_epoch_at_slot(block.slot) if current_epoch > block_epoch do - # The block is from a prior epoch, the voting source will be pulled-up - store.unrealized_justifications[block_root] + # The block is from a prior epoch, the voting source will be pulled-up. + # After restart/recovery, unrealized_justifications may not have this root + # (rebuild_tree doesn't populate it). Fall back to the block's state. + store.unrealized_justifications[block_root] || + voting_source_fallback(store, block_root) else - # The block is not from a prior epoch, therefore the voting source is not pulled up - head_state = Store.get_state!(store, block_root).beacon_state - head_state.current_justified_checkpoint + # The block is not from a prior epoch, therefore the voting source is not pulled up. + # Use cache-only lookup to avoid Libp2pPort stalling on LevelDB reads. 
+ # On cache miss, fall back to voting_source_fallback which also uses cached + # lookups and returns store.justified_checkpoint if no state is available. + case Store.get_state_cached(store, block_root) do + %{beacon_state: state} -> state.current_justified_checkpoint + nil -> voting_source_fallback(store, block_root) + end + end + end + + defp voting_source_fallback(store, block_root) do + case Store.get_state_cached(store, block_root) do + %{beacon_state: state} -> state.current_justified_checkpoint + nil -> store.justified_checkpoint end end diff --git a/lib/lambda_ethereum_consensus/p2p/block_downloader.ex b/lib/lambda_ethereum_consensus/p2p/block_downloader.ex index 9b81b7197..c14da3dbb 100644 --- a/lib/lambda_ethereum_consensus/p2p/block_downloader.ex +++ b/lib/lambda_ethereum_consensus/p2p/block_downloader.ex @@ -66,30 +66,48 @@ defmodule LambdaEthereumConsensus.P2P.BlockDownloader do def request_blocks_by_range(slot, count, on_blocks, retries) do Logger.debug("Requesting block", slot: slot) - peer_id = get_some_peer() - - request = - %Types.BeaconBlocksByRangeRequest{start_slot: slot, count: count} - |> ReqResp.encode_request() - - Libp2pPort.send_async_request(peer_id, @blocks_by_range_protocol_id, request, fn store, - response -> - Metrics.handler_span( - "response_handler", - "blocks_by_range", - fn -> - handle_blocks_by_range_response( - store, - response, - slot, - count, - retries, - peer_id, - on_blocks + case get_some_peer() do + :no_peers -> + # See comment in `request_blocks_by_root/3` — raising on no-peers used + # to crash Libp2pPort. Callers re-schedule on their own heartbeats + # (SyncBlocks.run is invoked from `:sync_blocks`), so we can safely + # no-op here. 
+ :telemetry.execute( + [:network, :request], + %{blocks: 0}, + %{type: "by_slot", reason: "no_peers", result: "error"} + ) + + Logger.warning("[BlockDownloader] No peers available for BlocksByRange; will retry", + slot: slot + ) + + :ok + + peer_id -> + request = + %Types.BeaconBlocksByRangeRequest{start_slot: slot, count: count} + |> ReqResp.encode_request() + + Libp2pPort.send_async_request(peer_id, @blocks_by_range_protocol_id, request, fn store, + response -> + Metrics.handler_span( + "response_handler", + "blocks_by_range", + fn -> + handle_blocks_by_range_response( + store, + response, + slot, + count, + retries, + peer_id, + on_blocks + ) + end ) - end - ) - end) + end) + end end defp handle_blocks_by_range_response(store, response, slot, count, retries, peer_id, on_blocks) do @@ -142,20 +160,42 @@ defmodule LambdaEthereumConsensus.P2P.BlockDownloader do def request_blocks_by_root(roots, on_blocks, retries) do Logger.debug("Requesting block for roots #{Enum.map_join(roots, ", ", &Base.encode16/1)}") - peer_id = get_some_peer() - - request = ReqResp.encode_request({roots, TypeAliases.beacon_blocks_by_root_request()}) + case get_some_peer() do + :no_peers -> + # Peerbook is empty — this is recoverable (peers will reconnect / be + # rediscovered), and the block is already queued in + # `Blocks.add_block_to_download` by the caller, so it'll be retried on + # the next :check_pending_blocks tick once peers are back. Previously + # we raised here, which crashed the whole Libp2pPort GenServer + # (observed 2026-04-20 22:30): a multi-minute prefetch_states stall + # let all peers time out, and the subsequent :check_pending_blocks + # hit an empty Peerbook and crash-looped Libp2pPort every ~4 s. See + # TODO #1317. We intentionally do NOT invoke `on_blocks` here — we + # don't have a Store reference, and doing nothing preserves the block + # in the download queue for the next tick to retry. 
+ :telemetry.execute( + [:network, :request], + %{blocks: 0}, + %{type: "by_root", reason: "no_peers", result: "error"} + ) + + Logger.warning("[BlockDownloader] No peers available for BlocksByRoot; will retry") + :ok - Libp2pPort.send_async_request(peer_id, @blocks_by_root_protocol_id, request, fn store, - response -> - Metrics.handler_span( - "response_handler", - "blocks_by_root", - fn -> - handle_blocks_by_root_response(store, response, roots, on_blocks, peer_id, retries) - end - ) - end) + peer_id -> + request = ReqResp.encode_request({roots, TypeAliases.beacon_blocks_by_root_request()}) + + Libp2pPort.send_async_request(peer_id, @blocks_by_root_protocol_id, request, fn store, + response -> + Metrics.handler_span( + "response_handler", + "blocks_by_root", + fn -> + handle_blocks_by_root_response(store, response, roots, on_blocks, peer_id, retries) + end + ) + end) + end end defp handle_blocks_by_root_response(store, response, roots, on_blocks, peer_id, retries) do @@ -186,8 +226,11 @@ defmodule LambdaEthereumConsensus.P2P.BlockDownloader do defp get_some_peer() do case P2P.Peerbook.get_some_peer() do nil -> - # TODO: (#1317) handle no-peers asynchronously - raise "No peers available to request blocks from." + # Return a sentinel instead of raising — callers handle :no_peers + # gracefully by leaving the pending block in the download queue and + # retrying on the next :check_pending_blocks tick. Raising here + # previously crashed the owning Libp2pPort GenServer (TODO #1317). 
+ :no_peers peer_id -> peer_id diff --git a/lib/lambda_ethereum_consensus/p2p/data_column_downloader.ex b/lib/lambda_ethereum_consensus/p2p/data_column_downloader.ex index c2eb7ed16..40dca333f 100644 --- a/lib/lambda_ethereum_consensus/p2p/data_column_downloader.ex +++ b/lib/lambda_ethereum_consensus/p2p/data_column_downloader.ex @@ -142,7 +142,8 @@ defmodule LambdaEthereumConsensus.P2P.DataColumnDownloader do peer_id = Enum.find_value(column_indices, fn idx -> P2P.Peerbook.get_peer_for_column(idx) end) || - P2P.Peerbook.get_peerdas_peer() + P2P.Peerbook.get_peerdas_peer() || + get_some_peer() # Group by block_root and convert to DataColumnsByRootIdentifier (spec format). by_root_identifiers = diff --git a/lib/lambda_ethereum_consensus/p2p/incoming_requests_handler.ex b/lib/lambda_ethereum_consensus/p2p/incoming_requests_handler.ex index a5caa6871..40116c560 100644 --- a/lib/lambda_ethereum_consensus/p2p/incoming_requests_handler.ex +++ b/lib/lambda_ethereum_consensus/p2p/incoming_requests_handler.ex @@ -41,13 +41,16 @@ defmodule LambdaEthereumConsensus.P2P.IncomingRequestsHandler do @request_names |> Enum.map(&Enum.join([@request_prefix, &1, "/ssz_snappy"])) end - @spec handle(String.t(), String.t(), binary()) :: {:ok, any()} | {:error, String.t()} - def handle(@request_prefix <> name, message_id, message) do + @spec handle(String.t(), String.t(), binary(), Types.Store.t() | nil) :: + {:ok, any()} | {:error, String.t()} + def handle(protocol, message_id, message, store \\ nil) + + def handle(@request_prefix <> name, message_id, message, store) do Logger.debug("'#{name}' request received") result = Metrics.handler_span("request_handler", name |> String.split("/") |> List.first(), fn -> - handle_req(name, message_id, message) + handle_req(name, message_id, message, store) end) case result do @@ -56,32 +59,42 @@ defmodule LambdaEthereumConsensus.P2P.IncomingRequestsHandler do end end - @spec handle_req(String.t(), String.t(), binary()) :: + @spec 
handle_req(String.t(), String.t(), binary(), Types.Store.t() | nil) :: {:ok, any()} | {:error, String.t()} - defp handle_req(protocol_name, message_id, message) + defp handle_req(protocol_name, message_id, message, store) - defp handle_req("status/1/ssz_snappy", message_id, message) do + defp handle_req("status/1/ssz_snappy", message_id, message, store) do with {:ok, request} <- ReqResp.decode_request(message, Types.StatusMessage) do Logger.debug("[Status] '#{inspect(request)}'") - payload = ForkChoice.get_current_status_message() |> ReqResp.encode_ok() - {:ok, {message_id, payload}} + + payload = + if store, + do: ForkChoice.get_current_status_message(store), + else: ForkChoice.get_current_status_message() + + {:ok, {message_id, ReqResp.encode_ok(payload)}} end end - defp handle_req("status/2/ssz_snappy", message_id, message) do + defp handle_req("status/2/ssz_snappy", message_id, message, store) do with {:ok, request} <- ReqResp.decode_request(message, Types.StatusMessageV2) do Logger.debug("[StatusV2] '#{inspect(request)}'") - payload = ForkChoice.get_current_status_message_v2() |> ReqResp.encode_ok() - {:ok, {message_id, payload}} + + payload = + if store, + do: ForkChoice.get_current_status_message_v2(store), + else: ForkChoice.get_current_status_message_v2() + + {:ok, {message_id, ReqResp.encode_ok(payload)}} end end - defp handle_req("goodbye/1/ssz_snappy", _, "") do + defp handle_req("goodbye/1/ssz_snappy", _, "", _store) do # ignore empty messages {:error, "Empty message"} end - defp handle_req("goodbye/1/ssz_snappy", message_id, message) do + defp handle_req("goodbye/1/ssz_snappy", message_id, message, _store) do case ReqResp.decode_request(message, TypeAliases.uint64()) do {:ok, goodbye_reason} -> Logger.debug("[Goodbye] reason: #{goodbye_reason}") @@ -94,7 +107,7 @@ defmodule LambdaEthereumConsensus.P2P.IncomingRequestsHandler do end end - defp handle_req("ping/1/ssz_snappy", message_id, message) do + defp handle_req("ping/1/ssz_snappy", message_id, 
message, _store) do # Values are hardcoded with {:ok, seq_num} <- ReqResp.decode_request(message, TypeAliases.uint64()) do Logger.debug("[Ping] seq_number: #{seq_num}") @@ -104,13 +117,13 @@ defmodule LambdaEthereumConsensus.P2P.IncomingRequestsHandler do end end - defp handle_req("metadata/2/ssz_snappy", message_id, _message) do + defp handle_req("metadata/2/ssz_snappy", message_id, _message, _store) do # NOTE: there's no request content so we just ignore it payload = Metadata.get_metadata() |> ReqResp.encode_ok() {:ok, {message_id, payload}} end - defp handle_req("beacon_blocks_by_range/2/ssz_snappy", message_id, message) do + defp handle_req("beacon_blocks_by_range/2/ssz_snappy", message_id, message, _store) do with {:ok, request} <- ReqResp.decode_request(message, Types.BeaconBlocksByRangeRequest) do %{start_slot: start_slot, count: count} = request @@ -120,11 +133,22 @@ defmodule LambdaEthereumConsensus.P2P.IncomingRequestsHandler do end_slot = start_slot + (truncated_count - 1) - # TODO: extend cache to support slots as keys + # Spawn a Task for LevelDB reads to avoid blocking Libp2pPort. + # BlocksByRange requires slot-keyed lookups (no ETS cache), so we + # run them off the main process. If the task takes too long, return + # an empty response rather than stalling Libp2pPort. 
+ task = + Task.async(fn -> + start_slot..end_slot + |> Enum.map(&BlockDb.get_block_info_by_slot/1) + |> Enum.map(&map_block_result/1) + end) + response_chunk = - start_slot..end_slot - |> Enum.map(&BlockDb.get_block_info_by_slot/1) - |> Enum.map(&map_block_result/1) + case Task.yield(task, 5_000) || Task.shutdown(task, :brutal_kill) do + {:ok, results} -> results + _timeout_or_exit -> [] + end |> Enum.reject(&(&1 == :skip)) |> ReqResp.encode_response() @@ -132,18 +156,22 @@ defmodule LambdaEthereumConsensus.P2P.IncomingRequestsHandler do end end - defp handle_req("beacon_blocks_by_root/2/ssz_snappy", message_id, message) do + defp handle_req("beacon_blocks_by_root/2/ssz_snappy", message_id, message, _store) do with {:ok, roots} <- ReqResp.decode_request(message, TypeAliases.beacon_blocks_by_root_request()) do count = length(roots) Logger.info("[BlocksByRoot] requested #{count} number of blocks") truncated_count = min(count, ChainSpec.get("MAX_REQUEST_BLOCKS")) + # Cache-only block lookups to avoid blocking Libp2pPort on LevelDB reads. response_chunk = roots |> Enum.take(truncated_count) - |> Enum.map(&Blocks.get_block_info/1) - |> Enum.map(&map_block_result/1) + |> Enum.map(&Blocks.get_block_info_cached/1) + |> Enum.map(fn + nil -> :skip + block_info -> map_block_result(block_info) + end) |> Enum.reject(&(&1 == :skip)) |> ReqResp.encode_response() @@ -151,13 +179,13 @@ defmodule LambdaEthereumConsensus.P2P.IncomingRequestsHandler do end end - defp handle_req("metadata/3/ssz_snappy", message_id, _message) do + defp handle_req("metadata/3/ssz_snappy", message_id, _message, _store) do # MetadataV3 (Fulu): adds custody_group_count to the metadata response.
payload = Metadata.get_metadata() |> ReqResp.encode_ok() {:ok, {message_id, payload}} end - defp handle_req("data_column_sidecars_by_root/1/ssz_snappy", message_id, message) do + defp handle_req("data_column_sidecars_by_root/1/ssz_snappy", message_id, message, _store) do with {:ok, identifiers} <- ReqResp.decode_request(message, TypeAliases.data_column_sidecars_by_root_request()) do # Each DataColumnsByRootIdentifier has block_root + columns (list of indices). @@ -186,7 +214,7 @@ defmodule LambdaEthereumConsensus.P2P.IncomingRequestsHandler do end end - defp handle_req("data_column_sidecars_by_range/1/ssz_snappy", message_id, _message) do + defp handle_req("data_column_sidecars_by_range/1/ssz_snappy", message_id, _message, _store) do # DataColumnSidecarsByRangeRequest has: start_slot, count, columns. # We serve stored sidecars for the requested slot range and column indices. # TODO: implement full range serving once DataColumnDb supports slot-indexed iteration. @@ -194,7 +222,7 @@ defmodule LambdaEthereumConsensus.P2P.IncomingRequestsHandler do {:ok, {message_id, ReqResp.encode_response([])}} end - defp handle_req(protocol, _message_id, _message) do + defp handle_req(protocol, _message_id, _message, _store) do # This should never happen, since Libp2p only accepts registered protocols {:error, "Unsupported protocol: #{protocol}"} end diff --git a/lib/lambda_ethereum_consensus/p2p/peerbook.ex b/lib/lambda_ethereum_consensus/p2p/peerbook.ex index 6d8d2e69c..7eff6a4e9 100644 --- a/lib/lambda_ethereum_consensus/p2p/peerbook.ex +++ b/lib/lambda_ethereum_consensus/p2p/peerbook.ex @@ -11,9 +11,12 @@ defmodule LambdaEthereumConsensus.P2P.Peerbook do @initial_score 100 @penalizing_score 15 - @target_peers 128 - @max_prune_size 8 - @prune_percentage 0.05 + # Hard cap: reject new peers above this limit to prevent Libp2pPort overload. + @max_peers 100 + # Soft target: start evicting low-value peers when above this count. 
+ @target_peers 80 + @max_prune_size 10 + @prune_percentage 0.10 if HardForkAliasInjection.fulu?() do @metadata_protocol_id "/eth2/beacon_chain/req/metadata/3/ssz_snappy" @@ -49,6 +52,11 @@ defmodule LambdaEthereumConsensus.P2P.Peerbook do @doc """ Get some peer from the peerbook. """ + @doc "Returns the number of peers currently in the peerbook." + def peer_count() do + fetch_peerbook!() |> map_size() + end + def get_some_peer() do # TODO: This is a very naive implementation of a peer selection algorithm, # this sorts the peers every time. The same is true for the pruning. @@ -137,16 +145,59 @@ defmodule LambdaEthereumConsensus.P2P.Peerbook do "[Peerbook] New peer connected: #{inspect(Utils.format_shorten_binary(peer_id))}" ) - if not Map.has_key?(peerbook, peer_id) do - :telemetry.execute([:peers, :connection], %{id: peer_id}, %{result: "success"}) - entry = %{score: @initial_score, node_id: node_id, custody_group_count: nil} - Map.put(peerbook, peer_id, entry) |> store_peerbook() - Task.start(__MODULE__, :challenge_peer, [peer_id]) + cond do + Map.has_key?(peerbook, peer_id) -> + # Already known, just update node_id if we got one from discovery + if node_id != nil and peerbook[peer_id].node_id == nil do + Map.update!(peerbook, peer_id, fn e -> %{e | node_id: node_id} end) + |> store_peerbook() + end + + map_size(peerbook) >= @max_peers -> + # Hard cap reached. Only accept if we can evict a lower-value peer. + evict_and_add(peerbook, peer_id, node_id) + + true -> + :telemetry.execute([:peers, :connection], %{id: peer_id}, %{result: "success"}) + entry = %{score: @initial_score, node_id: node_id, custody_group_count: nil} + Map.put(peerbook, peer_id, entry) |> store_peerbook() + Task.start(__MODULE__, :challenge_peer, [peer_id]) end prune() end + # When at max_peers, evict the lowest-scoring non-PeerDAS peer to make room. + # PeerDAS peers (with custody_group_count set) are protected from eviction. 
+ defp evict_and_add(peerbook, new_peer_id, node_id) do + # Find lowest-scoring non-PeerDAS peer + victim = + peerbook + |> Enum.filter(fn {_id, %{custody_group_count: cgc}} -> cgc == nil end) + |> Enum.min_by(fn {_id, %{score: s}} -> s end, fn -> nil end) + + case victim do + {victim_id, _} -> + Logger.debug( + "[Peerbook] At max_peers (#{@max_peers}), evicting #{inspect(Utils.format_shorten_binary(victim_id))} for new peer" + ) + + :telemetry.execute([:peers, :connection], %{id: new_peer_id}, %{result: "success"}) + entry = %{score: @initial_score, node_id: node_id, custody_group_count: nil} + + peerbook + |> Map.delete(victim_id) + |> Map.put(new_peer_id, entry) + |> store_peerbook() + + Task.start(__MODULE__, :challenge_peer, [new_peer_id]) + + nil -> + # All peers are PeerDAS peers — don't evict, just drop the new one + Logger.debug("[Peerbook] At max_peers (#{@max_peers}), all PeerDAS — ignoring new peer") + end + end + def challenge_peer(peer_id) do case Libp2pPort.send_request(peer_id, @metadata_protocol_id, "") do {:ok, <<0, _::binary>> = response} -> @@ -182,18 +233,43 @@ defmodule LambdaEthereumConsensus.P2P.Peerbook do defp prune() do peerbook = fetch_peerbook!() len = map_size(peerbook) - prune_size = if len > 0, do: calculate_prune_size(len), else: 0 - - if prune_size > 0 do - Logger.debug("[Peerbook] Pruning #{prune_size} peers by challenge") - - n = :rand.uniform(len) - - peerbook - |> Map.keys() - |> Stream.drop(n) - |> Stream.take(prune_size) - |> Enum.each(fn peer_id -> Task.start(__MODULE__, :challenge_peer, [peer_id]) end) + excess = len - @target_peers + + cond do + excess > @max_prune_size -> + # Well above target: immediately evict lowest-scoring non-PeerDAS peers. 
+ evict_count = min(excess, @max_prune_size) + + victims = + peerbook + |> Enum.filter(fn {_id, %{custody_group_count: cgc}} -> cgc == nil end) + |> Enum.sort_by(fn {_id, %{score: s}} -> s end) + |> Enum.take(evict_count) + + if victims != [] do + Logger.info( + "[Peerbook] Evicting #{length(victims)} low-score peers (#{len} total, target #{@target_peers})" + ) + + pruned = Enum.reduce(victims, peerbook, fn {id, _}, pb -> Map.delete(pb, id) end) + store_peerbook(pruned) + end + + excess > 0 -> + # Slightly above target: challenge random peers (existing behavior). + prune_size = calculate_prune_size(len) + + if prune_size > 0 do + peerbook + |> Enum.shuffle() + |> Enum.take(prune_size) + |> Enum.each(fn {peer_id, _} -> + Task.start(__MODULE__, :challenge_peer, [peer_id]) + end) + end + + true -> + :ok end end diff --git a/lib/lambda_ethereum_consensus/prom_ex_plugin.ex b/lib/lambda_ethereum_consensus/prom_ex_plugin.ex index ede9e7f36..e19f7536b 100644 --- a/lib/lambda_ethereum_consensus/prom_ex_plugin.ex +++ b/lib/lambda_ethereum_consensus/prom_ex_plugin.ex @@ -102,6 +102,7 @@ defmodule LambdaEthereumConsensus.PromExPlugin do [ Polling.build(:periodic_measurements, poll_rate, {__MODULE__, :periodic_measurements, []}, [ last_value([:db, :size, :total], unit: :byte), + last_value([:peerbook, :peers, :count], []), last_value([:vm, :message_queue, :length], tags: [:process]) ]) ] @@ -110,6 +111,7 @@ defmodule LambdaEthereumConsensus.PromExPlugin do def periodic_measurements() do message_queue_lengths() db_size() + peer_count() end def db_size() do @@ -117,6 +119,11 @@ defmodule LambdaEthereumConsensus.PromExPlugin do :telemetry.execute([:db, :size], %{total: db_size}) end + def peer_count() do + count = LambdaEthereumConsensus.P2P.Peerbook.peer_count() + :telemetry.execute([:peerbook, :peers], %{count: count}) + end + defp register_queue_length(name, len) do :telemetry.execute([:vm, :message_queue], %{length: len}, %{process: inspect(name)}) end diff --git 
a/lib/lambda_ethereum_consensus/state_transition/accessors.ex b/lib/lambda_ethereum_consensus/state_transition/accessors.ex index ccb79c86f..05c7fd87b 100644 --- a/lib/lambda_ethereum_consensus/state_transition/accessors.ex +++ b/lib/lambda_ethereum_consensus/state_transition/accessors.ex @@ -209,6 +209,45 @@ defmodule LambdaEthereumConsensus.StateTransition.Accessors do end end + @doc """ + Compute unslashed participating index sets for all 3 flag indices in a single O(V) pass. + Returns a list of 3 MapSets, one per flag index (0, 1, 2). + """ + @spec get_all_unslashed_participating_indices(BeaconState.t(), Types.epoch()) :: + [MapSet.t()] + def get_all_unslashed_participating_indices(%BeaconState{} = state, epoch) do + epoch_participation = + if epoch == get_current_epoch(state) do + state.current_epoch_participation + else + state.previous_epoch_participation + end + + state.validators + |> Aja.Vector.zip_with(epoch_participation, &{&1, &2}) + |> Aja.Vector.with_index() + |> Aja.Vector.foldl( + {MapSet.new(), MapSet.new(), MapSet.new()}, + &accumulate_participating_flags(&1, &2, epoch) + ) + |> Tuple.to_list() + end + + defp accumulate_participating_flags( + {{v, participation}, index}, + {set0, set1, set2}, + epoch + ) do + if not v.slashed and Predicates.active_validator?(v, epoch) do + set0 = if Predicates.has_flag(participation, 0), do: MapSet.put(set0, index), else: set0 + set1 = if Predicates.has_flag(participation, 1), do: MapSet.put(set1, index), else: set1 + set2 = if Predicates.has_flag(participation, 2), do: MapSet.put(set2, index), else: set2 + {set0, set1, set2} + else + {set0, set1, set2} + end + end + @doc """ Return the combined effective balance of the active validators. Note: ``get_total_balance`` returns ``EFFECTIVE_BALANCE_INCREMENT`` Gwei minimum to avoid divisions by zero. 
@@ -335,19 +374,60 @@ defmodule LambdaEthereumConsensus.StateTransition.Accessors do def compute_proposer_indices(state, epoch, seed, indices) do start_slot = Misc.compute_start_slot_at_epoch(epoch) slots_per_epoch = ChainSpec.get("SLOTS_PER_EPOCH") + rounds = ChainSpec.get("SHUFFLE_ROUND_COUNT") + max_effective_balance = ChainSpec.get("MAX_EFFECTIVE_BALANCE_ELECTRA") - 0..(slots_per_epoch - 1) - |> Enum.reduce_while({:ok, []}, fn i, {:ok, acc} -> - slot_seed = SszEx.hash(seed <> Misc.uint64_to_bytes(start_slot + i)) + # Extract effective balances from validators (not state.balances!) as a flat list. + # The spec uses validator.effective_balance for the proposer selection threshold. + effective_balances = + state.validators + |> Aja.Vector.map(& &1.effective_balance) + |> Aja.Vector.to_list() + + active_indices_list = Aja.Vector.to_list(indices) + + rust_result = + Ssz.compute_proposer_indices( + seed, + start_slot, + slots_per_epoch, + active_indices_list, + effective_balances, + max_effective_balance, + rounds + ) - case Misc.compute_proposer_index(state, indices, slot_seed) do - {:ok, proposer_index} -> {:cont, {:ok, [proposer_index | acc]}} - {:error, _} = err -> {:halt, err} - end - end) - |> case do - {:ok, reversed} -> {:ok, Enum.reverse(reversed)} - {:error, _} = err -> err + # Cross-check: verify the Rust NIF result against the pure Elixir + # implementation for the first slot. If they disagree, fall back to + # Elixir for the entire epoch (slower but correct). + slot_seed = SszEx.hash(seed <> Misc.uint64_to_bytes(start_slot)) + + case Misc.compute_proposer_index(state, indices, slot_seed) do + {:ok, elixir_first} -> + rust_first = List.first(rust_result) + + if elixir_first != rust_first do + Logger.error( + "[Accessors] Rust NIF proposer index mismatch at epoch #{epoch}! " <> + "Rust=#{rust_first}, Elixir=#{elixir_first}. Falling back to Elixir." 
+ ) + + # Fall back to pure Elixir for correctness + elixir_result = + Enum.map(0..(slots_per_epoch - 1), fn i -> + slot = start_slot + i + ss = SszEx.hash(seed <> Misc.uint64_to_bytes(slot)) + {:ok, idx} = Misc.compute_proposer_index(state, indices, ss) + idx + end) + + {:ok, elixir_result} + else + {:ok, rust_result} + end + + _ -> + {:ok, rust_result} end end @@ -693,17 +773,13 @@ defmodule LambdaEthereumConsensus.StateTransition.Accessors do ``EFFECTIVE_BALANCE_INCREMENT`` Gwei minimum to avoid divisions by zero. Math safe up to ~10B ETH, after which this overflows uint64. """ - @spec get_total_balance(BeaconState.t(), Enumerable.t(Types.validator_index())) :: + @spec get_total_balance(BeaconState.t(), MapSet.t(Types.validator_index())) :: Types.gwei() - def get_total_balance(state, indices) do - indices = MapSet.new(indices) - + def get_total_balance(state, %MapSet{} = indices) do total_balance = - state.validators - |> Stream.with_index() - |> Stream.filter(fn {_, index} -> MapSet.member?(indices, index) end) - |> Stream.map(fn {%Types.Validator{effective_balance: n}, _} -> n end) - |> Enum.sum() + Enum.reduce(indices, 0, fn index, acc -> + acc + Aja.Vector.at!(state.validators, index).effective_balance + end) max(ChainSpec.get("EFFECTIVE_BALANCE_INCREMENT"), total_balance) end diff --git a/lib/lambda_ethereum_consensus/state_transition/cache.ex b/lib/lambda_ethereum_consensus/state_transition/cache.ex index 68d5844a1..862621f4c 100644 --- a/lib/lambda_ethereum_consensus/state_transition/cache.ex +++ b/lib/lambda_ethereum_consensus/state_transition/cache.ex @@ -14,7 +14,9 @@ defmodule LambdaEthereumConsensus.StateTransition.Cache do # k = {slot, {index, root}} ; v = [index] :beacon_committee, # k = {epoch, root} ; v = Aja.vec(index) - :active_validator_indices + :active_validator_indices, + # k = {epoch, root} ; v = [validator_index] + :sync_committee_indices ] @epoch_retain_window 3 @@ -31,9 +33,11 @@ defmodule LambdaEthereumConsensus.StateTransition.Cache 
do defp ms_less_than(const) do # NOTE: no need to specify false clause - # This match-spec returns true for tuples with epoch/slot smaller than `const` + # This match-spec returns true for ETS records {key, value} where the first + # element of the key (epoch or slot) is smaller than `const`. + # ETS records are {key, value} tuples, so we match {{epoch_or_slot, _rest}, _value}. Ex2ms.fun do - {{x, _}} when x < ^const -> true + {{x, _}, _} when x < ^const -> true end end @@ -42,6 +46,7 @@ defmodule LambdaEthereumConsensus.StateTransition.Cache do defp generate_cleanup_spec(:active_validator_count, key), do: cleanup_epoch_ms(key) defp generate_cleanup_spec(:beacon_committee, key), do: cleanup_slot_ms(key) defp generate_cleanup_spec(:active_validator_indices, key), do: cleanup_epoch_ms(key) + defp generate_cleanup_spec(:sync_committee_indices, key), do: cleanup_epoch_ms(key) @spec initialize_cache() :: :ok def initialize_cache(), do: @tables |> Enum.each(&init_table/1) @@ -75,5 +80,12 @@ defmodule LambdaEthereumConsensus.StateTransition.Cache do end def present?(table, key), do: :ets.member(table, key) - def set(table, key, value), do: :ets.insert_new(table, {key, value}) + + def set(table, key, value) do + unless :ets.member(table, key) do + clean_up_old_entries(table, key) + end + + :ets.insert_new(table, {key, value}) + end end diff --git a/lib/lambda_ethereum_consensus/state_transition/epoch_processing.ex b/lib/lambda_ethereum_consensus/state_transition/epoch_processing.ex index 11d5cd7e2..33bbd0076 100644 --- a/lib/lambda_ethereum_consensus/state_transition/epoch_processing.ex +++ b/lib/lambda_ethereum_consensus/state_transition/epoch_processing.ex @@ -108,114 +108,111 @@ defmodule LambdaEthereumConsensus.StateTransition.EpochProcessing do @spec process_slashings(BeaconState.t()) :: {:ok, BeaconState.t()} def process_slashings(%BeaconState{validators: validators, slashings: slashings} = state) do - epoch = Accessors.get_current_epoch(state) - total_balance = 
Accessors.get_total_active_balance(state) + slashed_sum = Enum.reduce(slashings, 0, &+/2) - proportional_slashing_multiplier = ChainSpec.get("PROPORTIONAL_SLASHING_MULTIPLIER_BELLATRIX") - epochs_per_slashings_vector = ChainSpec.get("EPOCHS_PER_SLASHINGS_VECTOR") - increment = ChainSpec.get("EFFECTIVE_BALANCE_INCREMENT") + # Short-circuit: when no slashings occurred, penalty is 0 for all validators. + # Avoids scanning 2.2M validators on the common case (no slashings on mainnet). + if slashed_sum == 0 do + {:ok, state} + else + epoch = Accessors.get_current_epoch(state) + total_balance = Accessors.get_total_active_balance(state) - slashed_sum = Enum.reduce(slashings, 0, &+/2) + proportional_slashing_multiplier = + ChainSpec.get("PROPORTIONAL_SLASHING_MULTIPLIER_BELLATRIX") - adjusted_total_slashing_balance = - min(slashed_sum * proportional_slashing_multiplier, total_balance) + epochs_per_slashings_vector = ChainSpec.get("EPOCHS_PER_SLASHINGS_VECTOR") + increment = ChainSpec.get("EFFECTIVE_BALANCE_INCREMENT") - penalty_per_effective_balance_increment = - div(adjusted_total_slashing_balance, div(total_balance, increment)) + adjusted_total_slashing_balance = + min(slashed_sum * proportional_slashing_multiplier, total_balance) - new_state = - validators - |> Stream.with_index() - |> Enum.reduce(state, fn {validator, index}, acc -> - if validator.slashed and - epoch + div(epochs_per_slashings_vector, 2) == validator.withdrawable_epoch do - effective_balance_increments = div(validator.effective_balance, increment) - penalty = penalty_per_effective_balance_increment * effective_balance_increments - - BeaconState.decrease_balance(acc, index, penalty) - else - acc - end - end) + penalty_per_ebi = + div(adjusted_total_slashing_balance, div(total_balance, increment)) - {:ok, new_state} + target_withdrawable_epoch = epoch + div(epochs_per_slashings_vector, 2) + + new_state = + validators + |> Aja.Vector.with_index() + |> Aja.Vector.foldl(state, fn {validator, index}, acc -> + 
if validator.slashed and validator.withdrawable_epoch == target_withdrawable_epoch do + penalty = penalty_per_ebi * div(validator.effective_balance, increment) + BeaconState.decrease_balance(acc, index, penalty) + else + acc + end + end) + + {:ok, new_state} + end end @spec process_registry_updates(BeaconState.t()) :: {:ok, BeaconState.t()} | {:error, String.t()} - def process_registry_updates(%BeaconState{validators: validators} = state) do + def process_registry_updates(%BeaconState{} = state) do ejection_balance = ChainSpec.get("EJECTION_BALANCE") current_epoch = Accessors.get_current_epoch(state) activation_exit_epoch = Misc.compute_activation_exit_epoch(current_epoch) - - validators - |> Enum.with_index() - |> Enum.reduce_while(state, fn {validator, idx}, state -> - handle_validator_registry_update( - state, - validator, - idx, - current_epoch, - activation_exit_epoch, - ejection_balance - ) - end) - |> then(fn - %BeaconState{} = state -> {:ok, state} - {:error, reason} -> {:error, reason} - end) + far_future_epoch = Constants.far_future_epoch() + min_activation_balance = ChainSpec.get("MIN_ACTIVATION_BALANCE") + finalized_epoch = state.finalized_checkpoint.epoch + + ctx = + {current_epoch, ejection_balance, activation_exit_epoch, far_future_epoch, + min_activation_balance, finalized_epoch} + + # Use Aja.Vector.foldl instead of Enum.with_index + Enum.reduce_while + # to avoid materializing the vector to a list (~24MB allocation) + try do + state.validators + |> Aja.Vector.with_index() + |> Aja.Vector.foldl(state, fn {validator, idx}, state -> + update_registry_for_validator(validator, idx, state, ctx) + end) + |> then(&{:ok, &1}) + catch + {:error, _} = err -> err + end end - defp handle_validator_registry_update( - %BeaconState{} = state, - %Validator{} = validator, + defp update_registry_for_validator( + validator, idx, - current_epoch, - activation_exit_epoch, - ejection_balance + state, + {current_epoch, ejection_balance, activation_exit_epoch, 
far_future_epoch, + min_activation_balance, finalized_epoch} ) do cond do - Predicates.eligible_for_activation_queue?(validator) -> - updated_validator = %{ - validator - | activation_eligibility_epoch: current_epoch + 1 - } - - {:cont, - %{ - state - | validators: Aja.Vector.replace_at!(state.validators, idx, updated_validator) - }} + validator.activation_eligibility_epoch == far_future_epoch && + validator.effective_balance >= min_activation_balance -> + updated = %{validator | activation_eligibility_epoch: current_epoch + 1} + replace_validator(state, idx, updated) Predicates.active_validator?(validator, current_epoch) && validator.effective_balance <= ejection_balance -> - case Mutators.initiate_validator_exit(state, validator) do - {:ok, {state, ejected_validator}} -> - updated_state = %{ - state - | validators: Aja.Vector.replace_at!(state.validators, idx, ejected_validator) - } - - {:cont, updated_state} + eject_validator(state, idx, validator) - {:error, msg} -> - {:halt, {:error, msg}} - end + validator.activation_eligibility_epoch <= finalized_epoch && + validator.activation_epoch == far_future_epoch -> + updated = %{validator | activation_epoch: activation_exit_epoch} + replace_validator(state, idx, updated) - Predicates.eligible_for_activation?(state, validator) -> - updated_validator = %{ - validator - | activation_epoch: activation_exit_epoch - } + true -> + state + end + end - updated_state = %{ - state - | validators: Aja.Vector.replace_at!(state.validators, idx, updated_validator) - } + defp replace_validator(state, idx, updated_validator) do + %{state | validators: Aja.Vector.replace_at!(state.validators, idx, updated_validator)} + end - {:cont, updated_state} + defp eject_validator(state, idx, validator) do + case Mutators.initiate_validator_exit(state, validator) do + {:ok, {state, ejected}} -> + replace_validator(state, idx, ejected) - true -> - {:cont, state} + {:error, msg} -> + throw({:error, msg}) end end @@ -252,33 +249,43 @@ defmodule 
LambdaEthereumConsensus.StateTransition.EpochProcessing do inactivity_score_bias = ChainSpec.get("INACTIVITY_SCORE_BIAS") inactivity_score_recovery_rate = ChainSpec.get("INACTIVITY_SCORE_RECOVERY_RATE") previous_epoch = Accessors.get_previous_epoch(state) - - # PERF: this can be inlined and combined with the next pipeline - {:ok, unslashed_participating_indices} = - Accessors.get_unslashed_participating_indices(state, timely_target_index, previous_epoch) - state_in_inactivity_leak? = Predicates.in_inactivity_leak?(state) - state.inactivity_scores - |> Stream.zip(state.validators) - |> Stream.with_index() - |> Enum.map(fn {{inactivity_score, validator}, index} -> - if Predicates.eligible_validator?(validator, previous_epoch) do - inactivity_score - |> Misc.increase_inactivity_score( - index, - unslashed_participating_indices, - inactivity_score_bias - ) - |> Misc.decrease_inactivity_score( - state_in_inactivity_leak?, - inactivity_score_recovery_rate - ) - else - inactivity_score - end - end) - |> then(&{:ok, %{state | inactivity_scores: &1}}) + # Single-pass: inline the participation check directly instead of building + # a MapSet of 2.2M entries then doing MapSet.member? lookups. + # Zip validators, participation flags, and inactivity_scores together. + participation = state.previous_epoch_participation + + new_scores = + state.inactivity_scores + |> Stream.zip(Aja.Vector.to_list(state.validators)) + |> Stream.zip(Aja.Vector.to_list(participation)) + |> Enum.map(fn {{inactivity_score, validator}, part_flags} -> + if Predicates.eligible_validator?(validator, previous_epoch) do + # Inline the unslashed participating check: + # not slashed AND active (already checked by eligible_validator?) 
AND has target flag + is_unslashed_participating = + not validator.slashed and + Predicates.has_flag(part_flags, timely_target_index) + + inactivity_score = + if is_unslashed_participating do + inactivity_score - min(1, inactivity_score) + else + inactivity_score + inactivity_score_bias + end + + if state_in_inactivity_leak? do + inactivity_score + else + inactivity_score - min(inactivity_score_recovery_rate, inactivity_score) + end + else + inactivity_score + end + end) + + {:ok, %{state | inactivity_scores: new_scores}} end @spec process_historical_summaries_update(BeaconState.t()) :: {:ok, BeaconState.t()} @@ -330,7 +337,8 @@ defmodule LambdaEthereumConsensus.StateTransition.EpochProcessing do previous_target_balance = get_total_participating_balance(state, target_index, previous_epoch) - current_target_balance = get_total_participating_balance(state, target_index, current_epoch) + current_target_balance = + get_total_participating_balance(state, target_index, current_epoch) total_active_balance = Accessors.get_total_active_balance(state) @@ -343,7 +351,7 @@ defmodule LambdaEthereumConsensus.StateTransition.EpochProcessing do end end - # NOTE: epoch must be the current or previous one + # Single-pass per epoch: zip_with produces integers (0 or balance), foldl sums them. 
defp get_total_participating_balance(state, flag_index, epoch) do epoch_participation = if epoch == Accessors.get_current_epoch(state) do @@ -354,11 +362,12 @@ defmodule LambdaEthereumConsensus.StateTransition.EpochProcessing do state.validators |> Aja.Vector.zip_with(epoch_participation, fn v, participation -> - {not v.slashed and Predicates.active_validator?(v, epoch) and - Predicates.has_flag(participation, flag_index), v.effective_balance} + if not v.slashed and Predicates.active_validator?(v, epoch) and + Predicates.has_flag(participation, flag_index), + do: v.effective_balance, + else: 0 end) - |> Aja.Vector.filter(&elem(&1, 0)) - |> Aja.Enum.reduce(0, fn {true, balance}, acc -> acc + balance end) + |> Aja.Vector.foldl(0, fn balance, acc -> acc + balance end) end defp weigh_justification_and_finalization( @@ -434,28 +443,107 @@ defmodule LambdaEthereumConsensus.StateTransition.EpochProcessing do if Accessors.get_current_epoch(state) == Constants.genesis_epoch() do {:ok, state} else - deltas = - Constants.participation_flag_weights() - |> Stream.with_index() - |> Stream.map(fn {weight, index} -> - BeaconState.get_flag_index_deltas(state, weight, index) + previous_epoch = Accessors.get_previous_epoch(state) + base_reward_per_increment = Accessors.get_base_reward_per_increment(state) + effective_balance_increment = ChainSpec.get("EFFECTIVE_BALANCE_INCREMENT") + weights = Constants.participation_flag_weights() + weight_denominator = Constants.weight_denominator() + in_inactivity_leak? 
= Predicates.in_inactivity_leak?(state) + timely_head_flag_index = Constants.timely_head_flag_index() + timely_target_flag_index = Constants.timely_target_flag_index() + + penalty_denominator = + ChainSpec.get("INACTIVITY_SCORE_BIAS") * + ChainSpec.get("INACTIVITY_PENALTY_QUOTIENT_BELLATRIX") + + active_increments = + div(Accessors.get_total_active_balance(state), effective_balance_increment) + + participation = state.previous_epoch_participation + + # Pass 1: compute participating balances for each flag (single O(V) scan) + {bal0, bal1, bal2} = + state.validators + |> Aja.Vector.zip_with(participation, fn v, p -> {v, p} end) + |> Aja.Vector.foldl({0, 0, 0}, fn {v, p}, {b0, b1, b2} -> + if not v.slashed and Predicates.active_validator?(v, previous_epoch) do + eb = v.effective_balance + b0 = if Predicates.has_flag(p, 0), do: b0 + eb, else: b0 + b1 = if Predicates.has_flag(p, 1), do: b1 + eb, else: b1 + b2 = if Predicates.has_flag(p, 2), do: b2 + eb, else: b2 + {b0, b1, b2} + else + {b0, b1, b2} + end end) - |> Stream.concat([BeaconState.get_inactivity_penalty_deltas(state)]) - |> Stream.zip() - |> Aja.Vector.new() - state.balances - |> Aja.Vector.zip_with(deltas, &update_balance/2) - |> then(&{:ok, %BeaconState{state | balances: &1}}) + participating_increments = [ + div(max(effective_balance_increment, bal0), effective_balance_increment), + div(max(effective_balance_increment, bal1), effective_balance_increment), + div(max(effective_balance_increment, bal2), effective_balance_increment) + ] + + ctx = + {weights, participating_increments, active_increments, effective_balance_increment, + base_reward_per_increment, weight_denominator, in_inactivity_leak?, + timely_head_flag_index, timely_target_flag_index, penalty_denominator, previous_epoch} + + # Pass 2: compute all deltas + apply to balances (single O(V) scan) + new_balances = + state.validators + |> Aja.Vector.zip_with(participation, fn v, p -> {v, p} end) + |> Aja.Vector.zip_with(state.balances, fn {v, p}, bal 
-> {v, p, bal} end) + |> Aja.Vector.zip_with( + Aja.Vector.new(state.inactivity_scores), + fn {v, p, bal}, iscore -> {v, p, bal, iscore} end + ) + |> Aja.Vector.map(fn {validator, part_flags, balance, inactivity_score} -> + compute_and_apply_deltas(validator, part_flags, balance, inactivity_score, ctx) + end) + + {:ok, %BeaconState{state | balances: new_balances}} end end - defp update_balance(balance, deltas) do - deltas - |> Tuple.to_list() - |> Enum.reduce(balance, fn delta, balance -> - max(balance + delta, 0) - end) + defp compute_and_apply_deltas(validator, part_flags, balance, inactivity_score, ctx) do + {weights, pi_list, ai, ebi, brpi, wd, in_leak?, thfi, ttfi, pd, prev_epoch} = ctx + + if not Predicates.eligible_validator?(validator, prev_epoch) do + balance + else + base_reward = div(validator.effective_balance, ebi) * brpi + + # Apply 3 flag deltas with per-delta clamping + balance = + weights + |> Enum.with_index() + |> Enum.reduce(balance, fn {weight, flag_index}, bal -> + upi = Enum.at(pi_list, flag_index) + is_unslashed = not validator.slashed and Predicates.has_flag(part_flags, flag_index) + + delta = + cond do + is_unslashed and in_leak? 
-> 0 + is_unslashed -> div(base_reward * weight * upi, ai * wd) + flag_index != thfi -> -div(base_reward * weight, wd) + true -> 0 + end + + max(bal + delta, 0) + end) + + # Apply inactivity penalty delta with per-delta clamping + is_target_unslashed = not validator.slashed and Predicates.has_flag(part_flags, ttfi) + + inactivity_delta = + if not is_target_unslashed do + -div(validator.effective_balance * inactivity_score, pd) + else + 0 + end + + max(balance + inactivity_delta, 0) + end end @spec process_pending_deposits(BeaconState.t()) :: {:ok, BeaconState.t()} @@ -464,43 +552,60 @@ defmodule LambdaEthereumConsensus.StateTransition.EpochProcessing do state.deposit_balance_to_consume + Accessors.get_activation_exit_churn_limit(state) finalized_slot = Misc.compute_start_slot_at_epoch(state.finalized_checkpoint.epoch) + max_pending = ChainSpec.get("MAX_PENDING_DEPOSITS_PER_EPOCH") - {state, churn_limit_reached, processed_amount, deposits_to_postpone, last_processed_index} = + # Pre-build a pubkey→index map for deposit pubkeys with ONE validator scan. + # At most 16 deposits, so the lookup set and result map are tiny. + deposit_pubkeys = state.pending_deposits - |> Enum.with_index() - |> Enum.reduce_while({state, false, 0, [], 0}, fn {deposit, index}, - {state, churn_limit_reached, - processed_amount, deposits_to_postpone, - _last_processed_index} -> - cond do - # Do not process deposit requests if Eth1 bridge deposits are not yet applied. - deposit.slot > Constants.genesis_slot() && - state.eth1_deposit_index < state.deposit_requests_start_index -> - {:halt, - {state, churn_limit_reached, processed_amount, deposits_to_postpone, index - 1}} - - # Check if deposit has been finalized, otherwise, stop processing. - deposit.slot > finalized_slot -> - {:halt, - {state, churn_limit_reached, processed_amount, deposits_to_postpone, index - 1}} - - # Check if number of processed deposits has not reached the limit, otherwise, stop processing. 
- index >= ChainSpec.get("MAX_PENDING_DEPOSITS_PER_EPOCH") -> - {:halt, - {state, churn_limit_reached, processed_amount, deposits_to_postpone, index - 1}} + |> Enum.take(max_pending) + |> MapSet.new(& &1.pubkey) - true -> - handle_pending_deposit( - deposit, - state, - churn_limit_reached, - processed_amount, - deposits_to_postpone, - index, - available_for_processing - ) + pubkey_to_index = build_deposit_pubkey_index(state.validators, deposit_pubkeys) + + {state, churn_limit_reached, processed_amount, deposits_to_postpone, last_processed_index, + _pubkey_to_index} = + state.pending_deposits + |> Enum.with_index() + |> Enum.reduce_while( + {state, false, 0, [], 0, pubkey_to_index}, + fn {deposit, index}, + {state, churn_limit_reached, processed_amount, deposits_to_postpone, + _last_processed_index, pubkey_to_index} -> + cond do + # Do not process deposit requests if Eth1 bridge deposits are not yet applied. + deposit.slot > Constants.genesis_slot() && + state.eth1_deposit_index < state.deposit_requests_start_index -> + {:halt, + {state, churn_limit_reached, processed_amount, deposits_to_postpone, index - 1, + pubkey_to_index}} + + # Check if deposit has been finalized, otherwise, stop processing. + deposit.slot > finalized_slot -> + {:halt, + {state, churn_limit_reached, processed_amount, deposits_to_postpone, index - 1, + pubkey_to_index}} + + # Check if number of processed deposits has not reached the limit, otherwise, stop processing. 
+ index >= max_pending -> + {:halt, + {state, churn_limit_reached, processed_amount, deposits_to_postpone, index - 1, + pubkey_to_index}} + + true -> + handle_pending_deposit( + deposit, + state, + churn_limit_reached, + processed_amount, + deposits_to_postpone, + index, + available_for_processing, + pubkey_to_index + ) + end end - end) + ) deposit_balance_to_consume = if churn_limit_reached do @@ -519,6 +624,23 @@ defmodule LambdaEthereumConsensus.StateTransition.EpochProcessing do }} end + # Single scan of validators to find indices for a small set of deposit pubkeys + defp build_deposit_pubkey_index(validators, deposit_pubkeys) do + if MapSet.size(deposit_pubkeys) == 0 do + %{} + else + validators + |> Aja.Vector.with_index() + |> Aja.Vector.foldl(%{}, &match_deposit_pubkey(&1, &2, deposit_pubkeys)) + end + end + + defp match_deposit_pubkey({validator, idx}, acc, deposit_pubkeys) do + if MapSet.member?(deposit_pubkeys, validator.pubkey), + do: Map.put_new(acc, validator.pubkey, idx), + else: acc + end + defp handle_pending_deposit( deposit, state, @@ -526,32 +648,39 @@ defmodule LambdaEthereumConsensus.StateTransition.EpochProcessing do processed_amount, deposits_to_postpone, index, - available_for_processing + available_for_processing, + pubkey_to_index ) do far_future_epoch = Constants.far_future_epoch() - next_epoch = Accessors.get_current_epoch(state) + # Spec: next_epoch = Epoch(get_current_epoch(state) + 1) + next_epoch = Accessors.get_current_epoch(state) + 1 {is_validator_exited, is_validator_withdrawn} = - case Enum.find(state.validators, fn v -> v.pubkey == deposit.pubkey end) do - %Validator{} = validator -> - {validator.exit_epoch < far_future_epoch, validator.withdrawable_epoch < next_epoch} - - _ -> + case Map.get(pubkey_to_index, deposit.pubkey) do + nil -> {false, false} + + validator_index -> + validator = Aja.Vector.at!(state.validators, validator_index) + {validator.exit_epoch < far_future_epoch, validator.withdrawable_epoch < next_epoch} end 
cond do # Deposited balance will never become active. Increase balance but do not consume churn is_validator_withdrawn -> - {:ok, state} = apply_pending_deposit(state, deposit) + {:ok, state, pubkey_to_index} = apply_pending_deposit(state, deposit, pubkey_to_index) - {:cont, {state, churn_limit_reached, processed_amount, deposits_to_postpone, index}} + {:cont, + {state, churn_limit_reached, processed_amount, deposits_to_postpone, index, + pubkey_to_index}} # Validator is exiting, postpone the deposit until after withdrawable epoch is_validator_exited -> deposits_to_postpone = Enum.concat(deposits_to_postpone, [deposit]) - {:cont, {state, churn_limit_reached, processed_amount, deposits_to_postpone, index}} + {:cont, + {state, churn_limit_reached, processed_amount, deposits_to_postpone, index, + pubkey_to_index}} true -> # Check if deposit fits in the churn, otherwise, do no more deposit processing in this epoch. @@ -559,12 +688,14 @@ defmodule LambdaEthereumConsensus.StateTransition.EpochProcessing do processed_amount + deposit.amount > available_for_processing if is_churn_limit_reached do - {:halt, {state, true, processed_amount, deposits_to_postpone, index - 1}} + {:halt, + {state, true, processed_amount, deposits_to_postpone, index - 1, pubkey_to_index}} else # Consume churn and apply deposit. 
processed_amount = processed_amount + deposit.amount - {:ok, state} = apply_pending_deposit(state, deposit) - {:cont, {state, false, processed_amount, deposits_to_postpone, index}} + {:ok, state, pubkey_to_index} = apply_pending_deposit(state, deposit, pubkey_to_index) + + {:cont, {state, false, processed_amount, deposits_to_postpone, index, pubkey_to_index}} end end end @@ -631,9 +762,8 @@ defmodule LambdaEthereumConsensus.StateTransition.EpochProcessing do }} end - defp apply_pending_deposit(state, deposit) do - index = - Enum.find_index(state.validators, fn validator -> validator.pubkey == deposit.pubkey end) + defp apply_pending_deposit(state, deposit, pubkey_to_index) do + index = Map.get(pubkey_to_index, deposit.pubkey) current_validator? = is_number(index) @@ -648,19 +778,24 @@ defmodule LambdaEthereumConsensus.StateTransition.EpochProcessing do cond do current_validator? -> - {:ok, BeaconState.increase_balance(state, index, deposit.amount)} + {:ok, BeaconState.increase_balance(state, index, deposit.amount), pubkey_to_index} !current_validator? && valid_signature? 
-> - Mutators.add_validator_to_registry( - state, - deposit.pubkey, - deposit.withdrawal_credentials, - deposit.amount - ) + {:ok, new_state} = + Mutators.add_validator_to_registry( + state, + deposit.pubkey, + deposit.withdrawal_credentials, + deposit.amount + ) + + # Update map so subsequent deposits for this pubkey find the new validator + new_index = Aja.Vector.size(state.validators) + {:ok, new_state, Map.put(pubkey_to_index, deposit.pubkey, new_index)} true -> # Neither a validator nor have a valid signature, we do not apply the deposit - {:ok, state} + {:ok, state, pubkey_to_index} end end end diff --git a/lib/lambda_ethereum_consensus/state_transition/operations.ex b/lib/lambda_ethereum_consensus/state_transition/operations.ex index d268c87b6..3b7953f6d 100644 --- a/lib/lambda_ethereum_consensus/state_transition/operations.ex +++ b/lib/lambda_ethereum_consensus/state_transition/operations.ex @@ -5,6 +5,7 @@ defmodule LambdaEthereumConsensus.StateTransition.Operations do alias LambdaEthereumConsensus.Metrics alias LambdaEthereumConsensus.StateTransition.Accessors + alias LambdaEthereumConsensus.StateTransition.Cache alias LambdaEthereumConsensus.StateTransition.Math alias LambdaEthereumConsensus.StateTransition.Misc alias LambdaEthereumConsensus.StateTransition.Mutators @@ -146,20 +147,22 @@ defmodule LambdaEthereumConsensus.StateTransition.Operations do total_proposer_reward = BitVector.count(aggregate.sync_committee_bits) * proposer_reward - # PERF: make Map with committee_index by pubkey, then - # Enum.map validators -> new balance all in place, without map_reduce - state.validators - |> get_sync_committee_indices(committee_pubkeys) - |> Stream.with_index() - |> Stream.map(fn {validator_index, committee_index} -> - if BitVector.set?(aggregate.sync_committee_bits, committee_index), - do: {validator_index, participant_reward}, - else: {validator_index, -participant_reward} - end) - |> Enum.reduce(state.balances, fn {validator_index, delta}, balances -> - 
Aja.Vector.update_at!(balances, validator_index, &max(&1 + delta, 0)) - end) - |> then(&%{state | balances: &1}) + # Cache sync committee indices (stable within a sync committee period) + committee_indices = get_cached_sync_committee_indices(state, committee_pubkeys) + + balances = + committee_indices + |> Enum.with_index() + |> Enum.reduce(state.balances, fn {validator_index, committee_index}, balances -> + delta = + if BitVector.set?(aggregate.sync_committee_bits, committee_index), + do: participant_reward, + else: -participant_reward + + Aja.Vector.update_at!(balances, validator_index, &max(&1 + delta, 0)) + end) + + %{state | balances: balances} |> BeaconState.increase_balance(proposer_index, total_proposer_reward) |> then(&{:ok, &1}) end @@ -199,23 +202,44 @@ defmodule LambdaEthereumConsensus.StateTransition.Operations do {participant_reward, proposer_reward} end + defp get_cached_sync_committee_indices(state, committee_pubkeys) do + epoch = Accessors.get_current_epoch(state) + + compute_fn = fn -> + get_sync_committee_indices(state.validators, committee_pubkeys) + end + + case Accessors.get_block_root_at_slot( + state, + max(Misc.compute_start_slot_at_epoch(epoch), 1) - 1 + ) do + {:ok, root} -> Cache.lazily_compute(:sync_committee_indices, {epoch, root}, compute_fn) + _ -> compute_fn.() + end + end + @spec get_sync_committee_indices(Aja.Vector.t(Validator.t()), list(Types.bls_pubkey())) :: list(Types.validator_index()) defp get_sync_committee_indices(validators, committee_pubkeys) do + # Build map of committee pubkey -> [committee_indices] (only 512 entries) pk_map = committee_pubkeys - |> Stream.with_index() + |> Enum.with_index() |> Enum.reduce(%{}, fn {pk, i}, map -> Map.update(map, pk, [i], &[i | &1]) end) + # Scan validators to resolve pubkeys to validator indices validators - |> Stream.with_index() - |> Stream.map(fn {%Validator{pubkey: pubkey}, i} -> {Map.get(pk_map, pubkey), i} end) - |> Stream.reject(fn {v, _} -> is_nil(v) end) - |> 
Stream.flat_map(fn {list, i} -> list |> Stream.map(&{&1, i}) end) - |> Enum.sort(fn {v1, _}, {v2, _} -> v1 <= v2 end) - |> Enum.map(fn {_, i} -> i end) + |> Aja.Vector.with_index() + |> Aja.Vector.foldl([], fn {%Validator{pubkey: pubkey}, validator_idx}, acc -> + case Map.get(pk_map, pubkey) do + nil -> acc + committee_indices -> Enum.reduce(committee_indices, acc, &[{&1, validator_idx} | &2]) + end + end) + |> Enum.sort() + |> Enum.map(fn {_committee_idx, validator_idx} -> validator_idx end) end @doc """ @@ -388,57 +412,120 @@ defmodule LambdaEthereumConsensus.StateTransition.Operations do ) end) - bound = state.validators |> Aja.Vector.size() |> min(max_validators_per_withdrawals_sweep) - # Sweep for remaining. - non_partial_withdrawals = - Stream.zip([state.validators, state.balances]) - |> Stream.with_index() - |> Stream.cycle() - |> Stream.drop(state.next_withdrawal_validator_index) - |> Stream.take(bound) - |> Stream.map(fn {{validator, balance}, index} -> - partially_withdrawn_balance = - Enum.sum( - for withdrawal <- pending_partial_withdrawals, - withdrawal.validator_index == index, - do: withdrawal.amount - ) - - balance = balance - partially_withdrawn_balance - - cond do - Validator.fully_withdrawable_validator?(validator, balance, epoch) -> - {validator, balance, index} - - Validator.partially_withdrawable_validator?(validator, balance) -> - {validator, balance - Validator.get_max_effective_balance(validator), index} + validator_count = Aja.Vector.size(state.validators) + bound = min(validator_count, max_validators_per_withdrawals_sweep) - true -> - nil - end + # Pre-build partial withdrawal amounts by validator index for O(1) lookup + partial_amounts = + Enum.reduce(pending_partial_withdrawals, %{}, fn w, acc -> + Map.update(acc, w.validator_index, w.amount, &(&1 + w.amount)) end) - |> Stream.reject(&is_nil/1) - |> Stream.with_index() - |> Stream.map(fn {{validator, balance, validator_index}, index} -> - %Validator{withdrawal_credentials: 
withdrawal_credentials} = validator - <<_::binary-size(12), execution_address::binary>> = withdrawal_credentials + # Extract the sweep range as lists for O(1) sequential access instead of + # per-element Aja.Vector.at! (O(log N)). Handles wrap-around at validator_count. + start_index = state.next_withdrawal_validator_index - %Withdrawal{ - index: index + withdrawal_index, - validator_index: validator_index, - address: execution_address, - amount: balance - } - end) + {validator_list, balance_list, index_list} = + extract_sweep_range(state.validators, state.balances, start_index, validator_count, bound) + + non_partial_withdrawals = + sweep_validator_list( + validator_list, + balance_list, + index_list, + partial_amounts, + epoch, + withdrawal_index, + [] + ) complete_withdrawals = - (pending_partial_withdrawals ++ Enum.to_list(non_partial_withdrawals)) + (pending_partial_withdrawals ++ non_partial_withdrawals) |> Enum.take(max_withdrawals_per_payload) {complete_withdrawals, processed_partial_withdrawals_count} end + # Extract the sweep range as plain lists for O(1) sequential traversal. + # Handles wrap-around when start + bound > validator_count. 
+ defp extract_sweep_range(validators, balances, start, count, bound) do + end_index = start + bound + + if end_index <= count do + # No wrap-around: single contiguous slice + vl = validators |> Aja.Vector.slice(start..(end_index - 1)) |> Aja.Vector.to_list() + bl = balances |> Aja.Vector.slice(start..(end_index - 1)) |> Aja.Vector.to_list() + il = Enum.to_list(start..(end_index - 1)) + {vl, bl, il} + else + # Wrap-around: two slices + first_len = count - start + second_len = bound - first_len + + vl = + Aja.Vector.to_list(Aja.Vector.slice(validators, start..(count - 1))) ++ + Aja.Vector.to_list(Aja.Vector.slice(validators, 0..(second_len - 1))) + + bl = + Aja.Vector.to_list(Aja.Vector.slice(balances, start..(count - 1))) ++ + Aja.Vector.to_list(Aja.Vector.slice(balances, 0..(second_len - 1))) + + il = Enum.to_list(start..(count - 1)) ++ Enum.to_list(0..(second_len - 1)) + {vl, bl, il} + end + end + + # Sweep over pre-extracted lists with O(1) sequential access. + defp sweep_validator_list([], [], [], _partial_amounts, _epoch, _withdrawal_index, acc) do + Enum.reverse(acc) + end + + defp sweep_validator_list( + [validator | vrest], + [raw_balance | brest], + [index | irest], + partial_amounts, + epoch, + withdrawal_index, + acc + ) do + balance = raw_balance - Map.get(partial_amounts, index, 0) + + acc = + cond do + Validator.fully_withdrawable_validator?(validator, balance, epoch) -> + <<_::binary-size(12), addr::binary>> = validator.withdrawal_credentials + + [ + %Withdrawal{ + index: withdrawal_index + length(acc), + validator_index: index, + address: addr, + amount: balance + } + | acc + ] + + Validator.partially_withdrawable_validator?(validator, balance) -> + <<_::binary-size(12), addr::binary>> = validator.withdrawal_credentials + + [ + %Withdrawal{ + index: withdrawal_index + length(acc), + validator_index: index, + address: addr, + amount: balance - Validator.get_max_effective_balance(validator) + } + | acc + ] + + true -> + acc + end + + 
sweep_validator_list(vrest, brest, irest, partial_amounts, epoch, withdrawal_index, acc) + end + defp process_partial_withdrawal( state, withdrawal, @@ -713,18 +800,25 @@ defmodule LambdaEthereumConsensus.StateTransition.Operations do end @spec validate_attestation(BeaconState.t(), Attestation.t()) :: :ok | {:error, String.t()} - def validate_attestation( - state, - %Attestation{data: data, aggregation_bits: aggregation_bits} = attestation - ) do + def validate_attestation(state, attestation) do + with {:ok, indexed_attestation} <- validate_attestation_structure(state, attestation) do + check_valid_indexed_attestation(state, indexed_attestation) + end + end + + # Validate attestation structure (cheap checks + committee lookups) and return + # the indexed attestation for BLS verification and attesting indices extraction. + defp validate_attestation_structure( + state, + %Attestation{data: data, aggregation_bits: aggregation_bits} = attestation + ) do with :ok <- check_valid_target_epoch(data, state), :ok <- check_epoch_matches(data), :ok <- check_valid_slot_range(data, state), :ok <- check_data_index_zero(data), {:ok, committee_offset} <- check_committee_indices(attestation, state), - :ok <- check_matching_aggregation_bits_length(aggregation_bits, committee_offset), - {:ok, indexed_attestation} <- Accessors.get_indexed_attestation(state, attestation) do - check_valid_indexed_attestation(state, indexed_attestation) + :ok <- check_matching_aggregation_bits_length(aggregation_bits, committee_offset) do + Accessors.get_indexed_attestation(state, attestation) end end @@ -827,11 +921,15 @@ defmodule LambdaEthereumConsensus.StateTransition.Operations do current_epoch_updates, attestation_index ) do - with :ok <- validate_attestation(state, att), + # Validate structure and get indexed attestation in one pass, then extract + # attesting_indices from it. This avoids calling get_attesting_indices twice + # (once inside validate_attestation, once here). 
+ with {:ok, indexed_attestation} <- validate_attestation_structure(state, att), + :ok <- check_valid_indexed_attestation(state, indexed_attestation), slot = state.slot - data.slot, {:ok, flag_indices} <- - Accessors.get_attestation_participation_flag_indices(state, data, slot), - {:ok, attesting_indices} <- Accessors.get_attesting_indices(state, att) do + Accessors.get_attestation_participation_flag_indices(state, data, slot) do + attesting_indices = MapSet.new(indexed_attestation.attesting_indices) is_current_epoch = data.target.epoch == Accessors.get_current_epoch(state) epoch_updates = if is_current_epoch, do: current_epoch_updates, else: previous_epoch_updates @@ -845,9 +943,7 @@ defmodule LambdaEthereumConsensus.StateTransition.Operations do v = {attestation_index, weights_mask} new_epoch_updates = - attesting_indices - |> Enum.to_list() - |> Enum.reduce(epoch_updates, fn i, epoch_updates -> + Enum.reduce(attesting_indices, epoch_updates, fn i, epoch_updates -> Map.update(epoch_updates, i, [v], &merge_masks(&1, v)) end) diff --git a/lib/lambda_ethereum_consensus/state_transition/shuffling.ex b/lib/lambda_ethereum_consensus/state_transition/shuffling.ex index bd15e6080..d64ccab63 100644 --- a/lib/lambda_ethereum_consensus/state_transition/shuffling.ex +++ b/lib/lambda_ethereum_consensus/state_transition/shuffling.ex @@ -46,17 +46,18 @@ defmodule LambdaEthereumConsensus.StateTransition.Shuffling do def shuffle_list(input, seed) do rounds = ChainSpec.get("SHUFFLE_ROUND_COUNT") - shuffle_list(input, rounds - 1, seed) - end - - @spec shuffle_list(Aja.Vector.t(), non_neg_integer(), binary()) :: - Aja.Vector.t() - defp shuffle_list(input, round, _seed) when round < 0, do: input + # Use Rust NIF for the full shuffle — 5-10x faster than Elixir/:atomics. + # Convert Aja.Vector → list → NIF → list → Aja.Vector. 
+ input + |> Aja.Vector.to_list() + |> Ssz.shuffle_list(seed, rounds) + |> Aja.Vector.new() + end - defp shuffle_list(input, round, seed) do - input_size = Aja.Enum.count(input) + defp shuffle_rounds(_arr, _input_size, round, _seed) when round < 0, do: :ok + defp shuffle_rounds(arr, input_size, round, seed) do round_bytes = :binary.encode_unsigned(round, :little) pivot = @@ -70,22 +71,19 @@ defmodule LambdaEthereumConsensus.StateTransition.Shuffling do source = (seed <> round_bytes <> position_bytes(pivot >>> 8)) |> SszEx.hash() byte_v = :binary.at(source, (pivot &&& 0xFF) >>> 3) - {_source, _byte_v, input} = - Enum.reduce(0..(mirror - 1)//1, {source, byte_v, input}, fn i, {source, byte_v, input} -> + {_source, _byte_v} = + Enum.reduce(0..(mirror - 1)//1, {source, byte_v}, fn i, {source, byte_v} -> j = pivot - i source = source(seed, round_bytes, j, source) byte_v = byte_v(source, j, byte_v) bit_v = bit_v(byte_v, j) - input = - if bit_v == 1 do - swap_values(input, i, j) - else - input - end + if bit_v == 1 do + swap_atomics(arr, i, j) + end - {source, byte_v, input} + {source, byte_v} end) mirror = (pivot + input_size + 1) >>> 1 @@ -93,10 +91,8 @@ defmodule LambdaEthereumConsensus.StateTransition.Shuffling do source = (seed <> round_bytes <> position_bytes(list_end >>> 8)) |> SszEx.hash() byte_v = :binary.at(source, (list_end &&& 0xFF) >>> 3) - {_source, _byte_v, input} = - Enum.reduce((pivot + 1)..(mirror - 1)//1, {source, byte_v, input}, fn i, - {source, byte_v, - input} -> + {_source, _byte_v} = + Enum.reduce((pivot + 1)..(mirror - 1)//1, {source, byte_v}, fn i, {source, byte_v} -> loop_iter = i - (pivot + 1) j = list_end - loop_iter @@ -104,17 +100,22 @@ defmodule LambdaEthereumConsensus.StateTransition.Shuffling do byte_v = byte_v(source, j, byte_v) bit_v = bit_v(byte_v, j) - input = - if bit_v == 1 do - swap_values(input, i, j) - else - input - end + if bit_v == 1 do + swap_atomics(arr, i, j) + end - {source, byte_v, input} + {source, byte_v} end) - 
shuffle_list(input, round - 1, seed) + shuffle_rounds(arr, input_size, round - 1, seed) + end + + # O(1) swap using :atomics (1-indexed) + defp swap_atomics(arr, i, j) do + vi = :atomics.get(arr, i + 1) + vj = :atomics.get(arr, j + 1) + :atomics.put(arr, i + 1, vj) + :atomics.put(arr, j + 1, vi) end @spec position_bytes(integer()) :: binary() @@ -149,13 +150,4 @@ defmodule LambdaEthereumConsensus.StateTransition.Shuffling do padding = max(n - byte_size, 0) <> end - - def swap_values(list, i, j) do - value_i = Aja.Enum.at(list, i) - value_j = Aja.Enum.at(list, j) - - list - |> Aja.Vector.replace_at(i, value_j) - |> Aja.Vector.replace_at(j, value_i) - end end diff --git a/lib/lambda_ethereum_consensus/state_transition/state_transition.ex b/lib/lambda_ethereum_consensus/state_transition/state_transition.ex index 356378619..6ad5887f8 100644 --- a/lib/lambda_ethereum_consensus/state_transition/state_transition.ex +++ b/lib/lambda_ethereum_consensus/state_transition/state_transition.ex @@ -40,10 +40,20 @@ defmodule LambdaEthereumConsensus.StateTransition do } } - verified_transition(state_info.beacon_state, block_info, previous_roots) + verified_transition( + state_info.beacon_state, + block_info, + previous_roots, + state_info.field_hashes + ) end - def verified_transition(%BeaconState{} = state, block_info, previous_roots \\ %{}) do + def verified_transition( + %BeaconState{} = state, + block_info, + previous_roots \\ %{}, + prev_field_hashes \\ %{} + ) do with {:ok, st, timings} <- transition(state, block_info.signed_block, previous_roots) do {sig_result, timings} = timed(:signature_verify, timings, fn -> @@ -53,22 +63,498 @@ defmodule LambdaEthereumConsensus.StateTransition do end) with {:ok, st} <- sig_result do + # Determine which field hashes can be reused from the previous state. + # On epoch boundary blocks, most fields change — don't cache anything. + # On non-epoch blocks, cache expensive fields that don't change. 
+ cached_field_hashes = + cacheable_field_hashes(timings, block_info.signed_block.message, prev_field_hashes) + + # Try incremental hashing for large Aja.Vector fields: collect changed indices + # from the block, apply them to the cached tree, and put hashes in cached_field_hashes. + # This avoids the expensive Aja.Vector.to_list + NIF decode for 2.2M entries. + # Pass prev_field_hashes so the NIF can validate the cache matches the parent fork. + cached_field_hashes = + maybe_incremental_balance_hash( + cached_field_hashes, + timings, + block_info.signed_block.message, + st, + prev_field_hashes + ) + + cached_field_hashes = + maybe_incremental_participation_hash( + cached_field_hashes, + timings, + block_info.signed_block.message, + st, + prev_field_hashes + ) + + cached_field_hashes = + maybe_incremental_randao_hash( + cached_field_hashes, + timings, + block_info.signed_block.message, + st, + prev_field_hashes + ) + {merkle_result, timings} = timed(:merkleization, timings, fn -> - StateInfo.from_beacon_state(st, block_root: block_info.root) + StateInfo.from_beacon_state(st, + block_root: block_info.root, + cached_field_hashes: cached_field_hashes + ) end) with {:ok, new_state_info} <- merkle_result do + # DIAGNOSTIC: at every epoch boundary, cross-check hash_beacon_state_cached + # against the generic hash_tree_root to detect merkleization divergence. + epoch_processed? = Map.has_key?(timings, :"epoch.rewards_and_penalties") + + if epoch_processed? do + cross_check_merkle_roots(st, new_state_info, block_info.signed_block.message.slot) + end + if block_info.signed_block.message.state_root == new_state_info.root do {:ok, new_state_info, timings} else - {:error, "mismatched state roots"} + # Incremental cache may have produced a wrong hash. Retry with full + # merkleization (no cached field hashes) before declaring the block invalid. 
+ require Logger + + if cached_field_hashes != %{} do + Logger.warning( + "[StateTransition] Incremental cache produced wrong state root for " <> + "slot #{block_info.signed_block.message.slot}, retrying with full merkleization" + ) + + {retry_result, timings} = + timed(:merkleization, timings, fn -> + StateInfo.from_beacon_state(st, + block_root: block_info.root, + cached_field_hashes: %{} + ) + end) + + with {:ok, retry_state_info} <- retry_result do + if block_info.signed_block.message.state_root == retry_state_info.root do + {:ok, retry_state_info, timings} + else + diagnose_state_root_mismatch(st, block_info, retry_state_info) + {:error, "mismatched state roots"} + end + end + else + diagnose_state_root_mismatch(st, block_info, new_state_info) + {:error, "mismatched state roots"} + end end end end end end + # Proactive diagnostic: at every epoch boundary, compare hash_beacon_state_cached result + # against the generic hash_tree_root to detect which NIF path diverges. + defp cross_check_merkle_roots(state, state_info, slot) do + require Logger + + case Ssz.hash_tree_root(state) do + {:ok, generic_root} -> + if generic_root == state_info.root do + Logger.info( + "[StateTransition] MERKLE CROSS-CHECK slot #{slot}: MATCH " <> + "(both 0x#{Base.encode16(generic_root, case: :lower) |> String.slice(0, 16)}...)" + ) + else + Logger.error( + "[StateTransition] MERKLE CROSS-CHECK slot #{slot}: MISMATCH! " <> + "cached=0x#{Base.encode16(state_info.root, case: :lower) |> String.slice(0, 16)}..., " <> + "generic=0x#{Base.encode16(generic_root, case: :lower) |> String.slice(0, 16)}..." + ) + + # Identify which fields differ + diagnose_field_hashes(state, state_info.field_hashes) + end + + {:error, err} -> + Logger.error("[StateTransition] MERKLE CROSS-CHECK failed: #{inspect(err)}") + end + end + + # Diagnostic: when state root mismatches, compare hash_beacon_state_cached (field-by-field) + # against the generic hash_tree_root (full struct hashing) to isolate the bug. 
+ defp diagnose_state_root_mismatch(state, block_info, state_info) do + slot = block_info.signed_block.message.slot + expected = block_info.signed_block.message.state_root + cached_root = state_info.root + + Logger.error( + "[StateTransition] DIAGNOSTIC: state root mismatch at slot #{slot}. " <> + "Expected: 0x#{Base.encode16(expected, case: :lower)}, " <> + "cached_hash_root: 0x#{Base.encode16(cached_root, case: :lower)}" + ) + + # Compare against the generic hash_tree_root (completely different NIF path) + case Ssz.hash_tree_root(state) do + {:ok, generic_root} -> + if generic_root == cached_root do + Logger.error( + "[StateTransition] DIAGNOSTIC: generic hash_tree_root AGREES with cached_hash " <> + "(both 0x#{Base.encode16(generic_root, case: :lower)}). " <> + "Bug is in STATE TRANSITION, not merkleization." + ) + else + Logger.error( + "[StateTransition] DIAGNOSTIC: generic hash_tree_root DISAGREES! " <> + "generic=0x#{Base.encode16(generic_root, case: :lower)}, " <> + "cached=0x#{Base.encode16(cached_root, case: :lower)}. " <> + "Bug is in hash_beacon_state_cached NIF." 
+ ) + + # Find which field(s) differ + diagnose_field_hashes(state, state_info.field_hashes) + end + + {:error, err} -> + Logger.error("[StateTransition] DIAGNOSTIC: hash_tree_root failed: #{inspect(err)}") + end + end + + # Compare individual field hashes to find which field is wrong + defp diagnose_field_hashes(state, cached_field_hashes) do + field_names = [ + {0, :genesis_time}, + {1, :genesis_validators_root}, + {2, :slot}, + {3, :fork}, + {4, :latest_block_header}, + {5, :block_roots}, + {6, :state_roots}, + {7, :historical_roots}, + {8, :eth1_data}, + {9, :eth1_data_votes}, + {10, :eth1_deposit_index}, + {11, :validators}, + {12, :balances}, + {13, :randao_mixes}, + {14, :slashings}, + {15, :previous_epoch_participation}, + {16, :current_epoch_participation}, + {17, :justification_bits}, + {18, :previous_justified_checkpoint}, + {19, :current_justified_checkpoint}, + {20, :finalized_checkpoint}, + {21, :inactivity_scores}, + {22, :current_sync_committee}, + {23, :next_sync_committee}, + {24, :latest_execution_payload_header}, + {25, :next_withdrawal_index}, + {26, :next_withdrawal_validator_index}, + {27, :historical_summaries}, + {28, :deposit_requests_start_index}, + {29, :deposit_balance_to_consume}, + {30, :exit_balance_to_consume}, + {31, :earliest_exit_epoch}, + {32, :consolidation_balance_to_consume}, + {33, :earliest_consolidation_epoch}, + {34, :pending_deposits}, + {35, :pending_partial_withdrawals}, + {36, :pending_consolidations}, + {37, :proposer_lookahead} + ] + + for {idx, name} <- field_names do + cached_hash = Map.get(cached_field_hashes, idx) + + if cached_hash != nil do + Logger.error( + "[StateTransition] DIAGNOSTIC: field #{idx} (#{name}) " <> + "cached_hash=0x#{Base.encode16(cached_hash, case: :lower) |> String.slice(0, 16)}..." + ) + end + end + + :ok + end + + # Fields safe to cache on non-epoch blocks when no validator-modifying operations present. 
+ # These fields are only modified during epoch processing (not block operations): + # 7 = historical_roots (frozen), 11 = validators, 14 = slashings, + # 17 = justification_bits, 18 = previous_justified_checkpoint, + # 19 = current_justified_checkpoint, 20 = finalized_checkpoint, + # 21 = inactivity_scores, 22 = current_sync_committee, + # 23 = next_sync_committee, 27 = historical_summaries, 37 = proposer_lookahead + # NOTE: field 15 (previous_epoch_participation) is NOT cacheable — attestation + # processing updates it on every block for previous-epoch attestations. + @cacheable_non_epoch_fields [7, 11, 14, 17, 18, 19, 20, 21, 22, 23, 27, 37] + + # When block operations DO modify validators (slashings, exits, BLS changes, + # consolidations, deposits), exclude fields also modified by those operations: + # 11 = validators (slashings/exits/BLS changes), 14 = slashings (slash_validator), + # 21 = inactivity_scores (add_validator_to_registry appends on new deposits) + @cacheable_non_epoch_fields_no_validators [7, 17, 18, 19, 20, 22, 23, 27, 37] + + defp cacheable_field_hashes(_timings, _block, prev_field_hashes) + when prev_field_hashes == %{}, + do: %{} + + defp cacheable_field_hashes(timings, block, prev_field_hashes) do + # If epoch processing happened, don't cache anything (most fields change) + epoch_processed? = Map.has_key?(timings, :"epoch.rewards_and_penalties") + + if epoch_processed? do + %{} + else + fields = + if block_modifies_validators?(block), + do: @cacheable_non_epoch_fields_no_validators, + else: @cacheable_non_epoch_fields + + Map.take(prev_field_hashes, fields) + end + end + + # Check if a block contains operations that can modify state.validators. + # Slashings, exits, BLS-to-execution changes, withdrawal requests (full exits), + # consolidation requests, and legacy deposits can all modify the validators vector. + # Deposit requests (execution_requests.deposits) only modify pending_deposits, not validators. 
+  defp block_modifies_validators?(block) do
+    body = block.body
+
+    # True unless every validator-touching operation list is empty. `and` stops at
+    # the first non-empty list, so later fields are only read when needed.
+    not (body.proposer_slashings == [] and
+           body.attester_slashings == [] and
+           body.voluntary_exits == [] and
+           body.bls_to_execution_changes == [] and
+           body.deposits == [] and
+           body.execution_requests.withdrawals == [] and
+           body.execution_requests.consolidations == [])
+  end
+
+  # Tries to refresh the balances field hash (field 12) incrementally: only the
+  # balances at the indices touched by this block are passed to the Rust NIF,
+  # avoiding the expensive Aja.Vector.to_list + NIF decode for 2.2M balances.
+  # Returns cached_field_hashes unchanged when epoch processing ran, when nothing
+  # is cached, when the previous state has no hash for field 12, when the changed
+  # indices cannot be determined, or when the NIF reports a cache miss.
+  defp maybe_incremental_balance_hash(
+         cached_field_hashes,
+         timings,
+         block,
+         state,
+         prev_field_hashes
+       ) do
+    crossed_epoch? = Map.has_key?(timings, :"epoch.rewards_and_penalties")
+    previous_root = Map.get(prev_field_hashes, 12)
+
+    cond do
+      crossed_epoch? ->
+        cached_field_hashes
+
+      cached_field_hashes == %{} ->
+        cached_field_hashes
+
+      is_nil(previous_root) ->
+        cached_field_hashes
+
+      true ->
+        case collect_changed_balance_indices(block, state) do
+          :skip ->
+            cached_field_hashes
+
+          {:ok, indices} ->
+            balances = state.balances
+
+            # One {index, new_balance} pair per distinct changed validator.
+            updates =
+              for idx <- Enum.uniq(indices), do: {idx, Aja.Vector.at!(balances, idx)}
+
+            result =
+              Ssz.update_balance_cache(updates, Aja.Vector.size(balances), previous_root)
+
+            case result do
+              {:ok, hash} -> Map.put(cached_field_hashes, 12, hash)
+              {:error, :cache_miss} -> cached_field_hashes
+            end
+        end
+    end
+  end
+
+  # Collect all validator indices whose balances changed during block processing.
+  # Sources: sync committee (512), withdrawals (<=16), proposer rewards, slashings.
+  # Returns {:ok, indices} (duplicates possible) or :skip when the set cannot be
+  # determined cheaply.
+  defp collect_changed_balance_indices(block, state) do
+    body = block.body
+
+    # If slashings occurred, the slashed validator's balance changes AND the
+    # whistleblower/proposer reward is spread — hard to track precisely. Skip.
+    # Also skip when withdrawal/consolidation requests exist — these can trigger
+    # switch_to_compounding_validator → queue_excess_active_balance, which modifies
+    # balances at indices we can't easily predict.
+    trackable? =
+      body.proposer_slashings == [] and body.attester_slashings == [] and
+        body.execution_requests.withdrawals == [] and
+        body.execution_requests.consolidations == []
+
+    if trackable? do
+      epoch = Accessors.get_current_epoch(state)
+      lookup_slot = max(Misc.compute_start_slot_at_epoch(epoch), 1) - 1
+
+      # Sync committee indices: look up from ETS cache (populated by process_sync_aggregate)
+      sync_indices =
+        case Accessors.get_block_root_at_slot(state, lookup_slot) do
+          {:ok, root} ->
+            case :ets.lookup(:sync_committee_indices, {epoch, root}) do
+              [] -> :miss
+              [{{^epoch, ^root}, indices}] -> indices
+            end
+
+          _ ->
+            :miss
+        end
+
+      case sync_indices do
+        indices when is_list(indices) ->
+          # Withdrawal validator indices
+          withdrawal_indices =
+            Enum.map(body.execution_payload.withdrawals, fn w -> w.validator_index end)
+
+          # Proposer gets rewards from sync aggregate + attestations
+          {:ok, indices ++ withdrawal_indices ++ [block.proposer_index]}
+
+        :miss ->
+          :skip
+      end
+    else
+      :skip
+    end
+  end
+
+  # Try to compute the participation field hashes incrementally (fields 15, 16).
+  # Collects attesting validator indices from the block's attestations, reads
+  # their new participation values, and passes to the NIF for incremental update.
+  defp maybe_incremental_participation_hash(
+         cached_field_hashes,
+         timings,
+         block,
+         state,
+         prev_field_hashes
+       ) do
+    epoch_processed? = Map.has_key?(timings, :"epoch.rewards_and_penalties")
+
+    if epoch_processed?
+ or cached_field_hashes == %{} do
+      cached_field_hashes
+    else
+      epoch = Accessors.get_current_epoch(state)
+
+      # Collect attesting validator indices, split by target epoch
+      {prev_indices, curr_indices} =
+        collect_attesting_indices(block.body.attestations, state, epoch)
+
+      cached_field_hashes =
+        try_incremental_participation(
+          cached_field_hashes,
+          15,
+          prev_indices,
+          state.previous_epoch_participation,
+          prev_field_hashes
+        )
+
+      try_incremental_participation(
+        cached_field_hashes,
+        16,
+        curr_indices,
+        state.current_epoch_participation,
+        prev_field_hashes
+      )
+    end
+  end
+
+  # Incrementally refreshes the hash of a single participation field.
+  #
+  #   cached_field_hashes - map of field number => hash being assembled for the new state
+  #   field_num           - participation field to refresh (15 or 16)
+  #   indices             - validator indices whose entry changed; may contain duplicates
+  #   participation       - the new state's participation vector for that field
+  #   prev_field_hashes   - field hashes of the previous state
+  #
+  # Returns cached_field_hashes with field_num set when the cache update succeeds, and
+  # cached_field_hashes unchanged when the previous state has no hash for the field or
+  # the cache reports a miss.
+  defp try_incremental_participation(
+         cached_field_hashes,
+         field_num,
+         indices,
+         participation,
+         prev_field_hashes
+       ) do
+    case Map.get(prev_field_hashes, field_num) do
+      nil ->
+        cached_field_hashes
+
+      prev_hash ->
+        # An empty `indices` list needs no special case: it produces an empty update
+        # list, which asks the cache for the current hash of an unchanged field.
+        updates =
+          indices
+          |> Enum.uniq()
+          |> Enum.map(fn idx -> {idx, Aja.Vector.at!(participation, idx)} end)
+
+        case Ssz.update_participation_cache(
+               field_num,
+               updates,
+               Aja.Vector.size(participation),
+               prev_hash
+             ) do
+          {:ok, hash} -> Map.put(cached_field_hashes, field_num, hash)
+          {:error, :cache_miss} -> cached_field_hashes
+        end
+    end
+  end
+
+  # Try to compute the randao_mixes field hash incrementally (field 13).
+  # Only 1 entry changes per block (current epoch's randao mix). Pass the index
+  # and new value to the NIF to update just 16 nodes instead of hashing 65536 entries.
+  defp maybe_incremental_randao_hash(
+         cached_field_hashes,
+         timings,
+         _block,
+         state,
+         prev_field_hashes
+       ) do
+    epoch_processed?
= Map.has_key?(timings, :"epoch.rewards_and_penalties") + prev_hash = Map.get(prev_field_hashes, 13) + + if epoch_processed? or cached_field_hashes == %{} or is_nil(prev_hash) do + cached_field_hashes + else + epoch = Accessors.get_current_epoch(state) + epochs_per_historical_vector = ChainSpec.get("EPOCHS_PER_HISTORICAL_VECTOR") + index = rem(epoch, epochs_per_historical_vector) + new_value = Aja.Vector.at!(state.randao_mixes, index) + + case Ssz.update_randao_cache( + index, + new_value, + Aja.Vector.size(state.randao_mixes), + prev_hash + ) do + {:ok, hash} -> Map.put(cached_field_hashes, 13, hash) + {:error, :cache_miss} -> cached_field_hashes + end + end + end + + # Collect attesting validator indices from block attestations, split by target epoch. + # Returns {previous_epoch_indices, current_epoch_indices}. + # Uses cached beacon committees from ETS for efficient lookup. + defp collect_attesting_indices(attestations, state, current_epoch) do + Enum.reduce(attestations, {[], []}, fn att, {prev_acc, curr_acc} -> + is_current = att.data.target.epoch == current_epoch + + case Accessors.get_attesting_indices(state, att) do + {:ok, indices} -> + idx_list = MapSet.to_list(indices) + + if is_current, + do: {prev_acc, idx_list ++ curr_acc}, + else: {idx_list ++ prev_acc, curr_acc} + + _ -> + {prev_acc, curr_acc} + end + end) + end + @spec transition(BeaconState.t(), SignedBeaconBlock.t()) :: {:ok, BeaconState.t(), timings()} def transition(beacon_state, signed_block, previous_roots \\ %{}) do @@ -216,6 +702,11 @@ defmodule LambdaEthereumConsensus.StateTransition do end defp process_epoch(%BeaconState{} = state) do + # Force GC before epoch processing to start with a clean heap. + # Epoch processing allocates many large temporaries (Aja.Vectors, lists). + # Without this, deferred GC can cause 1-2s pauses mid-processing. 
+ :erlang.garbage_collect() + {:ok, state, %{}} |> epoch_op( :justification_and_finalization, @@ -267,6 +758,7 @@ defmodule LambdaEthereumConsensus.StateTransition do |> block_op(:execution_payload, &Operations.process_execution_payload(&1, block.body)) |> block_op(:randao, &Operations.process_randao(&1, block.body)) |> block_op(:eth1_data, &Operations.process_eth1_data(&1, block.body)) + |> prefetch_committees_for_block() |> block_op(:operations, &Operations.process_operations(&1, block.body)) |> block_op( :sync_aggregate, @@ -274,6 +766,23 @@ defmodule LambdaEthereumConsensus.StateTransition do ) end + # Ensure beacon committees for the current epoch are cached before processing + # attestations. Without this, each attestation triggers an expensive on-demand + # committee computation (~650ms × 8 committees = ~5.2s per block). The full + # epoch prefetch (~10s) amortizes to ~312ms per block across 32 blocks. + defp prefetch_committees_for_block({:ok, state, timings}) do + epoch = Misc.compute_epoch_at_slot(state.slot) + + {_, timings} = + timed(:prefetch_committees, timings, fn -> + Accessors.maybe_prefetch_committees(state, epoch) + end) + + {:ok, state, timings} + end + + defp prefetch_committees_for_block(err), do: err + def epoch_op({:ok, state, timings}, operation, f) do key = :"epoch.#{operation}" diff --git a/lib/lambda_ethereum_consensus/store/block_states.ex b/lib/lambda_ethereum_consensus/store/block_states.ex index 3a0b1f79c..f4af14d93 100644 --- a/lib/lambda_ethereum_consensus/store/block_states.ex +++ b/lib/lambda_ethereum_consensus/store/block_states.ex @@ -7,8 +7,14 @@ defmodule LambdaEthereumConsensus.Store.BlockStates do alias Types.StateInfo @table :states_by_block_hash - @max_entries 128 - @batch_prune_size 16 + # Each BeaconState is ~460MB on Hoodi (~200K validators) and ~775MB on mainnet + # (~1.2M validators). With 10 entries on mainnet, the cache uses ~7.7GB. 
+ # 6 entries caused frequent cache misses triggering 30s+ LevelDB reads that + # blocked the Libp2pPort GenServer. 10 entries balances memory (7.7 GB) with + # cache hit rate. Previously 16 (12.4 GB, OOM during epoch processing) and + # before that 128 (55+ GB, swap thrashing). + @max_entries 10 + @batch_prune_size 2 ########################## ### Public API @@ -20,7 +26,11 @@ defmodule LambdaEthereumConsensus.Store.BlockStates do table: @table, max_entries: @max_entries, batch_prune_size: @batch_prune_size, - store_func: fn _k, v -> StateDb.store_state_info(v) end + # NOTE: LevelDB persistence is handled by the caller (handlers.ex uses + # Task.Supervisor for async writes). The LRU cache only manages ETS caching. + # Previously this was synchronous and blocked the Libp2pPort GenServer for + # 30-60s during state serialization+write. + store_func: fn _k, _v -> :ok end ) end @@ -32,11 +42,20 @@ defmodule LambdaEthereumConsensus.Store.BlockStates do end @spec store_state_info(StateInfo.t()) :: :ok - def store_state_info(state_info), do: LRUCache.put(@table, state_info.root, state_info) + def store_state_info(state_info), do: LRUCache.put_cache(@table, state_info.root, state_info) @spec get_state_info(Types.root()) :: StateInfo.t() | nil def get_state_info(block_root), do: LRUCache.get(@table, block_root, &fetch_state/1) + @doc """ + Get state info from the ETS LRU cache only, without falling through to + LevelDB. Returns nil on cache miss. Used by prefetch_states to avoid + blocking the ForkChoice GenServer with 28-85s LevelDB deserialization + of 775MB mainnet BeaconStates. 
+ """ + @spec get_state_info_cached(Types.root()) :: StateInfo.t() | nil + def get_state_info_cached(block_root), do: LRUCache.get_cached(@table, block_root) + @spec get_state_info!(Types.root()) :: StateInfo.t() def get_state_info!(block_root) do case get_state_info(block_root) do @@ -45,6 +64,13 @@ defmodule LambdaEthereumConsensus.Store.BlockStates do end end + @doc """ + Touch a cache entry to refresh its TTL without fetching or inserting. + Used to prevent parent state eviction during long prefetch operations. + """ + @spec touch(Types.root()) :: :ok + def touch(block_root), do: LRUCache.touch(@table, block_root) + ########################## ### Private Functions ########################## diff --git a/lib/lambda_ethereum_consensus/store/blocks.ex b/lib/lambda_ethereum_consensus/store/blocks.ex index 4d1af4847..0767cabf8 100644 --- a/lib/lambda_ethereum_consensus/store/blocks.ex +++ b/lib/lambda_ethereum_consensus/store/blocks.ex @@ -48,6 +48,14 @@ defmodule LambdaEthereumConsensus.Store.Blocks do @spec get_block_info(Types.root()) :: BlockInfo.t() | nil def get_block_info(block_root), do: LRUCache.get(@table, block_root, &fetch_block_info/1) + @doc """ + Like get_block_info/1 but only checks the ETS LRU cache. + Returns nil on cache miss. Used by hot paths to avoid blocking + Libp2pPort on eleveldb.get/3 NIF reads. + """ + @spec get_block_info_cached(Types.root()) :: BlockInfo.t() | nil + def get_block_info_cached(block_root), do: LRUCache.get_cached(@table, block_root) + @spec get_block_info!(Types.root()) :: BlockInfo.t() def get_block_info!(block_root) do case LRUCache.get(@table, block_root, &fetch_block_info/1) do @@ -64,6 +72,15 @@ defmodule LambdaEthereumConsensus.Store.Blocks do end end + @doc "Cache-only block lookup; returns nil on miss." 
+ @spec get_block_cached(Types.root()) :: BeaconBlock.t() | nil + def get_block_cached(block_root) do + case get_block_info_cached(block_root) do + nil -> nil + %{signed_block: %{message: block}} -> block + end + end + @spec has_block?(Types.root()) :: boolean() def has_block?(block_root), do: not (get_block_info(block_root) |> is_nil()) diff --git a/lib/lambda_ethereum_consensus/store/data_column_db.ex b/lib/lambda_ethereum_consensus/store/data_column_db.ex index 0e6d201b1..db5a1a143 100644 --- a/lib/lambda_ethereum_consensus/store/data_column_db.ex +++ b/lib/lambda_ethereum_consensus/store/data_column_db.ex @@ -53,6 +53,17 @@ defmodule LambdaEthereumConsensus.Store.DataColumnDb do match?({:ok, _}, Db.get(key)) end + @doc """ + Deletes stored data column sidecars for a block root at the given column indices. + Used to purge potentially corrupted columns so they can be re-downloaded. + """ + @spec delete_columns_for_block(Types.root(), [Types.column_index()]) :: :ok + def delete_columns_for_block(block_root, column_indices) do + Enum.each(column_indices, fn ci -> + Db.delete(sidecar_key(block_root, ci)) + end) + end + @spec prune_old_data_columns(non_neg_integer()) :: :ok | {:error, String.t()} | :not_found def prune_old_data_columns(current_finalized_slot) do slot = diff --git a/lib/lambda_ethereum_consensus/store/db.ex b/lib/lambda_ethereum_consensus/store/db.ex index fda44fea9..bd0893362 100644 --- a/lib/lambda_ethereum_consensus/store/db.ex +++ b/lib/lambda_ethereum_consensus/store/db.ex @@ -67,7 +67,7 @@ defmodule LambdaEthereumConsensus.Store.Db do @impl true def init(opts) do - db_dir = Keyword.get(opts, :dir, get_dir()) + db_dir = Keyword.get_lazy(opts, :dir, &get_dir/0) db_full_path = Path.expand(db_dir) File.mkdir_p!(db_full_path) {:ok, ref} = Exleveldb.open(db_full_path, create_if_missing: true) diff --git a/lib/lambda_ethereum_consensus/store/kv_schema.ex b/lib/lambda_ethereum_consensus/store/kv_schema.ex index e9a62bab9..0af7e70b0 100644 --- 
a/lib/lambda_ethereum_consensus/store/kv_schema.ex +++ b/lib/lambda_ethereum_consensus/store/kv_schema.ex @@ -65,19 +65,26 @@ defmodule LambdaEthereumConsensus.Store.KvSchema do direction = Keyword.get(opts, :direction, :prev) with {:ok, it} <- Db.iterate_keys(), - {:ok, encoded_start} <- do_encode_key(start_key), - {:ok, ^encoded_start} <- Db.iterator_move(it, encoded_start) do - res = iterate(it, starting_value, f, direction, encoded_start, include_first?) + {:ok, encoded_start} <- do_encode_key(start_key) do + result = + case Db.iterator_move(it, encoded_start) do + {:ok, ^encoded_start} -> + iterate(it, starting_value, f, direction, encoded_start, include_first?) + + {:ok, _other_key} -> + # The exact start_key doesn't exist in the DB. The iterator is + # positioned at the next lexicographically higher key. We can + # still iterate from here — accumulate/4 validates the prefix + # for each key, so we won't leave our table's key space. + iterate(it, starting_value, f, direction) + + {:error, :invalid_iterator} -> + # No key at or after start_key exists in the DB. + starting_value + end + Db.iterator_close(it) - {:ok, res} - else - # The iterator moved for the first time to a place where it wasn't expected. - {:ok, some_key} -> - {:error, - "Failed to start iterator for table #{@prefix}. The obtained key is: #{some_key}"} - - other -> - other + {:ok, result} end end) end diff --git a/lib/lambda_ethereum_consensus/store/lru_cache.ex b/lib/lambda_ethereum_consensus/store/lru_cache.ex index 1288f4c31..38256fcf8 100644 --- a/lib/lambda_ethereum_consensus/store/lru_cache.ex +++ b/lib/lambda_ethereum_consensus/store/lru_cache.ex @@ -38,6 +38,34 @@ defmodule LambdaEthereumConsensus.Store.LRUCache do :ok end + @doc """ + Insert a value into the ETS cache without calling the store_func. + The ETS insert is immediate (public table), and TTL management is + deferred via GenServer.cast (non-blocking). Use this when LevelDB + persistence is handled separately by the caller. 
+ """ + @spec put_cache(atom(), key(), value()) :: :ok + def put_cache(table, key, value) do + :ets.insert(table, {key, value, nil}) + GenServer.cast(table, {:touch_entry, key}) + :ok + end + + @doc """ + Touch a cache entry to refresh its TTL without fetching or returning it. + No-op if the key is not in the cache. Used to prevent eviction of + critical entries (e.g., parent state) during long operations. + """ + @spec touch(atom(), key()) :: :ok + def touch(table, key) do + case :ets.lookup_element(table, key, 2, nil) do + nil -> :ok + _v -> GenServer.cast(table, {:touch_entry, key}) + end + + :ok + end + @spec get(atom(), key(), (key() -> value() | nil)) :: value() | nil def get(table, key, fetch_func) do case :ets.lookup_element(table, key, 2, nil) do @@ -58,6 +86,23 @@ defmodule LambdaEthereumConsensus.Store.LRUCache do end end + @doc """ + Get a value from the ETS cache only, without falling through to the + persistence layer. Returns nil on cache miss. Used by prefetch_states + to avoid blocking the ForkChoice GenServer with 28-85s LevelDB reads. + """ + @spec get_cached(atom(), key()) :: value() | nil + def get_cached(table, key) do + case :ets.lookup_element(table, key, 2, nil) do + nil -> + nil + + v -> + :ok = GenServer.cast(table, {:touch_entry, key}) + v + end + end + ########################## ### GenServer Callbacks ########################## diff --git a/lib/lambda_ethereum_consensus/store/state_db.ex b/lib/lambda_ethereum_consensus/store/state_db.ex index e13cab075..742a5075a 100644 --- a/lib/lambda_ethereum_consensus/store/state_db.ex +++ b/lib/lambda_ethereum_consensus/store/state_db.ex @@ -71,6 +71,8 @@ defmodule LambdaEthereumConsensus.Store.StateDb do Logger.error( "[Block pruning] Failed to remove block from slot #{inspect(slot)}. 
Reason: #{inspect(other)}" ) + + acc end end) diff --git a/lib/lambda_ethereum_consensus/store/store_db.ex b/lib/lambda_ethereum_consensus/store/store_db.ex index ca738924f..338014a73 100644 --- a/lib/lambda_ethereum_consensus/store/store_db.ex +++ b/lib/lambda_ethereum_consensus/store/store_db.ex @@ -6,6 +6,7 @@ defmodule LambdaEthereumConsensus.Store.StoreDb do alias Types.Store @store_prefix "store" + @genesis_time_key {__MODULE__, :genesis_time} @spec fetch_store() :: {:ok, Types.Store.t()} | :not_found def fetch_store() do @@ -16,22 +17,63 @@ defmodule LambdaEthereumConsensus.Store.StoreDb do @spec persist_store(Types.Store.t()) :: :ok def persist_store(%Types.Store{} = store) do + # Cache genesis_time in persistent_term for fast access. + # This avoids deserializing the entire store just to read genesis_time. + cache_genesis_time(store.genesis_time) + :telemetry.span([:db, :latency], %{}, fn -> {put(@store_prefix, Store.remove_cache(store)), %{module: "fork_choice", action: "persist"}} end) end + @doc """ + Serialize the store in the calling process, then spawn a process to write it + to LevelDB. This avoids the deep-copy overhead of spawning with the full Store + struct (~1.2M latest_messages on mainnet = 15s copy + 3-5 GB extra memory). + The serialized binary is a refc binary shared between processes without copying. 
+ """ + @spec persist_store_async(Types.Store.t()) :: pid() + def persist_store_async(%Types.Store{} = store) do + cache_genesis_time(store.genesis_time) + # Serialize in-process (no deep copy needed, ~7-9s on mainnet with compression) + binary = :erlang.term_to_binary(Store.remove_cache(store), [{:compressed, 1}]) + # Spawn only the LevelDB write — binary is shared via refc, no copy + spawn(fn -> Db.put(@store_prefix, binary) end) + end + @spec fetch_genesis_time() :: {:ok, Types.uint64()} | :not_found def fetch_genesis_time() do - with {:ok, store} <- fetch_store() do - store.genesis_time + case cached_genesis_time() do + nil -> + with {:ok, store} <- fetch_store() do + cache_genesis_time(store.genesis_time) + {:ok, store.genesis_time} + end + + time -> + {:ok, time} end end @spec fetch_genesis_time!() :: Types.uint64() def fetch_genesis_time!() do - {:ok, %{genesis_time: genesis_time}} = fetch_store() - genesis_time + case cached_genesis_time() do + nil -> + {:ok, %{genesis_time: genesis_time}} = fetch_store() + cache_genesis_time(genesis_time) + genesis_time + + time -> + time + end + end + + defp cached_genesis_time() do + :persistent_term.get(@genesis_time_key, nil) + end + + defp cache_genesis_time(genesis_time) do + :persistent_term.put(@genesis_time_key, genesis_time) end defp get(key) do diff --git a/lib/libp2p_port.ex b/lib/libp2p_port.ex index 0c77e3cc3..33ee3a838 100644 --- a/lib/libp2p_port.ex +++ b/lib/libp2p_port.ex @@ -466,10 +466,15 @@ defmodule LambdaEthereumConsensus.Libp2pPort do end # There may be pending blocks from a prior execution, regardless of the optimistic sync - # state. We should run a process_blocks round. If no pending blocks are available, this - # call is a noop. + # state. First recover any blocks that were wrongly marked :invalid due to transient + # failures, then run a process_blocks round. Schedule a column download retry so + # recovered blocks in :download_columns get their columns checked.
@impl GenServer def handle_continue(:check_pending_blocks, state) do + if PendingBlocks.recover_invalid_blocks() == :recovered do + Process.send_after(self(), :retry_download_columns, 5_000) + end + {:noreply, update_in(state.store, &PendingBlocks.process_blocks/1)} end @@ -531,19 +536,29 @@ defmodule LambdaEthereumConsensus.Libp2pPort do @impl GenServer def handle_cast({:error_downloading_chunk, range, reason}, state) do + {first_slot, last_slot} = range + count = last_slot - first_slot + 1 + Logger.error( "[Optimistic Sync] Failed to download the block range #{inspect(range)}, no retries left. Reason: #{inspect(reason)}" ) - # TODO: kill the genserver or retry sync all together. - {:noreply, state} + # Decrement blocks_remaining so the node doesn't get stuck thinking it's + # still syncing. Without this, a failed range request leaves blocks_remaining + # positive forever, syncing stays true, and gossip subscription recovery + # never triggers. + new_state = + state + |> Map.update(:blocks_remaining, 0, fn n -> max(n - count, 0) end) + |> subscribe_if_no_blocks() + + {:noreply, new_state} end @impl GenServer def handle_info(:on_tick, state) do schedule_next_tick() time = :os.system_time(:second) - {:noreply, on_tick(time, state)} end @@ -591,7 +606,7 @@ defmodule LambdaEthereumConsensus.Libp2pPort do @impl GenServer def handle_info({_port, {:data, data}}, state) do - %Notification{n: {_, payload}} = Notification.decode(data) + %Notification{n: {_type, payload}} = Notification.decode(data) {:noreply, handle_notification(payload, state)} end @@ -612,7 +627,7 @@ defmodule LambdaEthereumConsensus.Libp2pPort do # Self-sustaining heartbeat: always reschedule so stuck :download_columns # blocks are retried regardless of failure mode (no_peers, partial/empty response, error). 
- Process.send_after(self(), :retry_download_columns, 60_000) + Process.send_after(self(), :retry_download_columns, 12_000) {:noreply, update_in(state.store, &PendingBlocks.retry_download_columns/1)} end @@ -713,7 +728,7 @@ defmodule LambdaEthereumConsensus.Libp2pPort do direction: "->elixir" }) - case IncomingRequestsHandler.handle(protocol_id, request_id, message) do + case IncomingRequestsHandler.handle(protocol_id, request_id, message, state.store) do {:ok, response} -> send_response(response, port) @@ -943,6 +958,28 @@ defmodule LambdaEthereumConsensus.Libp2pPort do when slot - head_slot == 0, do: %{state | syncing: false} + defp update_syncing_status( + %{syncing: true, blocks_remaining: 0} = state, + {slot, _third}, + %Types.Store{head_slot: head_slot} + ) + when slot - head_slot > 2 do + last_resync_head = Map.get(state, :last_resync_head) + + if last_resync_head == head_slot do + # Already triggered a resync and head hasn't moved yet (blocks still processing). + # Wait for the processing pipeline to make progress before re-syncing. + state + else + Logger.info( + "[Libp2p] Sync batch complete but still #{slot - head_slot} slots behind, re-syncing" + ) + + Process.send_after(self(), :sync_blocks, 500) + state |> Map.put(:blocks_remaining, -1) |> Map.put(:last_resync_head, head_slot) + end + end + defp update_syncing_status(state, _slot_data, _), do: state defp schedule_next_tick() do diff --git a/lib/mix/tasks/bench/blocks.ex b/lib/mix/tasks/bench/blocks.ex new file mode 100644 index 000000000..c756c68eb --- /dev/null +++ b/lib/mix/tasks/bench/blocks.ex @@ -0,0 +1,264 @@ +defmodule Mix.Tasks.Bench.Blocks do + @moduledoc """ + Process downloaded blocks through the fork choice pipeline. + + Loads cached benchmark data from disk (produced by `mix bench.download`) + and processes blocks sequentially through `ForkChoice.process_block/2`. 
+ + ## Usage + + mix bench.blocks --data-dir bench/data/slot_9649056_200 + mix bench.blocks --data-dir bench/data/slot_9649056_200 --log-level warning + """ + use Mix.Task + + require Logger + + alias LambdaEthereumConsensus.ForkChoice + alias LambdaEthereumConsensus.ForkChoice.Handlers + alias LambdaEthereumConsensus.StateTransition.Cache + alias LambdaEthereumConsensus.Store + alias LambdaEthereumConsensus.Store.CheckpointStates + alias LambdaEthereumConsensus.Store.DataColumnDb + alias Types.BlockInfo + alias Types.DataColumnSidecar + alias Types.SignedBeaconBlock + + @shortdoc "Run block processing benchmark" + + @switches [data_dir: :string, log_level: :string] + + @impl Mix.Task + def run(args) do + {opts, _rest} = OptionParser.parse!(args, strict: @switches) + + data_dir = + opts[:data_dir] || Mix.raise("--data-dir is required") + + log_level = + opts + |> Keyword.get(:log_level, "info") + |> String.to_existing_atom() + + Logger.configure(level: log_level) + + data_dir = Path.expand(data_dir) + + unless File.dir?(data_dir) do + Mix.raise("Data directory does not exist: #{data_dir}") + end + + metadata = read_metadata(data_dir) + start_slot = metadata["start_slot"] + count = metadata["count"] + + # We skip app.start because runtime.exs parses System.argv() + # with strict validation, rejecting our custom flags. + # boot_infrastructure starts everything we need directly. + + network = metadata["network"] || "mainnet" + boot_infrastructure(network) + + anchor_state = load_state(data_dir) + anchor_block = load_anchor_block(data_dir, start_slot) + blocks = load_blocks(data_dir, start_slot) + column_count = load_all_columns(data_dir) + + Logger.info("Loaded anchor state at slot #{anchor_state.slot}") + Logger.info("Loaded #{length(blocks)} blocks, #{column_count} data columns") + + # Skip data availability check when no columns were downloaded + # (e.g. 
blobs pruned on source node, or pre-Fulu data) + if column_count == 0 do + Application.put_env(:lambda_ethereum_consensus, :skip_data_availability, true) + Logger.info("No columns found, skipping data availability checks") + end + + {:ok, store} = Types.Store.get_forkchoice_store(anchor_state, anchor_block) + store = Handlers.on_tick(store, :os.system_time(:second)) + + # Pre-warm committee cache to simulate steady-state conditions. + # In production, committees are cached from the prior epoch's blocks. + alias LambdaEthereumConsensus.StateTransition.Accessors + epoch = Accessors.get_current_epoch(anchor_state) + Accessors.maybe_prefetch_committees(anchor_state, epoch) + Logger.info("Pre-warmed committee cache for epoch #{epoch}") + + {_store, results} = process_blocks(blocks, store) + + print_summary(results, start_slot, count) + end + + defp read_metadata(data_dir) do + path = Path.join(data_dir, "metadata.json") + + case File.read(path) do + {:ok, contents} -> Jason.decode!(contents) + {:error, reason} -> Mix.raise("Failed to read metadata.json: #{reason}") + end + end + + defp boot_infrastructure(network) do + Application.ensure_all_started(:snappyer) + Application.ensure_all_started(:jason) + Application.ensure_all_started(:telemetry) + + # Configure ChainSpec + config = ConfigUtils.parse_config!(network) + Application.put_env(:lambda_ethereum_consensus, ChainSpec, config: config) + + # Mock the engine API + Application.put_env( + :lambda_ethereum_consensus, + LambdaEthereumConsensus.Execution.EngineApi, + implementation: LambdaEthereumConsensus.Execution.EngineApi.Mocked + ) + + CheckpointStates.new() + + # Use a temporary directory for LevelDB + tmp_db_dir = + Path.join(System.tmp_dir!(), "bench_blocks_#{System.unique_integer([:positive])}") + + {:ok, _} = Store.Db.start_link(dir: tmp_db_dir) + {:ok, _} = Store.Blocks.start_link([]) + {:ok, _} = Store.BlockStates.start_link([]) + Cache.initialize_cache() + + {:ok, _} = Task.Supervisor.start_link(name: 
StoreStatesSupervisor)
+    {:ok, _} = Task.Supervisor.start_link(name: PruneStatesSupervisor)
+    {:ok, _} = Task.Supervisor.start_link(name: PruneBlocksSupervisor)
+    {:ok, _} = Task.Supervisor.start_link(name: PruneBlobsSupervisor)
+  end
+
+  # Decodes the anchor BeaconState stored as state.ssz_snappy in data_dir.
+  defp load_state(data_dir) do
+    decompress_and_decode(Path.join(data_dir, "state.ssz_snappy"), Types.BeaconState)
+  end
+
+  # Decodes the anchor block, i.e. the block file named after start_slot.
+  defp load_anchor_block(data_dir, start_slot) do
+    decompress_and_decode(
+      Path.join(data_dir, "block_#{start_slot}.ssz_snappy"),
+      SignedBeaconBlock
+    )
+  end
+
+  # Load all block files except the anchor block (which is at start_slot).
+  # Returns a list of {slot, signed_block} tuples sorted by ascending slot.
+  defp load_blocks(data_dir, start_slot) do
+    Path.wildcard(Path.join(data_dir, "block_*.ssz_snappy"))
+    |> Enum.map(fn path ->
+      slot = extract_slot_from_filename(path)
+      {slot, path}
+    end)
+    |> Enum.reject(fn {slot, _} -> slot == start_slot end)
+    |> Enum.sort_by(fn {slot, _} -> slot end)
+    |> Enum.map(fn {slot, path} ->
+      block = decompress_and_decode(path, SignedBeaconBlock)
+      {slot, block}
+    end)
+  end
+
+  # Decodes every column_*.ssz_snappy file under the columns_* directories of
+  # data_dir, stores each sidecar through DataColumnDb, and returns how many
+  # were stored. Sidecars are counted as they are stored rather than collected,
+  # so only the file paths stay in memory, never all decoded sidecars at once.
+  defp load_all_columns(data_dir) do
+    data_dir
+    |> Path.join("columns_*")
+    |> Path.wildcard()
+    |> Enum.filter(&File.dir?/1)
+    |> Enum.flat_map(fn col_dir ->
+      Path.wildcard(Path.join(col_dir, "column_*.ssz_snappy"))
+    end)
+    |> Enum.reduce(0, fn path, stored ->
+      column = decompress_and_decode(path, DataColumnSidecar)
+      DataColumnDb.store_data_column(column)
+      stored + 1
+    end)
+  end
+
+  # Reads a snappy-compressed SSZ file and decodes it as `type`.
+  # Raises if the file cannot be read, decompressed or decoded.
+  defp decompress_and_decode(path, type) do
+    # File.read!/1 raises a File.Error that names the offending path.
+    compressed = File.read!(path)
+    {:ok, ssz_data} = :snappyer.decompress(compressed)
+    {:ok, object} = Ssz.from_ssz(ssz_data, type)
+    object
+  end
+
+  # Extracts the slot number from a "block_<slot>.ssz_snappy" file path.
+  defp extract_slot_from_filename(path) do
+    path
+    |> Path.basename()
+    |> String.replace_prefix("block_", "")
+    |> String.replace_suffix(".ssz_snappy", "")
+    |> String.to_integer()
+  end
+
+  defp process_blocks(blocks, store) do
+    slots_per_epoch = ChainSpec.get("SLOTS_PER_EPOCH")
+
+    Enum.reduce(blocks, {store, []}, fn {slot, signed_block}, {store, results} ->
+      process_single_block(slot, signed_block,
store, results, slots_per_epoch) + end) + |> then(fn {store, results} -> {store, Enum.reverse(results)} end) + end + + defp process_single_block(slot, signed_block, store, results, slots_per_epoch) do + block_info = BlockInfo.from_block(signed_block, :pending) + start_time = System.monotonic_time(:millisecond) + + case ForkChoice.process_block(block_info, store) do + {:ok, new_store, timings} -> + elapsed = System.monotonic_time(:millisecond) - start_time + epoch_boundary? = rem(slot, slots_per_epoch) == 0 + + pairs = + timings + |> Enum.sort_by(fn {_k, v} -> v end, :desc) + |> Enum.map_join(" ", fn {k, v} -> "#{k}=#{v}ms" end) + + Logger.info( + "Slot #{slot}: #{elapsed}ms#{if epoch_boundary?, do: " [epoch boundary]", else: ""} #{pairs}" + ) + + {new_store, [{slot, elapsed, epoch_boundary?} | results]} + + {:error, reason} -> + elapsed = System.monotonic_time(:millisecond) - start_time + Logger.error("Slot #{slot}: failed after #{elapsed}ms: #{inspect(reason)}") + {store, results} + end + end + + defp print_summary(results, start_slot, count) do + total_blocks = length(results) + empty_slots = count - total_blocks + + {epoch_results, non_epoch_results} = + Enum.split_with(results, fn {_slot, _ms, epoch?} -> epoch? 
end) + + total_ms = results |> Enum.map(fn {_, ms, _} -> ms end) |> Enum.sum() + + avg_ms = + if total_blocks > 0, do: Float.round(total_ms / total_blocks, 1), else: 0 + + non_epoch_avg = + case non_epoch_results do + [] -> 0 + list -> Float.round(Enum.sum(Enum.map(list, fn {_, ms, _} -> ms end)) / length(list), 1) + end + + IO.puts("\n=== Block Processing Benchmark ===") + IO.puts("Slots: #{start_slot} -> #{start_slot + count}") + IO.puts("Blocks: #{total_blocks} / #{count} (#{empty_slots} empty slots)") + IO.puts("Epochs: #{length(epoch_results)} boundaries crossed") + IO.puts("") + IO.puts("Total time: #{format_time(total_ms)}") + IO.puts("Avg per block: #{round(avg_ms)}ms") + + if epoch_results != [] do + epoch_details = + epoch_results + |> Enum.map_join(", ", fn {slot, ms, _} -> "slot #{slot}: #{format_time(ms)}" end) + + IO.puts("Epoch blocks: [#{epoch_details}]") + end + + IO.puts("Non-epoch avg: #{round(non_epoch_avg)}ms") + end + + defp format_time(ms) when ms >= 1000, do: "#{Float.round(ms / 1000, 1)}s" + defp format_time(ms), do: "#{round(ms)}ms" +end diff --git a/lib/mix/tasks/bench/download.ex b/lib/mix/tasks/bench/download.ex new file mode 100644 index 000000000..4b6cbe751 --- /dev/null +++ b/lib/mix/tasks/bench/download.ex @@ -0,0 +1,252 @@ +defmodule Mix.Tasks.Bench.Download do + @moduledoc """ + Download blocks and state from a Beacon API node for benchmarking. + + Downloads a BeaconState at the start slot, then fetches blocks and blob sidecars + for `count` consecutive slots. Blobs are converted to data column sidecars. + Everything is saved to disk as SSZ snappy-compressed files. 
+
+  ## Usage
+
+      mix bench.download --url http://localhost:5052 --start-slot 1000 --count 32
+
+  ## Options
+
+    * `--url` (required) - Beacon API base URL
+    * `--start-slot` (required) - Slot to anchor from
+    * `--count` (required) - Number of slots after start to fetch
+    * `--data-dir` (optional, default `bench/data`) - Base directory for output
+    * `--network` (optional, default `mainnet`) - Network config (mainnet, sepolia, etc.)
+
+  ## Output Structure
+
+      bench/data/slot_<start_slot>_<count>/
+        metadata.json
+        state.ssz_snappy
+        block_<slot>.ssz_snappy
+        columns_<slot>/
+          column_<index>.ssz_snappy
+  """
+
+  use Mix.Task
+
+  @shortdoc "Download blocks from Beacon API for benchmarking"
+
+  alias LambdaEthereumConsensus.StateTransition.DasCore
+  alias Types.BeaconState
+  alias Types.SignedBeaconBlock
+
+  @impl Mix.Task
+  def run(args) do
+    {url, start_slot, count, out_dir} = parse_and_setup(args)
+
+    fetch_anchor_data!(url, start_slot, out_dir)
+
+    results = fetch_block_range(url, start_slot, count, out_dir)
+
+    Mix.shell().info("""
+
+    Download complete! 
+ Directory: #{out_dir} + Blocks found: #{results.blocks} + Empty slots: #{results.empty} + Total blobs: #{results.blobs} + Total columns generated: #{results.columns} + """) + end + + defp parse_and_setup(args) do + {opts, _, _} = + OptionParser.parse(args, + strict: [ + url: :string, + start_slot: :integer, + count: :integer, + data_dir: :string, + network: :string + ] + ) + + url = opts[:url] || Mix.raise("--url is required") + start_slot = opts[:start_slot] || Mix.raise("--start-slot is required") + count = opts[:count] || Mix.raise("--count is required") + data_dir = opts[:data_dir] || "bench/data" + network = opts[:network] || "mainnet" + + for app <- [:jason, :hackney, :tesla, :snappyer], do: Application.ensure_all_started(app) + + config = ConfigUtils.parse_config!(network) + Application.put_env(:lambda_ethereum_consensus, ChainSpec, config: config) + Code.ensure_loaded!(Ssz) + Code.ensure_loaded!(Kzg) + + slots_per_epoch = ChainSpec.get("SLOTS_PER_EPOCH") + + if rem(start_slot, slots_per_epoch) != 0 do + Mix.shell().info( + "WARNING: start-slot #{start_slot} is not an epoch boundary (SLOTS_PER_EPOCH=#{slots_per_epoch})" + ) + end + + out_dir = Path.join(data_dir, "slot_#{start_slot}_#{count}") + File.mkdir_p!(out_dir) + + metadata = %{ + url: url, + start_slot: start_slot, + count: count, + network: network, + timestamp: DateTime.utc_now() |> DateTime.to_iso8601() + } + + File.write!(Path.join(out_dir, "metadata.json"), Jason.encode!(metadata, pretty: true)) + + {url, start_slot, count, out_dir} + end + + defp fetch_anchor_data!(url, start_slot, out_dir) do + Mix.shell().info("Fetching state at slot #{start_slot}...") + + case get_ssz_from_url(url, "/eth/v2/debug/beacon/states/#{start_slot}", BeaconState) do + {:ok, state} -> + write_ssz_snappy!(Path.join(out_dir, "state.ssz_snappy"), state) + Mix.shell().info("State saved.") + + {:error, reason} -> + Mix.raise("Failed to fetch state: #{inspect(reason)}") + end + + Mix.shell().info("Fetching anchor block 
at slot #{start_slot}...")
+
+    case get_ssz_from_url(url, "/eth/v2/beacon/blocks/#{start_slot}", SignedBeaconBlock) do
+      {:ok, anchor_block} ->
+        write_ssz_snappy!(Path.join(out_dir, "block_#{start_slot}.ssz_snappy"), anchor_block)
+        Mix.shell().info("Anchor block saved.")
+
+      {:error, reason} ->
+        Mix.raise("Failed to fetch anchor block: #{inspect(reason)}")
+    end
+  end
+
+  # Downloads the blocks for slots start_slot + 1 through start_slot + count.
+  #
+  # Each block found is written to `block_<slot>.ssz_snappy` and its blobs are
+  # converted to data columns. Returns a counter map with the keys `:blocks`,
+  # `:empty`, `:blobs` and `:columns`.
+  #
+  # Only a `:not_found` response is treated as an empty slot. Any other failure
+  # (HTTP error status, transport error, SSZ decode error) aborts the download,
+  # so that a missing block file always means "no block was proposed" rather
+  # than "the fetch failed".
+  defp fetch_block_range(url, start_slot, count, out_dir) do
+    slots = (start_slot + 1)..(start_slot + count)
+
+    Enum.reduce(slots, %{blocks: 0, empty: 0, blobs: 0, columns: 0}, fn slot, acc ->
+      Mix.shell().info("Fetching slot #{slot}...")
+
+      case get_ssz_from_url(url, "/eth/v2/beacon/blocks/#{slot}", SignedBeaconBlock) do
+        {:ok, signed_block} ->
+          write_ssz_snappy!(Path.join(out_dir, "block_#{slot}.ssz_snappy"), signed_block)
+          acc = %{acc | blocks: acc.blocks + 1}
+          fetch_and_convert_blobs(url, slot, signed_block, out_dir, acc)
+
+        {:error, :not_found} ->
+          Mix.shell().info(" Slot #{slot}: empty (no block)")
+          %{acc | empty: acc.empty + 1}
+
+        {:error, reason} ->
+          Mix.raise("Failed to fetch block at slot #{slot}: #{inspect(reason)}")
+      end
+    end)
+  end
+
+  defp fetch_and_convert_blobs(url, slot, signed_block, out_dir, acc) do
+    case get_json(url, "/eth/v1/beacon/blob_sidecars/#{slot}") do
+      {:ok, %{"data" => blob_data}} when blob_data != [] ->
+        blobs =
+          Enum.map(blob_data, fn sidecar ->
+            sidecar["blob"]
+            |> String.trim_leading("0x")
+            |> Base.decode16!(case: :mixed)
+          end)
+
+        blob_count = length(blobs)
+        Mix.shell().info(" Slot #{slot}: #{blob_count} blob(s), computing columns...")
+
+        cells_and_proofs =
+          Enum.map(blobs, fn blob ->
+            {:ok, {cells, proofs}} = Kzg.compute_cells_and_kzg_proofs(blob)
+            {cells, proofs}
+          end)
+
+        {:ok, columns} = DasCore.get_data_column_sidecars(signed_block, cells_and_proofs)
+
+        # Write columns to disk
+        col_dir = Path.join(out_dir, "columns_#{slot}")
+        File.mkdir_p!(col_dir)
+
+        Enum.each(columns, fn col ->
+          write_ssz_snappy!(Path.join(col_dir, "column_#{col.index}.ssz_snappy"), col)
+        end)
+
+        column_count = length(columns)
+        Mix.shell().info(" Slot 
#{slot}: wrote #{column_count} columns") + %{acc | blobs: acc.blobs + blob_count, columns: acc.columns + column_count} + + {:ok, _} -> + Mix.shell().info(" Slot #{slot}: no blobs") + acc + + {:error, reason} -> + Mix.shell().info(" Slot #{slot}: failed to fetch blobs: #{inspect(reason)}") + acc + end + end + + defp get_ssz_from_url(base_url, path, result_type) do + client = + Tesla.client([ + {Tesla.Middleware.Headers, [{"Accept", "application/octet-stream"}]} + ]) + + full_url = concat_url(base_url, path) + + case Tesla.get(client, full_url) do + {:ok, %{status: 404}} -> + {:error, :not_found} + + {:ok, %{status: status}} when status >= 400 -> + {:error, {:http_error, status}} + + {:ok, response} -> + Ssz.from_ssz(response.body, result_type) + + {:error, _} = err -> + err + end + end + + defp get_json(base_url, path) do + client = + Tesla.client([ + {Tesla.Middleware.Headers, [{"Accept", "application/json"}]}, + Tesla.Middleware.JSON + ]) + + full_url = concat_url(base_url, path) + + case Tesla.get(client, full_url) do + {:ok, %{status: 404}} -> + {:error, :not_found} + + {:ok, %{status: status}} when status >= 400 -> + {:error, {:http_error, status}} + + {:ok, response} -> + {:ok, response.body} + + {:error, _} = err -> + err + end + end + + defp write_ssz_snappy!(path, object) do + {:ok, ssz_data} = Ssz.to_ssz(object) + {:ok, compressed} = :snappyer.compress(ssz_data) + File.write!(path, compressed) + end + + defp concat_url(base_url, path) do + base_url + |> URI.parse() + |> URI.append_path(path) + |> URI.to_string() + end +end diff --git a/lib/ssz.ex b/lib/ssz.ex index cbfec431c..1e9643c22 100644 --- a/lib/ssz.ex +++ b/lib/ssz.ex @@ -105,6 +105,59 @@ defmodule Ssz do |> hash_tree_root_vector_rs(max_size, schema) end + @doc """ + Hash a BeaconState with cached field hashes. + `cached_field_hashes` is a map of `%{field_index => 32-byte hash}` for fields + whose hash can be reused from a previous computation. 
+ Returns `{:ok, root, field_hashes_binary}` where field_hashes_binary contains + all individual field hashes (num_fields * 32 bytes) for caching. + """ + @spec hash_beacon_state_cached(struct, map) :: + {:ok, Types.root(), binary()} | {:error, String.t()} + def hash_beacon_state_cached(%Types.BeaconState{} = state, cached_field_hashes \\ %{}) do + state + |> encode_beacon_state_selective(cached_field_hashes) + |> hash_beacon_state_cached_rs(cached_field_hashes) + end + + # Encode BeaconState, but skip expensive conversions for cached fields. + # Cached fields get placeholder values since the NIF won't read them. + # Field indices: 11=validators, 12=balances, 13=randao_mixes, + # 15=prev_participation, 16=curr_participation + defp encode_beacon_state_selective(%Types.BeaconState{} = state, cached) do + alias LambdaEthereumConsensus.Utils.BitVector + + state = + if Map.has_key?(cached, 11), + do: state, + else: Map.update!(state, :validators, &Aja.Vector.to_list/1) + + state = + if Map.has_key?(cached, 12), + do: state, + else: Map.update!(state, :balances, &Aja.Vector.to_list/1) + + state = + if Map.has_key?(cached, 13), + do: state, + else: Map.update!(state, :randao_mixes, &Aja.Vector.to_list/1) + + state = + if Map.has_key?(cached, 15), + do: state, + else: Map.update!(state, :previous_epoch_participation, &Aja.Vector.to_list/1) + + state = + if Map.has_key?(cached, 16), + do: state, + else: Map.update!(state, :current_epoch_participation, &Aja.Vector.to_list/1) + + # These conversions are always needed (small fields) + state + |> Map.update!(:latest_execution_payload_header, &Types.ExecutionPayloadHeader.encode/1) + |> Map.update!(:justification_bits, &BitVector.to_bytes/1) + end + ##### Rust-side function stubs @spec to_ssz_rs(map | list, module, module) :: {:ok, binary} | {:error, String.t()} def to_ssz_rs(_term, _schema, _config \\ ChainSpec.get_preset()), do: error() @@ -128,6 +181,110 @@ defmodule Ssz do def hash_tree_root_vector_rs(_vector, _max_size, 
_schema, _config \\ ChainSpec.get_preset()), do: error() + @spec hash_beacon_state_cached_rs(map, map, module) :: + {:ok, Types.root(), binary()} | {:error, String.t()} + def hash_beacon_state_cached_rs( + _state, + _cached_hashes, + _config \\ ChainSpec.get_preset() + ), + do: error() + + @doc """ + Apply targeted balance updates to the cached incremental balance merkle tree. + Returns `{:ok, hash}` or `{:error, :cache_miss}`. + `updates` is a list of `{index, new_value}` tuples. + """ + @spec update_balance_cache( + list({non_neg_integer(), non_neg_integer()}), + non_neg_integer(), + binary() + ) :: + {:ok, binary()} | {:error, :cache_miss} + def update_balance_cache(updates, balance_count, expected_prev_hash), + do: update_balance_cache_rs(updates, balance_count, expected_prev_hash) + + def update_balance_cache_rs(_updates, _balance_count, _expected_prev_hash), do: error() + + @doc """ + Apply targeted participation updates to the cached incremental participation merkle tree. + Returns `{:ok, hash}` or `{:error, :cache_miss}`. + `field_num` is 15 (previous_epoch_participation) or 16 (current_epoch_participation). + `updates` is a list of `{index, new_value}` tuples. + `expected_prev_hash` validates the cache matches the expected parent state. + """ + @spec update_participation_cache( + 15 | 16, + list({non_neg_integer(), non_neg_integer()}), + non_neg_integer(), + binary() + ) :: + {:ok, binary()} | {:error, :cache_miss} + def update_participation_cache(field_num, updates, value_count, expected_prev_hash), + do: update_participation_cache_rs(field_num, updates, value_count, expected_prev_hash) + + def update_participation_cache_rs(_field_num, _updates, _value_count, _expected_prev_hash), + do: error() + + @doc """ + Apply a targeted randao_mixes update to the cached incremental merkle tree. + Returns `{:ok, hash}` or `{:error, :cache_miss}`. + `index` is the position to update, `new_value` is the new 32-byte entry. 
+ `expected_prev_hash` validates the cache matches the expected parent state. + """ + @spec update_randao_cache( + non_neg_integer(), + binary(), + non_neg_integer(), + binary() + ) :: + {:ok, binary()} | {:error, :cache_miss} + def update_randao_cache(index, new_value, total_count, expected_prev_hash), + do: update_randao_cache_rs(index, new_value, total_count, expected_prev_hash) + + def update_randao_cache_rs(_index, _new_value, _total_count, _expected_prev_hash), do: error() + + @doc """ + Perform the full eth2 shuffle in Rust NIF. Takes a list of validator indices, + a 32-byte seed, and the number of shuffle rounds. Returns the shuffled list. + Runs on DirtyCpu scheduler to avoid blocking normal schedulers. + """ + @spec shuffle_list([non_neg_integer()], binary(), non_neg_integer()) :: [non_neg_integer()] + def shuffle_list(indices, seed, rounds), do: shuffle_list_rs(indices, seed, rounds) + + def shuffle_list_rs(_indices, _seed, _rounds), do: error() + + @spec compute_proposer_indices( + binary(), + non_neg_integer(), + non_neg_integer(), + [non_neg_integer()], + [non_neg_integer()], + non_neg_integer(), + non_neg_integer() + ) :: [non_neg_integer()] + def compute_proposer_indices( + epoch_seed, + start_slot, + slots_per_epoch, + active_indices, + effective_balances, + max_effective_balance, + rounds + ), + do: + compute_proposer_indices_rs( + epoch_seed, + start_slot, + slots_per_epoch, + active_indices, + effective_balances, + max_effective_balance, + rounds + ) + + def compute_proposer_indices_rs(_, _, _, _, _, _, _), do: error() + ##### Utils defp error(), do: :erlang.nif_error(:nif_not_loaded) @@ -143,10 +300,6 @@ defmodule Ssz do end end - defp encode(list) when is_list(list) do - Enum.map(list, &encode/1) - end - defp encode(list) when is_list(list), do: list |> Enum.map(&encode/1) defp encode(non_struct), do: non_struct diff --git a/lib/types/beacon_chain/beacon_state.ex b/lib/types/beacon_chain/beacon_state.ex index 2fceb9f28..4a85ad60d 100644 --- 
a/lib/types/beacon_chain/beacon_state.ex +++ b/lib/types/beacon_chain/beacon_state.ex @@ -273,14 +273,17 @@ defmodule Types.BeaconState do @doc """ Return the deltas for a given ``flag_index`` by scanning through the participation flags. """ - @spec get_flag_index_deltas(t(), integer(), integer()) :: + @spec get_flag_index_deltas(t(), integer(), integer(), MapSet.t(), Types.gwei()) :: Enumerable.t({Types.gwei(), Types.gwei()}) - def get_flag_index_deltas(state, weight, flag_index) do + def get_flag_index_deltas( + state, + weight, + flag_index, + unslashed_participating_indices, + base_reward_per_increment + ) do previous_epoch = Accessors.get_previous_epoch(state) - {:ok, unslashed_participating_indices} = - Accessors.get_unslashed_participating_indices(state, flag_index, previous_epoch) - unslashed_participating_balance = Accessors.get_total_balance(state, unslashed_participating_indices) @@ -293,49 +296,47 @@ defmodule Types.BeaconState do div(Accessors.get_total_active_balance(state), effective_balance_increment) weight_denominator = Constants.weight_denominator() + in_inactivity_leak? 
= Predicates.in_inactivity_leak?(state) + timely_head_flag_index = Constants.timely_head_flag_index() - previous_epoch = Accessors.get_previous_epoch(state) - - process_reward_and_penalty = fn index -> - base_reward = Accessors.get_base_reward(state, index) - is_unslashed = MapSet.member?(unslashed_participating_indices, index) - - cond do - is_unslashed and Predicates.in_inactivity_leak?(state) -> - 0 + ctx = + {weight, flag_index, effective_balance_increment, base_reward_per_increment, + unslashed_participating_increments, active_increments, weight_denominator, + in_inactivity_leak?, timely_head_flag_index, previous_epoch, + unslashed_participating_indices} - is_unslashed -> - reward_numerator = base_reward * weight * unslashed_participating_increments - div(reward_numerator, active_increments * weight_denominator) + state.validators + |> Stream.with_index() + |> Stream.map(&compute_flag_delta(&1, ctx)) + end - flag_index != Constants.timely_head_flag_index() -> - -div(base_reward * weight, weight_denominator) + defp compute_flag_delta( + {validator, index}, + {weight, flag_index, ebi, brpi, upi, ai, wd, in_leak?, thfi, prev_epoch, indices} + ) do + if Predicates.eligible_validator?(validator, prev_epoch) do + base_reward = div(validator.effective_balance, ebi) * brpi + is_unslashed = MapSet.member?(indices, index) - true -> - 0 + cond do + is_unslashed and in_leak? -> 0 + is_unslashed -> div(base_reward * weight * upi, ai * wd) + flag_index != thfi -> -div(base_reward * weight, wd) + true -> 0 end + else + 0 end - - state.validators - |> Stream.with_index() - |> Stream.map(fn {validator, index} -> - if Predicates.eligible_validator?(validator, previous_epoch), - do: process_reward_and_penalty.(index), - else: 0 - end) end @doc """ Return the inactivity penalty deltas by considering timely target participation flags and inactivity scores. 
""" - @spec get_inactivity_penalty_deltas(t()) :: Enumerable.t({Types.gwei(), Types.gwei()}) - def get_inactivity_penalty_deltas(%__MODULE__{} = state) do + @spec get_inactivity_penalty_deltas(t(), MapSet.t()) :: + Enumerable.t({Types.gwei(), Types.gwei()}) + def get_inactivity_penalty_deltas(%__MODULE__{} = state, matching_target_indices) do previous_epoch = Accessors.get_previous_epoch(state) - target_index = Constants.timely_target_flag_index() - - {:ok, matching_target_indices} = - Accessors.get_unslashed_participating_indices(state, target_index, previous_epoch) penalty_denominator = ChainSpec.get("INACTIVITY_SCORE_BIAS") * diff --git a/lib/types/state_info.ex b/lib/types/state_info.ex index 2e422053c..3dc715582 100644 --- a/lib/types/state_info.ex +++ b/lib/types/state_info.ex @@ -9,48 +9,71 @@ defmodule Types.StateInfo do """ alias Types.BeaconState - defstruct [:root, :beacon_state, :encoded, :block_root] + defstruct [:root, :beacon_state, :encoded, :block_root, field_hashes: %{}] @type t :: %__MODULE__{ beacon_state: Types.BeaconState.t(), root: Types.root(), - encoded: binary(), - block_root: Types.root() + encoded: binary() | nil, + block_root: Types.root(), + field_hashes: %{non_neg_integer() => binary()} } @spec from_beacon_state(Types.BeaconState.t(), keyword()) :: {:ok, t()} | {:error, binary()} def from_beacon_state(%BeaconState{} = state, fields \\ []) do - with {:ok, encoded} <- fetch_lazy(fields, :encoded, fn -> Ssz.to_ssz(state) end), - {:ok, block_root} <- + cached_field_hashes = Keyword.get(fields, :cached_field_hashes, %{}) + + with {:ok, block_root} <- fetch_lazy(fields, :block_root, fn -> # NOTE: due to how SSZ-hashing works, hash(block) == hash(header) Ssz.hash_tree_root(state.latest_block_header) - end) do - {:ok, from_beacon_state(state, encoded, block_root)} + end), + {:ok, root, field_hashes_binary} <- + Ssz.hash_beacon_state_cached(state, cached_field_hashes) do + field_hashes = parse_field_hashes(field_hashes_binary, 0, %{}) + + 
{:ok,
+       %__MODULE__{
+         root: root,
+         beacon_state: state,
+         block_root: block_root,
+         field_hashes: field_hashes
+       }}
     end
   end
 
-  @spec from_beacon_state(Types.BeaconState.t(), binary(), Types.root()) :: t()
-  def from_beacon_state(%BeaconState{} = state, encoded, block_root) do
-    root = Ssz.hash_tree_root!(state)
-    %__MODULE__{root: root, beacon_state: state, encoded: encoded, block_root: block_root}
+  # Parse concatenated 32-byte hashes into a map of %{field_index => hash}
+  defp parse_field_hashes(<<>>, _idx, acc), do: acc
+
+  # Take one 32-byte hash off the front per step; `idx` is its position in the
+  # concatenated binary and becomes the map key.
+  defp parse_field_hashes(<<hash::binary-size(32), rest::binary>>, idx, acc) do
+    parse_field_hashes(rest, idx + 1, Map.put(acc, idx, hash))
   end
 
   @spec encode(t()) :: binary()
+  def encode(%__MODULE__{encoded: nil} = state_info) do
+    {:ok, encoded} = Ssz.to_ssz(state_info.beacon_state)
+
+    {encoded, state_info.root, state_info.block_root, state_info.field_hashes}
+    |> :erlang.term_to_binary()
+  end
+
   def encode(%__MODULE__{} = state_info) do
-    {state_info.encoded, state_info.root, state_info.block_root} |> :erlang.term_to_binary()
+    {state_info.encoded, state_info.root, state_info.block_root, state_info.field_hashes}
+    |> :erlang.term_to_binary()
   end
 
   @spec decode(binary()) :: {:ok, t()} | {:error, binary()}
   def decode(bin) do
-    with {:ok, encoded, root, block_root} <- :erlang.binary_to_term(bin) |> validate_term(),
+    with {:ok, encoded, root, block_root, field_hashes} <-
+           :erlang.binary_to_term(bin) |> validate_term(),
          {:ok, beacon_state} <- Ssz.from_ssz(encoded, BeaconState) do
       {:ok,
        %__MODULE__{
          beacon_state: beacon_state,
          root: root,
          block_root: block_root,
-         encoded: encoded
+         encoded: encoded,
+         field_hashes: field_hashes
        }}
     end
   end
@@ -59,14 +82,23 @@ defmodule Types.StateInfo do
     with :error <- Keyword.fetch(keyword, key), do: fun.()
   end
 
-  @spec validate_term(term()) :: {:ok, binary(), Types.root(), Types.root()} | {:error, binary()}
+  @spec validate_term(term()) ::
+          {:ok, binary(), Types.root(), Types.root(), %{non_neg_integer() => binary()}}
+          | {:error, binary()}
+  defp 
validate_term({ssz_encoded, root, block_root, field_hashes}) + when is_binary(ssz_encoded) and is_binary(root) and is_binary(block_root) and + is_map(field_hashes) do + {:ok, ssz_encoded, root, block_root, field_hashes} + end + + # Backwards compatibility: old 3-tuple format without field_hashes defp validate_term({ssz_encoded, root, block_root}) - when is_binary(ssz_encoded) and is_binary(root) and is_binary(root) do - {:ok, ssz_encoded, root, block_root} + when is_binary(ssz_encoded) and is_binary(root) and is_binary(block_root) do + {:ok, ssz_encoded, root, block_root, %{}} end defp validate_term(other) do {:error, - "Error when decoding state info binary. Expected a {binary(), binary()} tuple. Found: #{inspect(other)}"} + "Error when decoding state info binary. Expected a {binary(), binary(), binary(), map()} tuple. Found: #{inspect(other)}"} end end diff --git a/lib/types/store.ex b/lib/types/store.ex index 926df5d1f..6ed5ea358 100644 --- a/lib/types/store.ex +++ b/lib/types/store.ex @@ -13,6 +13,7 @@ defmodule Types.Store do alias LambdaEthereumConsensus.Store.Blocks alias LambdaEthereumConsensus.Store.BlockStates alias LambdaEthereumConsensus.Store.CheckpointStates + alias LambdaEthereumConsensus.Store.StateDb alias Types.BeaconBlock alias Types.BeaconState alias Types.BlockInfo @@ -87,6 +88,8 @@ defmodule Types.Store do time = anchor_state.genesis_time + ChainSpec.get("SECONDS_PER_SLOT") * anchor_state.slot BlockStates.store_state_info(state_info) + # Persist anchor state to LevelDB (BlockStates LRU no longer writes to DB) + Task.start(fn -> StateDb.store_state_info(state_info) end) CheckpointStates.put(anchor_checkpoint, anchor_state) %__MODULE__{ @@ -121,12 +124,21 @@ defmodule Types.Store do end def get_ancestor(%__MODULE__{} = store, root, slot) do - block = Blocks.get_block!(root) - - if block.slot > slot do - get_ancestor(store, block.parent_root, slot) - else - root + # Cache-only block lookup to avoid blocking Libp2pPort on eleveldb.get/3. 
+ # On miss, return root as-is (same behavior as pruned blocks). + case Blocks.get_block_cached(root) do + nil -> + # Block has been pruned or evicted from cache. Return the root as-is + # so callers that compare ancestors (get_weight, finalized_check) will + # see a non-matching root and correctly discard the entry. + root + + block -> + if block.slot > slot do + get_ancestor(store, block.parent_root, slot) + else + root + end end end @@ -147,7 +159,11 @@ defmodule Types.Store do def get_children(%__MODULE__{tree_cache: tree}, parent_root) do case Tree.get_children(tree, parent_root) do {:ok, children} -> - Enum.map(children, &{&1, Blocks.get_block!(&1)}) + # Cache-only to avoid blocking Libp2pPort on LevelDB reads. + # Filter out any children whose block data isn't cached. + children + |> Enum.map(fn root -> {root, Blocks.get_block_cached(root)} end) + |> Enum.reject(fn {_root, block} -> is_nil(block) end) {:error, :not_found} -> Logger.warning( @@ -193,6 +209,17 @@ defmodule Types.Store do end end + @doc """ + Like get_state/2 but only checks in-memory maps and the ETS LRU cache. + Does NOT fall through to LevelDB. Returns nil on cache miss. + Used by prefetch_states to avoid 28-85s LevelDB reads. + """ + def get_state_cached(store, root) when is_binary(root) do + with nil <- Map.get(store.states, root) do + BlockStates.get_state_info_cached(root) + end + end + def get_state!(store, root) do %StateInfo{} = get_state(store, root) end @@ -220,6 +247,20 @@ defmodule Types.Store do end end + @doc """ + Like get_checkpoint_state/2 but only uses in-memory and ETS-cached states. + Does NOT fall through to LevelDB on cache miss, returning {store, nil} instead. + Used by prefetch_states to avoid blocking the ForkChoice GenServer for 28-85s + during LevelDB deserialization of 775MB mainnet BeaconStates. 
+ """ + @spec get_checkpoint_state_cached(t(), Types.Checkpoint.t()) :: {t(), BeaconState.t() | nil} + def get_checkpoint_state_cached(store, %Checkpoint{} = checkpoint) do + case Map.get(store.checkpoint_states, checkpoint) do + nil -> compute_checkpoint_state_cached(store, checkpoint) + state -> {store, state} + end + end + def remove_cache(%__MODULE__{} = store) do store |> Map.put(:states, %{}) |> Map.put(:checkpoint_states, %{}) end @@ -242,21 +283,91 @@ defmodule Types.Store do end defp update_tree(%__MODULE__{} = store, block_root, parent_root) do - # We expect the finalized block to be in the tree - tree = Tree.update_root!(store.tree_cache, store.finalized_checkpoint.root) + finalized_root = store.finalized_checkpoint.root + + tree = + case Tree.update_root(store.tree_cache, finalized_root) do + {:ok, pruned} -> + pruned + + {:error, :not_found} -> + # Tree is stale (e.g. after restart/recovery). Rebuild from finalized root. + Logger.warning( + "[Store] Finalized root #{Base.encode16(finalized_root)} not in tree, rebuilding" + ) + + Tree.new(finalized_root) + end case Tree.add_block(tree, block_root, parent_root) do - {:ok, new_tree} -> %{store | tree_cache: new_tree} - # Block is older than current finalized block, or parent not in tree. - # Still save the pruned tree so tree_cache stays in sync with finalized_checkpoint. - {:error, :not_found} -> %{store | tree_cache: tree} + {:ok, new_tree} -> + %{store | tree_cache: new_tree} + + {:error, :not_found} -> + # Parent not in tree. Walk the parent chain from parent_root back to + # the finalized root and add all intermediate blocks. This repairs the + # tree after it was rebuilt with only the finalized root, or after + # blocks were pruned but the chain wasn't maintained. 
+ repaired = repair_tree_chain(tree, finalized_root, parent_root) + + case Tree.add_block(repaired, block_root, parent_root) do + {:ok, new_tree} -> %{store | tree_cache: new_tree} + {:error, :not_found} -> %{store | tree_cache: repaired} + end + end + end + + # Repair a tree by walking the parent chain from target_root back to + # finalized_root and adding all intermediate blocks. This fills in gaps + # when the tree only has the finalized root but blocks have been processed + # beyond it (e.g., after a Tree.new rebuild or finalization advance). + defp repair_tree_chain(tree, finalized_root, target_root) do + chain = collect_parent_chain(target_root, finalized_root, []) + + if chain != [] do + Logger.info("[Store] Repairing tree: adding #{length(chain)} blocks from parent chain") + end + + Enum.reduce(chain, tree, fn {root, parent}, acc -> + case Tree.add_block(acc, root, parent) do + {:ok, t} -> t + {:error, _} -> acc + end + end) + end + + # Walk from current_root back to finalized_root, collecting {root, parent} pairs. + # Returns the chain in order from finalized_root's child down to current_root. 
+ defp collect_parent_chain(current_root, finalized_root, acc) + when current_root == finalized_root, + do: acc + + defp collect_parent_chain(current_root, finalized_root, acc) do + case Blocks.get_block_info_cached(current_root) do + %BlockInfo{signed_block: %{message: %{parent_root: parent}}} -> + collect_parent_chain(parent, finalized_root, [{current_root, parent} | acc]) + + _ -> + # Can't walk further (block not found or pruned), return what we have + Logger.warning( + "[Store] Parent chain walk stopped at #{Base.encode16(current_root)}, " <> + "#{length(acc)} blocks collected" + ) + + acc end end @spec update_head_info(t()) :: t() def update_head_info(store) do {:ok, head_root} = Head.get_head(store) - %{slot: head_slot} = Blocks.get_block!(head_root) + + head_slot = + case Blocks.get_block_cached(head_root) do + nil -> store.head_slot || 0 + block -> block.slot + end + update_head_info(store, head_slot, head_root) end @@ -285,4 +396,24 @@ defmodule Types.Store do end end end + + # Like compute_checkpoint_state but uses cache-only state lookup. + defp compute_checkpoint_state_cached(store, checkpoint) do + target_slot = Misc.compute_start_slot_at_epoch(checkpoint.epoch) + + case get_state_cached(store, checkpoint.root) do + nil -> + {store, nil} + + %StateInfo{beacon_state: state} -> + if state.slot < target_slot do + {:ok, new_state, _timings} = StateTransition.process_slots(state, target_slot) + + {update_in(store.checkpoint_states, fn s -> Map.put(s, checkpoint, new_state) end), + new_state} + else + {store, state} + end + end + end end diff --git a/lib/utils/mem.ex b/lib/utils/mem.ex new file mode 100644 index 000000000..28126d520 --- /dev/null +++ b/lib/utils/mem.ex @@ -0,0 +1,515 @@ +defmodule LambdaEthereumConsensus.Mem do + @moduledoc """ + Memory introspection utilities for diagnosing BeaconState memory usage. 
+ + Usage in IEx (via `make iex` or `make test-iex`): + + alias LambdaEthereumConsensus.Mem + Mem.report() # Full memory report + Mem.ets_tables() # All ETS tables ranked by memory + Mem.top_processes(10) # Top 10 processes by heap size + Mem.state_cache_detail() # Per-entry breakdown of BlockStates cache + Mem.checkpoint_detail() # Per-entry breakdown of CheckpointStates + Mem.binary_stats() # Binary/refc binary pressure + Mem.cache_tables() # StateTransition cache table sizes + """ + + @word_size :erlang.system_info(:wordsize) + + # Known ETS tables in this project + @known_tables [ + :states_by_block_hash, + :states_by_block_hash_ttl_data, + :blocks_by_hash, + :blocks_by_hash_ttl_data, + :checkpoint_states, + :total_active_balance, + :beacon_proposer_index, + :active_validator_count, + :beacon_committee, + :active_validator_indices, + :sync_committee_indices + ] + + # ── Full Report ────────────────────────────────────────────────────── + + @doc """ + Print a full memory report: BEAM totals, ETS breakdown, top processes, and cache details. + """ + def report do + IO.puts("\n=== BEAM Memory Summary ===\n") + beam_summary() + + IO.puts("\n=== ETS Tables (Top 20 by Memory) ===\n") + ets_tables(20) + + IO.puts("\n=== Top 10 Processes by Heap ===\n") + top_processes(10) + + IO.puts("\n=== BlockStates Cache (#{table_entry_count(:states_by_block_hash)} entries) ===\n") + state_cache_detail() + + IO.puts("\n=== CheckpointStates (#{table_entry_count(:checkpoint_states)} entries) ===\n") + checkpoint_detail() + + IO.puts("\n=== StateTransition Caches ===\n") + cache_tables() + + IO.puts("\n=== Binary / Refc Binary Stats ===\n") + binary_stats() + + :ok + end + + # ── BEAM Memory ────────────────────────────────────────────────────── + + @doc "Print BEAM memory breakdown from :erlang.memory/0." 
+  def beam_summary do
+    mem = :erlang.memory()
+
+    rows = [
+      {"total", mem[:total]},
+      {"processes", mem[:processes]},
+      {"processes_used", mem[:processes_used]},
+      {"ets", mem[:ets]},
+      {"binary", mem[:binary]},
+      {"code", mem[:code]},
+      {"atom", mem[:atom]},
+      {"system", mem[:system]}
+    ]
+
+    header =
+      String.pad_trailing("Category", 20) <>
+        String.pad_leading("Bytes", 16) <> String.pad_leading("Human", 12)
+
+    IO.puts(header)
+    IO.puts(String.duplicate("-", 48))
+
+    Enum.each(rows, fn {label, bytes} ->
+      IO.puts(
+        String.pad_trailing(label, 20) <>
+          String.pad_leading(Integer.to_string(bytes), 16) <>
+          String.pad_leading(human(bytes), 12)
+      )
+    end)
+  end
+
+  # ── ETS Tables ───────────────────────────────────────────────────────
+
+  @doc """
+  List all ETS tables ranked by memory usage.
+
+  Prints at most `limit` rows (default 30) with the table name, entry count,
+  memory in bytes (ETS words * word size) and table type.
+  """
+  def ets_tables(limit \\ 30) do
+    tables =
+      :ets.all()
+      |> Enum.map(fn tab ->
+        # A table can be deleted between :ets.all/0 and :ets.info/1, in which
+        # case :ets.info/1 returns :undefined. That atom is truthy, so it must
+        # be matched explicitly instead of being tested with `if`.
+        case :ets.info(tab) do
+          :undefined ->
+            nil
+
+          info ->
+            %{
+              name: info[:name] || tab,
+              id: tab,
+              size: info[:size],
+              memory_words: info[:memory],
+              memory_bytes: info[:memory] * @word_size,
+              type: info[:type],
+              owner: info[:owner]
+            }
+        end
+      end)
+      |> Enum.reject(&is_nil/1)
+      |> Enum.sort_by(& &1.memory_bytes, :desc)
+      |> Enum.take(limit)
+
+    header =
+      String.pad_trailing("Table", 40) <>
+        String.pad_leading("Entries", 10) <>
+        String.pad_leading("Memory", 14) <>
+        String.pad_leading("Type", 14)
+
+    IO.puts(header)
+    IO.puts(String.duplicate("-", 78))
+
+    Enum.each(tables, fn t ->
+      IO.puts(
+        String.pad_trailing(inspect(t.name), 40) <>
+          String.pad_leading(Integer.to_string(t.size), 10) <>
+          String.pad_leading(human(t.memory_bytes), 14) <>
+          String.pad_leading(Atom.to_string(t.type), 14)
+      )
+    end)
+  end
+
+  # ── Top Processes ────────────────────────────────────────────────────
+
+  @doc "List top N processes by total memory (heap + stack + mailbox)."
+ def top_processes(n \\ 10) do + procs = + Process.list() + |> Enum.map(fn pid -> + case Process.info(pid, [ + :memory, + :heap_size, + :stack_size, + :message_queue_len, + :registered_name, + :current_function + ]) do + nil -> + nil + + info -> + %{ + pid: pid, + name: info[:registered_name] || info[:current_function] || pid, + memory: info[:memory], + heap_words: info[:heap_size], + stack_words: info[:stack_size], + mq_len: info[:message_queue_len] + } + end + end) + |> Enum.reject(&is_nil/1) + |> Enum.sort_by(& &1.memory, :desc) + |> Enum.take(n) + + header = + String.pad_trailing("Process", 50) <> + String.pad_leading("Memory", 12) <> + String.pad_leading("Heap", 12) <> + String.pad_leading("MQ Len", 10) + + IO.puts(header) + IO.puts(String.duplicate("-", 84)) + + Enum.each(procs, fn p -> + IO.puts( + String.pad_trailing(format_name(p.name), 50) <> + String.pad_leading(human(p.memory), 12) <> + String.pad_leading(human(p.heap_words * @word_size), 12) <> + String.pad_leading(Integer.to_string(p.mq_len), 10) + ) + end) + end + + # ── BlockStates Cache Detail ───────────────────────────────────────── + + @doc """ + Inspect each entry in the BlockStates ETS cache. + Shows per-entry: whether `encoded` is present, beacon_state field sizes, field_hashes count. + """ + def state_cache_detail do + case safe_ets_tab2list(:states_by_block_hash) do + nil -> + IO.puts("Table :states_by_block_hash not found (node not running?)") + + entries -> + if entries == [] do + IO.puts("(empty)") + else + header = + String.pad_trailing("Root (hex prefix)", 20) <> + String.pad_leading("Slot", 10) <> + String.pad_leading("Encoded?", 10) <> + String.pad_leading("Enc. 
Size", 12) <> + String.pad_leading("Validators", 12) <> + String.pad_leading("FieldHash#", 12) <> + String.pad_leading("ETS Words", 12) + + IO.puts(header) + IO.puts(String.duplicate("-", 88)) + + Enum.each(entries, fn {root, state_info, _ttl} -> + bs = state_info.beacon_state + root_hex = Base.encode16(root, case: :lower) |> String.slice(0, 16) + slot = bs.slot + has_encoded = if state_info.encoded, do: "yes", else: "no" + enc_size = if state_info.encoded, do: byte_size(state_info.encoded), else: 0 + + val_count = + if is_struct(bs.validators, Aja.Vector), + do: Aja.Vector.size(bs.validators), + else: length(bs.validators) + + fh_count = map_size(state_info.field_hashes) + + # Measure actual ETS memory for this entry + ets_words = ets_entry_words(:states_by_block_hash, root) + + IO.puts( + String.pad_trailing(root_hex <> "...", 20) <> + String.pad_leading(Integer.to_string(slot), 10) <> + String.pad_leading(has_encoded, 10) <> + String.pad_leading(human(enc_size), 12) <> + String.pad_leading(Integer.to_string(val_count), 12) <> + String.pad_leading(Integer.to_string(fh_count), 12) <> + String.pad_leading(human(ets_words * @word_size), 12) + ) + end) + end + + total_mem = :ets.info(:states_by_block_hash, :memory) * @word_size + IO.puts("\nTotal table memory: #{human(total_mem)}") + end + end + + # ── CheckpointStates Detail ────────────────────────────────────────── + + @doc "Inspect the checkpoint_states ETS table: entries, total memory." 
+ def checkpoint_detail do + case safe_ets_tab2list(:checkpoint_states) do + nil -> + IO.puts("Table :checkpoint_states not found (node not running?)") + + entries -> + count = length(entries) + total_mem = :ets.info(:checkpoint_states, :memory) * @word_size + + IO.puts("Entries: #{count}") + IO.puts("Total memory: #{human(total_mem)}") + + if count > 0 do + IO.puts("") + + header = + String.pad_trailing("Epoch", 10) <> + String.pad_leading("Slot", 10) <> + String.pad_leading("Root (prefix)", 20) + + IO.puts(header) + IO.puts(String.duplicate("-", 40)) + + Enum.each(entries, fn {checkpoint, state} -> + root_hex = Base.encode16(checkpoint.root, case: :lower) |> String.slice(0, 16) + + IO.puts( + String.pad_trailing(Integer.to_string(checkpoint.epoch), 10) <> + String.pad_leading(Integer.to_string(state.slot), 10) <> + String.pad_leading(root_hex <> "...", 20) + ) + end) + end + end + end + + # ── StateTransition Caches ────────────────────────────────────────── + + @doc "Show sizes and memory of the 6 StateTransition cache ETS tables." 
+ def cache_tables do + cache_names = [ + :total_active_balance, + :beacon_proposer_index, + :active_validator_count, + :beacon_committee, + :active_validator_indices, + :sync_committee_indices + ] + + header = + String.pad_trailing("Cache Table", 30) <> + String.pad_leading("Entries", 10) <> + String.pad_leading("Memory", 14) + + IO.puts(header) + IO.puts(String.duplicate("-", 54)) + + total = + Enum.reduce(cache_names, 0, fn name, acc -> + case :ets.info(name) do + :undefined -> + IO.puts(String.pad_trailing(Atom.to_string(name), 30) <> " (not created)") + acc + + info -> + mem = info[:memory] * @word_size + + IO.puts( + String.pad_trailing(Atom.to_string(name), 30) <> + String.pad_leading(Integer.to_string(info[:size]), 10) <> + String.pad_leading(human(mem), 14) + ) + + acc + mem + end + end) + + IO.puts(String.duplicate("-", 54)) + + IO.puts( + String.pad_trailing("TOTAL", 30) <> + String.pad_leading("", 10) <> String.pad_leading(human(total), 14) + ) + end + + # ── Binary Stats ───────────────────────────────────────────────────── + + @doc """ + Show binary/refc binary memory stats. + Large binaries (>64 bytes) are reference-counted and shared between processes. + Leaking binary references is a common BEAM memory issue. 
+ """ + def binary_stats do + mem = :erlang.memory() + binary_mem = mem[:binary] + + IO.puts("Binary memory (refc binaries): #{human(binary_mem)}") + IO.puts("Total BEAM memory: #{human(mem[:total])}") + + IO.puts( + "Binary as % of total: #{Float.round(binary_mem / max(mem[:total], 1) * 100, 1)}%" + ) + + IO.puts("") + + # Find top processes by binary memory + IO.puts("Top 5 processes by binary references:") + IO.puts("") + + procs = + Process.list() + |> Enum.map(fn pid -> + case Process.info(pid, [:binary, :registered_name, :memory]) do + nil -> + nil + + info -> + bins = info[:binary] || [] + bin_mem = bins |> Enum.map(fn {_ref, size, _refcount} -> size end) |> Enum.sum() + + %{ + pid: pid, + name: info[:registered_name] || pid, + memory: info[:memory], + bin_count: length(bins), + bin_mem: bin_mem + } + end + end) + |> Enum.reject(&is_nil/1) + |> Enum.sort_by(& &1.bin_mem, :desc) + |> Enum.take(5) + + header = + String.pad_trailing("Process", 45) <> + String.pad_leading("Bin Count", 12) <> + String.pad_leading("Bin Memory", 14) <> + String.pad_leading("Total Mem", 14) + + IO.puts(header) + IO.puts(String.duplicate("-", 85)) + + Enum.each(procs, fn p -> + IO.puts( + String.pad_trailing(format_name(p.name), 45) <> + String.pad_leading(Integer.to_string(p.bin_count), 12) <> + String.pad_leading(human(p.bin_mem), 14) <> + String.pad_leading(human(p.memory), 14) + ) + end) + end + + # ── Libp2pPort / Store Introspection ───────────────────────────────── + + @doc """ + Inspect the Libp2pPort GenServer state size. This process holds the Store + with `store.states` and `store.checkpoint_states` maps. + + WARNING: This calls :sys.get_state which briefly blocks the GenServer. + Do NOT call during active sync. 
+ """ + def libp2p_port_state do + pid = Process.whereis(LambdaEthereumConsensus.Libp2pPort) + + if pid do + info = Process.info(pid, [:memory, :heap_size, :message_queue_len]) + IO.puts("Libp2pPort process memory: #{human(info[:memory])}") + IO.puts("Heap: #{human(info[:heap_size] * @word_size)}") + IO.puts("Message queue: #{info[:message_queue_len]}") + else + IO.puts("Libp2pPort not running") + end + end + + # ── ETS Memory Delta Tracking ──────────────────────────────────────── + + @doc """ + Take a snapshot of all known ETS tables. Call this before an operation, + then call `diff_snapshot/1` after to see what changed. + + snap = Mem.snapshot() + # ... do some operation ... + Mem.diff_snapshot(snap) + """ + def snapshot do + @known_tables + |> Enum.map(fn name -> + case :ets.info(name) do + :undefined -> {name, %{size: 0, memory: 0}} + info -> {name, %{size: info[:size], memory: info[:memory] * @word_size}} + end + end) + |> Map.new() + end + + @doc "Compare current ETS state against a previous snapshot." 
+ def diff_snapshot(prev) do + current = snapshot() + + header = + String.pad_trailing("Table", 35) <> + String.pad_leading("Entries", 12) <> + String.pad_leading("Memory", 14) <> + String.pad_leading("Delta", 14) + + IO.puts(header) + IO.puts(String.duplicate("-", 75)) + + Enum.each(@known_tables, fn name -> + p = Map.get(prev, name, %{size: 0, memory: 0}) + c = Map.get(current, name, %{size: 0, memory: 0}) + delta = c.memory - p.memory + + if delta != 0 do + sign = if delta > 0, do: "+", else: "" + + IO.puts( + String.pad_trailing(Atom.to_string(name), 35) <> + String.pad_leading("#{p.size}→#{c.size}", 12) <> + String.pad_leading(human(c.memory), 14) <> + String.pad_leading("#{sign}#{human(delta)}", 14) + ) + end + end) + end + + # ── Helpers ────────────────────────────────────────────────────────── + + defp human(bytes) when bytes >= 1_073_741_824, do: "#{Float.round(bytes / 1_073_741_824, 2)} GB" + defp human(bytes) when bytes >= 1_048_576, do: "#{Float.round(bytes / 1_048_576, 1)} MB" + defp human(bytes) when bytes >= 1_024, do: "#{Float.round(bytes / 1_024, 1)} KB" + defp human(bytes), do: "#{bytes} B" + + defp format_name(name) when is_atom(name), do: inspect(name) + defp format_name({m, f, a}), do: "#{inspect(m)}.#{f}/#{a}" + defp format_name(pid) when is_pid(pid), do: inspect(pid) + defp format_name(other), do: inspect(other) + + defp safe_ets_tab2list(table) do + case :ets.info(table) do + :undefined -> nil + _ -> :ets.tab2list(table) + end + end + + defp table_entry_count(table) do + case :ets.info(table, :size) do + :undefined -> "?" 
+ n -> n + end + end + + defp ets_entry_words(table, key) do + # Estimate: total table memory / entry count (ETS doesn't expose per-entry sizes) + total = :ets.info(table, :memory) + size = :ets.info(table, :size) + if size > 0, do: div(total, size), else: 0 + end +end diff --git a/metrics/grafana/provisioning/dashboards/home.json b/metrics/grafana/provisioning/dashboards/home.json index b31337f58..9e1afa2f1 100644 --- a/metrics/grafana/provisioning/dashboards/home.json +++ b/metrics/grafana/provisioning/dashboards/home.json @@ -271,7 +271,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "expr": "sum(network_pubsub_peers_count{result=\"add\"}) - sum(network_pubsub_peers_count{result=\"remove\"})", + "expr": "peerbook_peers_count", "legendFormat": "{{job}}", "refId": "A" } @@ -1338,7 +1338,7 @@ "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "sum(network_pubsub_peers_count{result=\"add\"}) - sum(network_pubsub_peers_count{result=\"remove\"})", + "expr": "peerbook_peers_count", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, diff --git a/metrics/prometheus/prometheus.yml b/metrics/prometheus/prometheus.yml index d629f582b..e9a3c7211 100644 --- a/metrics/prometheus/prometheus.yml +++ b/metrics/prometheus/prometheus.yml @@ -1,5 +1,10 @@ global: - scrape_interval: 1s + # The /metrics endpoint on mainnet returns ~96k lines and takes ~2s to serve, + # so the previous 1s scrape_interval caused every scrape to time out + # (scrape_timeout defaults to scrape_interval). Use 15s / 10s — enough headroom + # for mainnet, while still fine-grained for dashboards. 
+ scrape_interval: 15s + scrape_timeout: 10s scrape_configs: - job_name: "prom_ex" diff --git a/native/libp2p_port/internal/reqresp/reqresp.go b/native/libp2p_port/internal/reqresp/reqresp.go index 8e261f83e..79671ca48 100644 --- a/native/libp2p_port/internal/reqresp/reqresp.go +++ b/native/libp2p_port/internal/reqresp/reqresp.go @@ -15,6 +15,7 @@ import ( "github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/protocol" + "github.com/libp2p/go-libp2p/p2p/net/connmgr" "github.com/libp2p/go-libp2p/p2p/security/noise" "github.com/libp2p/go-libp2p/p2p/transport/tcp" ma "github.com/multiformats/go-multiaddr" @@ -33,6 +34,17 @@ type Listener struct { func NewListener(p *port.Port, config *proto_helpers.Config) Listener { ifaceKey, err := utils.ConvertToInterfacePrivkey(config.Privkey) utils.PanicIfError(err) + + // Bound peer connections to prevent message queue overflow in the Elixir + // Libp2pPort GenServer. Without limits, Go accepts hundreds of peers whose + // gossip messages flood the port, causing 500K+ message queue buildup. + cm, err := connmgr.NewConnManager( + 60, // LowWater: start pruning when above this many peers + 80, // HighWater: aggressively prune down to LowWater above this + connmgr.WithGracePeriod(time.Minute), // new peers get 1 min grace + ) + utils.PanicIfError(err) + // as per the spec optionsSlice := []libp2p.Option{ libp2p.DefaultMuxers, @@ -42,6 +54,7 @@ func NewListener(p *port.Port, config *proto_helpers.Config) Listener { libp2p.DisableRelay(), libp2p.NATPortMap(), // Allow to use UPnP libp2p.Ping(false), + libp2p.ConnectionManager(cm), libp2p.ListenAddrStrings(config.ListenAddr...), libp2p.Identity(ifaceKey), } diff --git a/native/ssz_nif/Cargo.lock b/native/ssz_nif/Cargo.lock index 82696c4c2..b087ddd7a 100644 --- a/native/ssz_nif/Cargo.lock +++ b/native/ssz_nif/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. 
# It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "aho-corasick" @@ -1351,9 +1351,11 @@ dependencies = [ name = "ssz_nif" version = "0.1.0" dependencies = [ + "ethereum_hashing", "ethereum_ssz", "ethereum_ssz_derive", "rustler", + "sha2", "ssz_types", "tree_hash", "tree_hash_derive", diff --git a/native/ssz_nif/Cargo.toml b/native/ssz_nif/Cargo.toml index afd7bd560..9cbc8385d 100644 --- a/native/ssz_nif/Cargo.toml +++ b/native/ssz_nif/Cargo.toml @@ -13,6 +13,8 @@ crate-type = ["cdylib"] rustler = "0.32.1" ethereum_ssz_derive = "0.8.3" ethereum_ssz = "0.8.3" +ethereum_hashing = { version = "0.7.0", features = ["zero_hash_cache"] } ssz_types = "0.10.1" tree_hash = "0.9.1" tree_hash_derive = "0.9.1" +sha2 = "0.10" diff --git a/native/ssz_nif/src/lib.rs b/native/ssz_nif/src/lib.rs index 9aff9e862..dd2c58310 100644 --- a/native/ssz_nif/src/lib.rs +++ b/native/ssz_nif/src/lib.rs @@ -11,6 +11,7 @@ pub(crate) mod utils; use crate::utils::{helpers::bytes_to_binary, schema_match}; use rustler::{Atom, Binary, Encoder, Env, NifResult, Term}; +use std::collections::HashMap; mod atoms { use rustler::atoms; @@ -131,6 +132,200 @@ fn hash_tree_root_vector_rs<'env>( Ok((atoms::ok(), bytes_to_binary(env, &serialized?)).encode(env)) } +/// Parse a map of {u32 => Binary} into a HashMap of {u32 => [u8; 32]}. 
+fn decode_cached_hashes(cached_hashes_map: Term) -> NifResult> { + let cached_raw: HashMap = cached_hashes_map.decode()?; + let mut cached: HashMap = HashMap::with_capacity(cached_raw.len()); + for (k, v) in cached_raw { + let arr: [u8; 32] = v + .as_slice() + .try_into() + .map_err(|_| rustler::Error::BadArg)?; + cached.insert(k, arr); + } + Ok(cached) +} + +#[rustler::nif(schedule = "DirtyCpu")] +fn hash_beacon_state_cached_rs<'a>( + env: Env<'a>, + state: Term<'a>, + cached_hashes_map: Term<'a>, + config: Atom, +) -> NifResult> { + let config_str = config.to_term(env).atom_to_string()?; + let cached = decode_cached_hashes(cached_hashes_map)?; + + let result = match config_str.as_str() { + "mainnet" => crate::utils::cached_hash::hash_beacon_state_cached::< + crate::ssz_types::config::Mainnet, + >(env, state, &cached)?, + "minimal" => crate::utils::cached_hash::hash_beacon_state_cached::< + crate::ssz_types::config::Minimal, + >(env, state, &cached)?, + "gnosis" => crate::utils::cached_hash::hash_beacon_state_cached::< + crate::ssz_types::config::Gnosis, + >(env, state, &cached)?, + _ => return Err(rustler::Error::BadArg), + }; + + Ok(( + atoms::ok(), + bytes_to_binary(env, &result.root), + bytes_to_binary(env, &result.field_hashes), + ) + .encode(env)) +} + +/// Apply targeted balance updates to the cached incremental balance merkle tree. +/// Returns `{:ok, hash}` if the cache is valid, or `{:error, :cache_miss}` if the cache +/// needs to be rebuilt (caller should fall through to the full hash path). +/// +/// `updates` is a list of `{index :: u32, new_value :: u64}` tuples. +/// `balance_count` is the current total number of balances (for mix_in_length). 
+#[rustler::nif(schedule = "DirtyCpu")] +fn update_balance_cache_rs<'a>( + env: Env<'a>, + updates: Vec<(u32, u64)>, + balance_count: u64, + expected_prev_hash: Binary<'a>, +) -> NifResult> { + let prev_hash: &[u8; 32] = expected_prev_hash + .as_slice() + .try_into() + .map_err(|_| rustler::Error::BadArg)?; + match crate::utils::balance_cache::apply_updates_and_hash( + &updates, + balance_count as usize, + prev_hash, + ) { + Some(hash) => Ok((atoms::ok(), bytes_to_binary(env, &hash)).encode(env)), + None => { + let error_atom = Atom::from_str(env, "error")?; + let miss_atom = Atom::from_str(env, "cache_miss")?; + Ok((error_atom, miss_atom).encode(env)) + } + } +} + +/// Apply targeted participation updates to the cached incremental participation merkle tree. +/// Returns `{:ok, hash}` if the cache is valid, or `{:error, :cache_miss}` if the cache +/// needs to be rebuilt (caller should fall through to the full hash path). +/// +/// `field_num` is 15 (previous_epoch_participation) or 16 (current_epoch_participation). +/// `updates` is a list of `{index :: u32, new_value :: u8}` tuples. +/// `value_count` is the current total number of participation entries (for mix_in_length). +#[rustler::nif(schedule = "DirtyCpu")] +fn update_participation_cache_rs<'a>( + env: Env<'a>, + field_num: u32, + updates: Vec<(u32, u8)>, + value_count: u64, + expected_prev_hash: Binary<'a>, +) -> NifResult> { + let prev_hash: &[u8; 32] = expected_prev_hash + .as_slice() + .try_into() + .map_err(|_| rustler::Error::BadArg)?; + match crate::utils::participation_cache::apply_participation_updates( + field_num, + &updates, + value_count as usize, + prev_hash, + ) { + Some(hash) => Ok((atoms::ok(), bytes_to_binary(env, &hash)).encode(env)), + None => { + let error_atom = Atom::from_str(env, "error")?; + let miss_atom = Atom::from_str(env, "cache_miss")?; + Ok((error_atom, miss_atom).encode(env)) + } + } +} + +/// Apply a single targeted randao_mixes update to the cached incremental merkle tree. 
+/// Returns `{:ok, hash}` if the cache is valid, or `{:error, :cache_miss}` on miss. +/// +/// `index` is the position to update, `new_value` is the new 32-byte entry. +/// `total_count` is the total number of randao mix entries. +/// `expected_prev_hash` validates the cache matches the expected parent state. +#[rustler::nif(schedule = "DirtyCpu")] +fn update_randao_cache_rs<'a>( + env: Env<'a>, + index: u64, + new_value: Binary<'a>, + total_count: u64, + expected_prev_hash: Binary<'a>, +) -> NifResult> { + let value: &[u8; 32] = new_value + .as_slice() + .try_into() + .map_err(|_| rustler::Error::BadArg)?; + let prev_hash: &[u8; 32] = expected_prev_hash + .as_slice() + .try_into() + .map_err(|_| rustler::Error::BadArg)?; + match crate::utils::randao_cache::apply_randao_update( + index as usize, + value, + total_count as usize, + prev_hash, + ) { + Some(hash) => Ok((atoms::ok(), bytes_to_binary(env, &hash)).encode(env)), + None => { + let error_atom = Atom::from_str(env, "error")?; + let miss_atom = Atom::from_str(env, "cache_miss")?; + Ok((error_atom, miss_atom).encode(env)) + } + } +} + +#[rustler::nif(schedule = "DirtyCpu")] +fn compute_proposer_indices_rs( + epoch_seed: Binary, + start_slot: u64, + slots_per_epoch: u32, + active_indices: Vec, + effective_balances: Vec, + max_effective_balance: u64, + rounds: u32, +) -> NifResult> { + if epoch_seed.len() != 32 { + return Err(rustler::Error::BadArg); + } + let seed: &[u8; 32] = epoch_seed + .as_slice() + .try_into() + .map_err(|_| rustler::Error::BadArg)?; + Ok(crate::utils::shuffle::compute_proposer_indices( + seed, + start_slot, + slots_per_epoch, + &active_indices, + &effective_balances, + max_effective_balance, + rounds, + )) +} + +#[rustler::nif(schedule = "DirtyCpu")] +fn shuffle_list_rs<'env>( + env: Env<'env>, + indices: Vec, + seed: Binary, + rounds: u32, +) -> NifResult> { + if seed.len() != 32 { + return Err(rustler::Error::BadArg); + } + let seed_arr: &[u8; 32] = seed + .as_slice() + .try_into() + 
.map_err(|_| rustler::Error::BadArg)?; + let mut arr = indices; + crate::utils::shuffle::shuffle_list(&mut arr, seed_arr, rounds); + Ok(arr) +} + rustler::init!( "Elixir.Ssz", [ @@ -140,5 +335,11 @@ rustler::init!( hash_tree_root_rs, hash_tree_root_list_rs, hash_tree_root_vector_rs, + hash_beacon_state_cached_rs, + update_balance_cache_rs, + update_participation_cache_rs, + update_randao_cache_rs, + shuffle_list_rs, + compute_proposer_indices_rs, ] ); diff --git a/native/ssz_nif/src/utils/balance_cache.rs b/native/ssz_nif/src/utils/balance_cache.rs new file mode 100644 index 000000000..abcc6ef04 --- /dev/null +++ b/native/ssz_nif/src/utils/balance_cache.rs @@ -0,0 +1,359 @@ +//! Incremental merkle tree cache for BeaconState balances (field 12). +//! +//! SSZ hashes `VariableList` as a binary merkle tree +//! where every 4 u64 values are packed into one 32-byte leaf chunk. With ~2.2M +//! validators the tree has ~550K populated leaves out of 2^38 total positions. +//! +//! On non-epoch blocks only ~528 balances change (512 sync committee + 16 withdrawals), +//! so rebuilding the entire tree is extremely wasteful. This module caches the tree +//! and updates only the affected paths on subsequent calls. +//! +//! ## Tree layout +//! +//! A dense subtree covers the first `DENSE_LEAF_COUNT` (2^20 = 1M) leaf positions, +//! enough for 4M validators. Above that, 18 sparse levels use precomputed zero-hashes. +//! +//! Dense flat array (1-indexed): +//! nodes[1] = subtree root +//! nodes[2], nodes[3] = depth 1 +//! ... +//! nodes[DENSE_LEAF_COUNT .. 2*DENSE_LEAF_COUNT - 1] = leaves (packed u64 chunks) + +use std::sync::{LazyLock, Mutex}; + +use ethereum_hashing::{hash32_concat, ZERO_HASHES}; + +/// u64 packing factor: 4 u64s (32 bytes) per leaf chunk. +const PACKING_FACTOR: usize = 4; + +/// Dense subtree depth. 2^20 = 1,048,576 leaf positions → supports up to 4M validators. 
+const DENSE_DEPTH: usize = 20; +const DENSE_LEAF_COUNT: usize = 1 << DENSE_DEPTH; // 1,048,576 +const DENSE_NODE_COUNT: usize = 2 * DENSE_LEAF_COUNT; // 2,097,152 + +/// Total tree depth for `VariableList`. +/// ValidatorRegistryLimit = 2^40 for all configs. max_chunks = 2^40/4 = 2^38. depth = 38. +const TOTAL_DEPTH: usize = 38; + +struct BalanceMerkleCache { + /// Flat binary tree: nodes[1] = subtree root, leaves at [DENSE_LEAF_COUNT .. 2*DENSE_LEAF_COUNT). + /// Index 0 is unused. + nodes: Vec<[u8; 32]>, + /// Cached balance values for diffing. + balances: Vec, + /// Whether the cache has been initialized. + valid: bool, + /// The last computed root hash (with mix_in_length), used to validate + /// that the cache corresponds to the expected parent state on the caller's fork. + last_root: [u8; 32], +} + +impl BalanceMerkleCache { + fn new() -> Self { + Self { + nodes: Vec::new(), + balances: Vec::new(), + valid: false, + last_root: [0u8; 32], + } + } + + /// Build the full tree from scratch. + fn initialize(&mut self, balances: &[u64]) { + let chunk_count = balances.len().div_ceil(PACKING_FACTOR); + assert!( + chunk_count <= DENSE_LEAF_COUNT, + "balance chunk count ({chunk_count}) exceeds dense tree capacity ({DENSE_LEAF_COUNT})" + ); + + // Allocate tree — initialize all nodes to zeros (matching SSZ zero-padding). + self.nodes.clear(); + self.nodes.resize(DENSE_NODE_COUNT, [0u8; 32]); + self.balances = balances.to_vec(); + + // Pack balances into leaf chunks. + for c in 0..chunk_count { + self.nodes[DENSE_LEAF_COUNT + c] = pack_chunk(balances, c); + } + // Remaining leaf positions are already zero (SSZ default for unpopulated entries). + + // Build internal nodes bottom-up. 
+ for i in (1..DENSE_LEAF_COUNT).rev() { + self.nodes[i] = hash32_concat(&self.nodes[2 * i], &self.nodes[2 * i + 1]); + } + + self.valid = true; + } + + /// Diff the new balances against the cache, incrementally update changed paths, + /// and return the final SSZ VariableList hash (content root + mix_in_length). + fn update_and_root(&mut self, new_balances: &[u64]) -> [u8; 32] { + debug_assert!(self.valid); + + let old_len = self.balances.len(); + let new_len = new_balances.len(); + + if new_len != old_len { + // Balance count changed (new validators added at epoch). Rebuild. + self.initialize(new_balances); + return self.finalize_root(new_len); + } + + // Collect dirty chunk indices. + let mut dirty_chunks: Vec = Vec::with_capacity(600); // ~528 typical + for i in 0..new_len { + if new_balances[i] != self.balances[i] { + let chunk_idx = i / PACKING_FACTOR; + if dirty_chunks.last() != Some(&chunk_idx) { + dirty_chunks.push(chunk_idx); + } + self.balances[i] = new_balances[i]; + } + } + + // Update dirty leaves and walk each path to subtree root. + for &chunk_idx in &dirty_chunks { + let leaf_idx = DENSE_LEAF_COUNT + chunk_idx; + self.nodes[leaf_idx] = pack_chunk(&self.balances, chunk_idx); + + let mut pos = leaf_idx >> 1; + while pos >= 1 { + self.nodes[pos] = hash32_concat(&self.nodes[2 * pos], &self.nodes[2 * pos + 1]); + pos >>= 1; + } + } + + self.finalize_root(new_len) + } + + /// Walk through sparse levels from dense subtree root to content root, + /// then mix_in_length for the final SSZ VariableList hash. + /// Also stores the result as `last_root` for fork validation. + fn finalize_root(&mut self, balance_count: usize) -> [u8; 32] { + let mut root = self.nodes[1]; // dense subtree root + + // Sparse levels: DENSE_DEPTH .. TOTAL_DEPTH-1 + // At each level, our subtree is the left child; right sibling is a zero-hash subtree. 
+ for level in DENSE_DEPTH..TOTAL_DEPTH { + root = hash32_concat(&root, &ZERO_HASHES[level]); + } + + // mix_in_length: hash(content_root || length_as_le_u256) + let mut length_bytes = [0u8; 32]; + length_bytes[0..8].copy_from_slice(&(balance_count as u64).to_le_bytes()); + let result = hash32_concat(&root, &length_bytes); + self.last_root = result; + result + } +} + +/// Pack 4 consecutive u64 values into a 32-byte SSZ chunk (little-endian). +fn pack_chunk(balances: &[u64], chunk_idx: usize) -> [u8; 32] { + let mut chunk = [0u8; 32]; + let start = chunk_idx * PACKING_FACTOR; + for i in 0..PACKING_FACTOR { + let idx = start + i; + if idx < balances.len() { + chunk[i * 8..(i + 1) * 8].copy_from_slice(&balances[idx].to_le_bytes()); + } + } + chunk +} + +static BALANCE_CACHE: LazyLock> = + LazyLock::new(|| Mutex::new(BalanceMerkleCache::new())); + +/// Compute the SSZ tree hash root of a `VariableList` incrementally. +/// +/// On the first call, builds the full tree and caches it. On subsequent calls, diffs +/// against the cached balances and only rehashes affected paths. +/// +/// Returns the final 32-byte hash (content root with mix_in_length). +pub fn hash_balances_incremental(balances: &[u64]) -> [u8; 32] { + let mut cache = BALANCE_CACHE.lock().unwrap(); + if cache.valid { + cache.update_and_root(balances) + } else { + cache.initialize(balances); + cache.finalize_root(balances.len()) + } +} + +/// Apply targeted balance updates and return the new hash. +/// `updates` is a list of (index, new_value) pairs. +/// `balance_count` is the current total number of balances. +/// `expected_prev_hash` is the hash the caller expects the cache to currently hold. +/// This validates that the cache corresponds to the correct fork/parent state. +/// +/// The cache must have been initialized by a prior `hash_balances_incremental` call. 
+/// If the cache is invalid, balance_count doesn't match, or the expected hash doesn't +/// match the cache's last computed root, returns None (caller falls back to full hash). +pub fn apply_updates_and_hash( + updates: &[(u32, u64)], + balance_count: usize, + expected_prev_hash: &[u8; 32], +) -> Option<[u8; 32]> { + let mut cache = BALANCE_CACHE.lock().unwrap(); + if !cache.valid + || cache.balances.len() != balance_count + || &cache.last_root != expected_prev_hash + { + return None; + } + + // Apply updates and collect dirty chunks. + let mut dirty_chunks: Vec = Vec::with_capacity(updates.len() / 4 + 1); + for &(idx, new_val) in updates { + let i = idx as usize; + if i < cache.balances.len() { + cache.balances[i] = new_val; + let chunk_idx = i / PACKING_FACTOR; + if dirty_chunks.last() != Some(&chunk_idx) { + dirty_chunks.push(chunk_idx); + } + } + } + + // Sort dirty chunks to ensure dedup works correctly (updates may not be ordered). + dirty_chunks.sort_unstable(); + dirty_chunks.dedup(); + + // Update dirty leaves and walk each path to subtree root. + for &chunk_idx in &dirty_chunks { + let leaf_idx = DENSE_LEAF_COUNT + chunk_idx; + cache.nodes[leaf_idx] = pack_chunk(&cache.balances, chunk_idx); + + let mut pos = leaf_idx >> 1; + while pos >= 1 { + cache.nodes[pos] = hash32_concat(&cache.nodes[2 * pos], &cache.nodes[2 * pos + 1]); + pos >>= 1; + } + } + + Some(cache.finalize_root(balance_count)) +} + +/// Reset the balance cache. Should be called when the balance vector is resized +/// (e.g., after epoch processing that adds new validators). +#[allow(dead_code)] +pub fn reset_balance_cache() { + BALANCE_CACHE.lock().unwrap().valid = false; +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Compute the balance hash the "original" way (full tree via ssz_types + tree_hash) + /// for comparison. 
+ fn reference_hash(balances: &[u64]) -> [u8; 32] { + use ssz_types::typenum::U1099511627776; + use ssz_types::VariableList; + use tree_hash::TreeHash; + + let list = VariableList::::new(balances.to_vec()).unwrap(); + list.tree_hash_root().0 + } + + #[test] + fn empty_balances() { + reset_balance_cache(); + let balances: Vec = vec![]; + assert_eq!( + hash_balances_incremental(&balances), + reference_hash(&balances) + ); + } + + #[test] + fn small_balances() { + reset_balance_cache(); + let balances: Vec = vec![32_000_000_000; 100]; + assert_eq!( + hash_balances_incremental(&balances), + reference_hash(&balances) + ); + } + + #[test] + fn incremental_update() { + reset_balance_cache(); + let mut balances: Vec = vec![32_000_000_000; 1000]; + + // First call: builds cache + let h1 = hash_balances_incremental(&balances); + assert_eq!(h1, reference_hash(&balances)); + + // Modify a few balances (simulating sync + withdrawals) + balances[42] += 1_000_000; + balances[500] -= 500_000; + balances[999] = 0; + + // Second call: incremental update + let h2 = hash_balances_incremental(&balances); + assert_eq!(h2, reference_hash(&balances)); + assert_ne!(h1, h2); + } + + #[test] + fn cross_chunk_boundary() { + reset_balance_cache(); + let mut balances: Vec = vec![1; 8]; // 2 chunks + + let h1 = hash_balances_incremental(&balances); + assert_eq!(h1, reference_hash(&balances)); + + // Change last element of chunk 0 and first element of chunk 1 + balances[3] = 99; + balances[4] = 99; + + let h2 = hash_balances_incremental(&balances); + assert_eq!(h2, reference_hash(&balances)); + } + + #[test] + fn targeted_updates() { + reset_balance_cache(); + let mut balances: Vec = vec![32_000_000_000; 1000]; + + // Initialize cache via full hash + let _h1 = hash_balances_incremental(&balances); + + // Apply targeted updates (simulating sync + withdrawal) + let updates: Vec<(u32, u64)> = vec![ + (42, balances[42] + 1_000_000), + (500, balances[500].saturating_sub(500_000)), + (999, 0), + ]; + 
// Also update our reference copy + balances[42] += 1_000_000; + balances[500] -= 500_000; + balances[999] = 0; + + let h2 = apply_updates_and_hash(&updates, 1000, &_h1).unwrap(); + assert_eq!(h2, reference_hash(&balances)); + } + + #[test] + fn targeted_updates_returns_none_when_invalid() { + reset_balance_cache(); + let updates = vec![(0, 100u64)]; + let fake_hash = [0u8; 32]; + assert_eq!(apply_updates_and_hash(&updates, 100, &fake_hash), None); + } + + #[test] + fn balance_count_change_triggers_rebuild() { + reset_balance_cache(); + let balances: Vec = vec![32_000_000_000; 100]; + let h1 = hash_balances_incremental(&balances); + assert_eq!(h1, reference_hash(&balances)); + + // Add a new validator (epoch boundary deposit) + let mut balances2 = balances.clone(); + balances2.push(32_000_000_000); + let h2 = hash_balances_incremental(&balances2); + assert_eq!(h2, reference_hash(&balances2)); + assert_ne!(h1, h2); + } +} diff --git a/native/ssz_nif/src/utils/cached_hash.rs b/native/ssz_nif/src/utils/cached_hash.rs new file mode 100644 index 000000000..8fb4c182d --- /dev/null +++ b/native/ssz_nif/src/utils/cached_hash.rs @@ -0,0 +1,292 @@ +use rustler::{Atom, Binary, Decoder, Env, NifResult, Term}; +use std::collections::HashMap; +use tree_hash::{MerkleHasher, TreeHash}; + +use crate::ssz_types::config::Config; +use crate::utils::from_elx::{FromElx, FromElxError}; + +use ssz::Decode; + +/// Helper to convert a field from Elixir Term to SSZ type and hash it. +fn convert_and_hash<'a, Elx, Ssz>(field_term: Term<'a>) -> NifResult<[u8; 32]> +where + Elx: Decoder<'a>, + Ssz: TreeHash + FromElx, +{ + let elx_val = Elx::decode(field_term)?; + let ssz_val = Ssz::from(elx_val) + .map_err(|e: FromElxError| rustler::Error::Term(Box::new(e.to_string())))?; + Ok(ssz_val.tree_hash_root().0) +} + +/// Helper to convert a Vec field and compute its tree hash root as a VariableList. 
+fn convert_and_hash_list<'a, Elx, Ssz, N>(field_term: Term<'a>) -> NifResult<[u8; 32]>
+where
+    Elx: Decoder<'a>,
+    Ssz: TreeHash + FromElx<Elx>,
+    N: ssz_types::typenum::Unsigned,
+{
+    let ssz_vec = decode_elements::<Elx, Ssz>(field_term)?;
+    // `new` rejects inputs longer than the list limit `N`.
+    let list = ssz_types::VariableList::<Ssz, N>::new(ssz_vec).map_err(debug_term_error)?;
+    Ok(list.tree_hash_root().0)
+}
+
+/// Helper to convert a Vec field and compute its tree hash root as a FixedVector.
+fn convert_and_hash_vector<'a, Elx, Ssz, N>(field_term: Term<'a>) -> NifResult<[u8; 32]>
+where
+    Elx: Decoder<'a>,
+    Ssz: TreeHash + FromElx<Elx>,
+    N: ssz_types::typenum::Unsigned,
+{
+    let ssz_vec = decode_elements::<Elx, Ssz>(field_term)?;
+    // `new` rejects inputs whose length differs from the vector length `N`.
+    let vector = ssz_types::FixedVector::<Ssz, N>::new(ssz_vec).map_err(debug_term_error)?;
+    Ok(vector.tree_hash_root().0)
+}
+
+/// Decode an Elixir term into `Vec<Elx>` and convert every element to its SSZ type.
+///
+/// Shared by the list and vector helpers. Propagates the decoder error unchanged;
+/// a failed element conversion becomes a `rustler::Error::Term` carrying the
+/// `FromElxError` message.
+fn decode_elements<'a, Elx, Ssz>(field_term: Term<'a>) -> NifResult<Vec<Ssz>>
+where
+    Elx: Decoder<'a>,
+    Ssz: FromElx<Elx>,
+{
+    let elx_vec: Vec<Elx> = Decoder::decode(field_term)?;
+    elx_vec
+        .into_iter()
+        .map(<Ssz as FromElx<Elx>>::from)
+        .collect::<Result<Vec<Ssz>, FromElxError>>()
+        .map_err(|e| rustler::Error::Term(Box::new(e.to_string())))
+}
+
+/// Wrap any `Debug` error as a `rustler::Error::Term` holding its `{:?}` rendering.
+fn debug_term_error<E: std::fmt::Debug>(err: E) -> rustler::Error {
+    rustler::Error::Term(Box::new(format!("{err:?}")))
+}
+
+/// Helper to convert a Binary field (BitVector) and hash it.
+fn convert_and_hash_bitvector<'a, N>(field_term: Term<'a>) -> NifResult<[u8; 32]>
+where
+    N: ssz_types::typenum::Unsigned,
+{
+    let bin = Binary::from_term(field_term)?;
+    // The binary is expected to already be the SSZ encoding of the bit vector.
+    let bv = ssz_types::BitVector::<N>::from_ssz_bytes(&bin).map_err(debug_term_error)?;
+    Ok(bv.tree_hash_root().0)
+}
+
+/// Get a field from an Elixir struct by atom name.
+///
+/// Returns an error if the atom cannot be created or the key is absent from the map.
+fn get_field<'a>(env: Env<'a>, state: Term<'a>, field_name: &str) -> NifResult<Term<'a>> {
+    let atom = Atom::from_str(env, field_name)?;
+    state.map_get(atom.to_term(env))
+}
+
+/// Result of hash_beacon_state_cached: the root hash and all individual field hashes.
+pub(crate) struct CachedHashResult { + pub root: [u8; 32], + /// All field hashes concatenated: field_count * 32 bytes + pub field_hashes: Vec, +} + +/// Hash a BeaconState with cached field hashes. +/// `cached_hashes` maps field index (0-based) to pre-computed 32-byte hash. +/// Fields not in the cache are computed from the state. +/// Returns the root hash and all individual field hashes (for caching by the caller). +pub(crate) fn hash_beacon_state_cached<'a, C: Config>( + env: Env<'a>, + state: Term<'a>, + cached_hashes: &HashMap, +) -> NifResult { + // BeaconState fields in schema order (must match Rust struct AND Elixir schema) + let field_names: &[&str] = &[ + "genesis_time", // 0 + "genesis_validators_root", // 1 + "slot", // 2 + "fork", // 3 + "latest_block_header", // 4 + "block_roots", // 5 + "state_roots", // 6 + "historical_roots", // 7 + "eth1_data", // 8 + "eth1_data_votes", // 9 + "eth1_deposit_index", // 10 + "validators", // 11 + "balances", // 12 + "randao_mixes", // 13 + "slashings", // 14 + "previous_epoch_participation", // 15 + "current_epoch_participation", // 16 + "justification_bits", // 17 + "previous_justified_checkpoint", // 18 + "current_justified_checkpoint", // 19 + "finalized_checkpoint", // 20 + "inactivity_scores", // 21 + "current_sync_committee", // 22 + "next_sync_committee", // 23 + "latest_execution_payload_header", // 24 + "next_withdrawal_index", // 25 + "next_withdrawal_validator_index", // 26 + "historical_summaries", // 27 + "deposit_requests_start_index", // 28 + "deposit_balance_to_consume", // 29 + "exit_balance_to_consume", // 30 + "earliest_exit_epoch", // 31 + "consolidation_balance_to_consume", // 32 + "earliest_consolidation_epoch", // 33 + "pending_deposits", // 34 + "pending_partial_withdrawals", // 35 + "pending_consolidations", // 36 + "proposer_lookahead", // 37 + ]; + + let num_fields = field_names.len(); + let mut hasher = MerkleHasher::with_leaves(num_fields); + let mut all_field_hashes: Vec = 
Vec::with_capacity(num_fields * 32); + + for (idx, &name) in field_names.iter().enumerate() { + let hash = if let Some(cached) = cached_hashes.get(&(idx as u32)) { + *cached + } else { + let field = get_field(env, state, name)?; + compute_field_hash::(idx, field)? + }; + all_field_hashes.extend_from_slice(&hash); + hasher + .write(&hash) + .map_err(|e| rustler::Error::Term(Box::new(format!("{e:?}"))))?; + } + + let root = hasher + .finish() + .map_err(|e| rustler::Error::Term(Box::new(format!("{e:?}"))))?; + Ok(CachedHashResult { + root: root.0, + field_hashes: all_field_hashes, + }) +} + +/// Compute the tree hash of a single BeaconState field by index. +fn compute_field_hash<'a, C: Config>(field_index: usize, field: Term<'a>) -> NifResult<[u8; 32]> { + use crate::elx_types; + use crate::ssz_types; + + match field_index { + // Scalar u64 fields + 0 | 2 | 10 | 25 | 26 | 28 | 29 | 30 | 31 | 32 | 33 => { + let val: u64 = field.decode()?; + Ok(val.tree_hash_root().0) + } + // Root (Bytes32) field: genesis_validators_root + 1 => { + let bin = Binary::from_term(field)?; + let arr: [u8; 32] = bin + .as_slice() + .try_into() + .map_err(|_| rustler::Error::BadArg)?; + Ok(arr.tree_hash_root().0) + } + // Fork + 3 => convert_and_hash::(field), + // BeaconBlockHeader + 4 => convert_and_hash::(field), + // block_roots: FixedVector + 5 => convert_and_hash_vector::(field), + // state_roots: FixedVector + 6 => convert_and_hash_vector::(field), + // historical_roots: VariableList + 7 => convert_and_hash_list::(field), + // eth1_data + 8 => convert_and_hash::(field), + // eth1_data_votes: VariableList + 9 => convert_and_hash_list::< + elx_types::Eth1Data, + ssz_types::Eth1Data, + C::SlotsPerEth1VotingPeriod, + >(field), + // validators: VariableList + 11 => convert_and_hash_list::< + elx_types::Validator, + ssz_types::Validator, + C::ValidatorRegistryLimit, + >(field), + // balances: VariableList + // Use incremental merkle cache: decode the Vec and hand it to the + // balance 
cache which diffs against its previous state and only rehashes + // the changed chunks. + 12 => { + let balances: Vec = Decoder::decode(field)?; + Ok(crate::utils::balance_cache::hash_balances_incremental( + &balances, + )) + } + // randao_mixes: FixedVector + // Decode once, compute standard SSZ hash, and seed the incremental cache + // so that subsequent Elixir-side targeted updates can skip this conversion. + 13 => { + let binaries: Vec = Decoder::decode(field)?; + let ssz_vec: Vec<[u8; 32]> = binaries + .into_iter() + .map(|b| FromElx::from(b)) + .collect::, _>>() + .map_err(|e: FromElxError| rustler::Error::Term(Box::new(e.to_string())))?; + let vector = ::ssz_types::FixedVector::<[u8; 32], C::EpochsPerHistoricalVector>::new( + ssz_vec.clone(), + ) + .map_err(|e| rustler::Error::Term(Box::new(format!("{e:?}"))))?; + let result = vector.tree_hash_root().0; + // Seed the cache so targeted updates work on subsequent blocks. + crate::utils::randao_cache::seed_cache(&ssz_vec, &result); + Ok(result) + } + // slashings: FixedVector + 14 => convert_and_hash_vector::(field), + // previous_epoch_participation: VariableList + // Use incremental merkle cache: decode the Vec and hand it to the + // participation cache which diffs against its previous state. 
+ 15 => { + let values: Vec = Decoder::decode(field)?; + Ok(crate::utils::participation_cache::hash_participation_incremental(15, &values)) + } + // current_epoch_participation: VariableList + 16 => { + let values: Vec = Decoder::decode(field)?; + Ok(crate::utils::participation_cache::hash_participation_incremental(16, &values)) + } + // justification_bits: BitVector + 17 => convert_and_hash_bitvector::(field), + // Checkpoints + 18 | 19 | 20 => convert_and_hash::(field), + // inactivity_scores: VariableList + 21 => convert_and_hash_list::(field), + // current_sync_committee + 22 => convert_and_hash::>(field), + // next_sync_committee + 23 => convert_and_hash::>(field), + // latest_execution_payload_header + 24 => convert_and_hash::< + elx_types::ExecutionPayloadHeader, + ssz_types::ExecutionPayloadHeader, + >(field), + // historical_summaries: VariableList + 27 => convert_and_hash_list::< + elx_types::HistoricalSummary, + ssz_types::HistoricalSummary, + C::HistoricalRootsLimit, + >(field), + // pending_deposits: VariableList + 34 => convert_and_hash_list::< + elx_types::PendingDeposit, + ssz_types::PendingDeposit, + C::PendingDepositsLimit, + >(field), + // pending_partial_withdrawals + 35 => convert_and_hash_list::< + elx_types::PendingPartialWithdrawal, + ssz_types::PendingPartialWithdrawal, + C::PendingPartialWithdrawalsLimit, + >(field), + // pending_consolidations + 36 => convert_and_hash_list::< + elx_types::PendingConsolidation, + ssz_types::PendingConsolidation, + C::PendingConsolidationsLimit, + >(field), + // proposer_lookahead: FixedVector + 37 => convert_and_hash_vector::(field), + + _ => Err(rustler::Error::Term(Box::new(format!( + "Unknown field index: {field_index}" + )))), + } +} diff --git a/native/ssz_nif/src/utils/mod.rs b/native/ssz_nif/src/utils/mod.rs index 189cc65e7..55e104d18 100644 --- a/native/ssz_nif/src/utils/mod.rs +++ b/native/ssz_nif/src/utils/mod.rs @@ -1,6 +1,11 @@ +pub(crate) mod balance_cache; +pub(crate) mod cached_hash; 
pub(crate) mod from_elx; pub(crate) mod from_ssz; pub(crate) mod helpers; +pub(crate) mod participation_cache; +pub(crate) mod randao_cache; +pub(crate) mod shuffle; /// New containers should be added to this macro macro_rules! schema_match { diff --git a/native/ssz_nif/src/utils/participation_cache.rs b/native/ssz_nif/src/utils/participation_cache.rs new file mode 100644 index 000000000..104f25a2b --- /dev/null +++ b/native/ssz_nif/src/utils/participation_cache.rs @@ -0,0 +1,365 @@ +//! Incremental merkle tree cache for BeaconState participation fields (15, 16). +//! +//! SSZ hashes `VariableList` as a binary merkle tree +//! where every 32 u8 values are packed into one 32-byte leaf chunk. With ~2.2M +//! validators the tree has ~68.75K populated leaves out of 2^35 total positions. +//! +//! On non-epoch blocks, only ~4K-8K participation entries change (attesting validators), +//! so rebuilding the entire tree is wasteful. This module caches two trees (one for +//! previous_epoch_participation, one for current_epoch_participation) and updates +//! only the affected paths on subsequent calls. +//! +//! ## Tree layout +//! +//! A dense subtree covers the first `DENSE_LEAF_COUNT` (2^17 = 131072) leaf positions, +//! enough for 4.2M validators (32 u8s per chunk * 131072 chunks). Above that, 18 +//! sparse levels use precomputed zero-hashes. +//! +//! Dense flat array (1-indexed): +//! nodes[1] = subtree root +//! nodes[2], nodes[3] = depth 1 +//! ... +//! nodes[DENSE_LEAF_COUNT .. 2*DENSE_LEAF_COUNT - 1] = leaves (packed u8 chunks) + +use std::sync::{LazyLock, Mutex}; + +use ethereum_hashing::{hash32_concat, ZERO_HASHES}; + +/// u8 packing factor: 32 u8s (32 bytes) per leaf chunk. +const PACKING_FACTOR: usize = 32; + +/// Dense subtree depth. 2^17 = 131,072 leaf positions → supports up to 4.2M validators. 
+const DENSE_DEPTH: usize = 17;
+const DENSE_LEAF_COUNT: usize = 1 << DENSE_DEPTH; // 131,072
+const DENSE_NODE_COUNT: usize = 2 * DENSE_LEAF_COUNT; // 262,144
+
+/// Total tree depth for `VariableList<u8, ValidatorRegistryLimit>`.
+/// ValidatorRegistryLimit = 2^40 for all configs. max_chunks = 2^40/32 = 2^35. depth = 35.
+const TOTAL_DEPTH: usize = 35;
+
+struct ParticipationMerkleCache {
+    /// Flat binary tree: nodes[1] = subtree root, leaves at [DENSE_LEAF_COUNT .. 2*DENSE_LEAF_COUNT).
+    /// Index 0 is unused.
+    nodes: Vec<[u8; 32]>,
+    /// Cached participation values for diffing.
+    values: Vec<u8>,
+    /// Whether the cache has been initialized.
+    valid: bool,
+    /// The last computed root hash, used to validate fork consistency.
+    last_root: [u8; 32],
+}
+
+impl ParticipationMerkleCache {
+    /// Create an empty, not-yet-valid cache; the tree is allocated on first `initialize`.
+    fn new() -> Self {
+        Self {
+            nodes: Vec::new(),
+            values: Vec::new(),
+            valid: false,
+            last_root: [0u8; 32],
+        }
+    }
+
+    /// Build the full tree from scratch.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `values` needs more than `DENSE_LEAF_COUNT` leaf chunks.
+    fn initialize(&mut self, values: &[u8]) {
+        checked_chunk_count(values.len());
+
+        // Allocate tree — initialize all nodes to zeros (matching SSZ zero-padding).
+        self.nodes.clear();
+        self.nodes.resize(DENSE_NODE_COUNT, [0u8; 32]);
+        self.values.clear();
+        self.values.extend_from_slice(values);
+
+        // Pack values into leaf chunks. A short final chunk keeps its zero tail, and
+        // leaf positions past the data stay zero (SSZ default for unpopulated entries).
+        let leaves = &mut self.nodes[DENSE_LEAF_COUNT..];
+        for (leaf, chunk) in leaves.iter_mut().zip(values.chunks(PACKING_FACTOR)) {
+            leaf[..chunk.len()].copy_from_slice(chunk);
+        }
+
+        // Build internal nodes bottom-up.
+        for i in (1..DENSE_LEAF_COUNT).rev() {
+            self.nodes[i] = hash32_concat(&self.nodes[2 * i], &self.nodes[2 * i + 1]);
+        }
+
+        self.valid = true;
+    }
+
+    /// Diff the new values against the cache, incrementally update changed paths,
+    /// and return the final SSZ VariableList hash (content root + mix_in_length).
+    fn update_and_root(&mut self, new_values: &[u8]) -> [u8; 32] {
+        debug_assert!(self.valid);
+
+        let new_len = new_values.len();
+        if new_len != self.values.len() {
+            // Value count changed (new validators added at epoch). Rebuild.
+            self.initialize(new_values);
+            return self.finalize_root(new_len);
+        }
+
+        // Compare chunk by chunk; lengths are equal, so both sides split identically.
+        // Chunk indices are produced in ascending order, each at most once.
+        let mut dirty_chunks: Vec<usize> = Vec::new();
+        let cached_chunks = self.values.chunks_mut(PACKING_FACTOR);
+        let fresh_chunks = new_values.chunks(PACKING_FACTOR);
+        for (chunk_idx, (cached, fresh)) in cached_chunks.zip(fresh_chunks).enumerate() {
+            if *cached != *fresh {
+                cached.copy_from_slice(fresh);
+                dirty_chunks.push(chunk_idx);
+            }
+        }
+
+        self.rehash_dirty_chunks(&dirty_chunks);
+        self.finalize_root(new_len)
+    }
+
+    /// Repack the given leaf chunks from `self.values` and recompute their ancestors
+    /// up to the dense subtree root.
+    ///
+    /// Ancestors are rehashed one level at a time, so a node shared by several dirty
+    /// leaves is hashed once instead of once per leaf. `dirty_chunks` should be sorted
+    /// ascending: that lets `dedup` remove every repeated ancestor. Unsorted input still
+    /// produces the correct tree, only with redundant hashing.
+    fn rehash_dirty_chunks(&mut self, dirty_chunks: &[usize]) {
+        if dirty_chunks.is_empty() {
+            return;
+        }
+
+        for &chunk_idx in dirty_chunks {
+            self.nodes[DENSE_LEAF_COUNT + chunk_idx] = pack_chunk(&self.values, chunk_idx);
+        }
+
+        // All leaves sit at the same depth, so every frontier entry is on one level.
+        let mut frontier: Vec<usize> = dirty_chunks
+            .iter()
+            .map(|&chunk_idx| (DENSE_LEAF_COUNT + chunk_idx) >> 1)
+            .collect();
+        loop {
+            frontier.dedup();
+            for &pos in &frontier {
+                self.nodes[pos] = hash32_concat(&self.nodes[2 * pos], &self.nodes[2 * pos + 1]);
+            }
+            // Node 1 is the subtree root; once it has been rehashed we are done.
+            if frontier[0] == 1 {
+                break;
+            }
+            for pos in &mut frontier {
+                *pos >>= 1;
+            }
+        }
+    }
+
+    /// Walk through sparse levels from dense subtree root to content root,
+    /// then mix_in_length for the final SSZ VariableList hash.
+    /// Also stores the result as `last_root` for fork validation.
+    fn finalize_root(&mut self, value_count: usize) -> [u8; 32] {
+        let mut root = self.nodes[1]; // dense subtree root
+
+        // Sparse levels: DENSE_DEPTH .. TOTAL_DEPTH-1
+        // At each level, our subtree is the left child; right sibling is a zero-hash subtree.
+        for level in DENSE_DEPTH..TOTAL_DEPTH {
+            root = hash32_concat(&root, &ZERO_HASHES[level]);
+        }
+
+        // mix_in_length: hash(content_root || length_as_le_u256)
+        let mut length_bytes = [0u8; 32];
+        length_bytes[0..8].copy_from_slice(&(value_count as u64).to_le_bytes());
+        let result = hash32_concat(&root, &length_bytes);
+        self.last_root = result;
+        result
+    }
+}
+
+/// Number of leaf chunks needed to hold `value_count` participation entries.
+///
+/// # Panics
+///
+/// Panics if that number exceeds `DENSE_LEAF_COUNT`, the capacity of the dense subtree.
+fn checked_chunk_count(value_count: usize) -> usize {
+    let chunk_count = value_count.div_ceil(PACKING_FACTOR);
+    assert!(
+        chunk_count <= DENSE_LEAF_COUNT,
+        "participation chunk count ({chunk_count}) exceeds dense tree capacity ({DENSE_LEAF_COUNT})"
+    );
+    chunk_count
+}
+
+/// Pack 32 consecutive u8 values into a 32-byte SSZ chunk.
+///
+/// A final chunk shorter than 32 values is zero-padded. A `chunk_idx` past the end of
+/// `values` yields an all-zero chunk.
+fn pack_chunk(values: &[u8], chunk_idx: usize) -> [u8; 32] {
+    let mut chunk = [0u8; 32];
+    if let Some(bytes) = values.chunks(PACKING_FACTOR).nth(chunk_idx) {
+        chunk[..bytes.len()].copy_from_slice(bytes);
+    }
+    chunk
+}
+
+// Two global caches: one for previous_epoch_participation, one for current_epoch_participation.
+static PREV_PARTICIPATION_CACHE: LazyLock<Mutex<ParticipationMerkleCache>> =
+    LazyLock::new(|| Mutex::new(ParticipationMerkleCache::new()));
+
+static CURR_PARTICIPATION_CACHE: LazyLock<Mutex<ParticipationMerkleCache>> =
+    LazyLock::new(|| Mutex::new(ParticipationMerkleCache::new()));
+
+/// Map a BeaconState field number to its cache: 15 = previous, 16 = current.
+///
+/// # Panics
+///
+/// Panics on any other field number.
+fn get_cache(field_num: u32) -> &'static Mutex<ParticipationMerkleCache> {
+    match field_num {
+        15 => &PREV_PARTICIPATION_CACHE,
+        16 => &CURR_PARTICIPATION_CACHE,
+        _ => panic!("Invalid participation field number: {field_num}"),
+    }
+}
+
+/// Compute the SSZ tree hash root of a participation VariableList incrementally.
+///
+/// `field_num` is 15 (previous) or 16 (current).
+/// On the first call, builds the full tree and caches it. On subsequent calls, diffs
+/// against the cached values and only rehashes affected paths.
+///
+/// # Panics
+///
+/// Panics if `field_num` is not 15 or 16, if `values` exceeds the dense tree capacity,
+/// or if the cache mutex is poisoned.
+pub fn hash_participation_incremental(field_num: u32, values: &[u8]) -> [u8; 32] {
+    let cache_mutex = get_cache(field_num);
+
+    // Reject oversized input before locking: a panic raised while the guard is held
+    // would poison the mutex and make every later call on this cache panic as well.
+    checked_chunk_count(values.len());
+
+    let mut cache = cache_mutex.lock().unwrap();
+    if cache.valid {
+        cache.update_and_root(values)
+    } else {
+        cache.initialize(values);
+        cache.finalize_root(values.len())
+    }
+}
+
+/// Apply targeted participation updates and return the new hash.
+/// `field_num` is 15 (previous) or 16 (current).
+/// `updates` is a list of (index, new_value) pairs.
+/// `value_count` is the current total number of participation entries.
+/// `expected_prev_hash` validates that the cache corresponds to the correct fork.
+///
+/// The cache must have been initialized by a prior `hash_participation_incremental` call.
+/// If the cache is invalid, value_count doesn't match, or the expected hash doesn't
+/// match, returns None (caller falls back to full hash).
+pub fn apply_participation_updates( + field_num: u32, + updates: &[(u32, u8)], + value_count: usize, + expected_prev_hash: &[u8; 32], +) -> Option<[u8; 32]> { + let mut cache = get_cache(field_num).lock().unwrap(); + if !cache.valid || cache.values.len() != value_count || &cache.last_root != expected_prev_hash { + return None; + } + + // Apply updates and collect dirty chunks. + let mut dirty_chunks: Vec = Vec::with_capacity(updates.len() / PACKING_FACTOR + 1); + for &(idx, new_val) in updates { + let i = idx as usize; + if i < cache.values.len() { + cache.values[i] = new_val; + let chunk_idx = i / PACKING_FACTOR; + if dirty_chunks.last() != Some(&chunk_idx) { + dirty_chunks.push(chunk_idx); + } + } + } + + // Sort dirty chunks to ensure dedup works correctly (updates may not be ordered). + dirty_chunks.sort_unstable(); + dirty_chunks.dedup(); + + // Update dirty leaves and walk each path to subtree root. + for &chunk_idx in &dirty_chunks { + let leaf_idx = DENSE_LEAF_COUNT + chunk_idx; + cache.nodes[leaf_idx] = pack_chunk(&cache.values, chunk_idx); + + let mut pos = leaf_idx >> 1; + while pos >= 1 { + cache.nodes[pos] = hash32_concat(&cache.nodes[2 * pos], &cache.nodes[2 * pos + 1]); + pos >>= 1; + } + } + + Some(cache.finalize_root(value_count)) +} + +/// Reset a participation cache. +#[allow(dead_code)] +pub fn reset_participation_cache(field_num: u32) { + get_cache(field_num).lock().unwrap().valid = false; +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Compute the participation hash the "original" way (full tree via ssz_types + tree_hash) + /// for comparison. 
+ fn reference_hash(values: &[u8]) -> [u8; 32] { + use ssz_types::typenum::U1099511627776; + use ssz_types::VariableList; + use tree_hash::TreeHash; + + let list = VariableList::::new(values.to_vec()).unwrap(); + list.tree_hash_root().0 + } + + #[test] + fn empty_participation() { + reset_participation_cache(15); + let values: Vec = vec![]; + assert_eq!( + hash_participation_incremental(15, &values), + reference_hash(&values) + ); + } + + #[test] + fn small_participation() { + reset_participation_cache(15); + let values: Vec = vec![7; 100]; + assert_eq!( + hash_participation_incremental(15, &values), + reference_hash(&values) + ); + } + + #[test] + fn incremental_update() { + reset_participation_cache(16); + let mut values: Vec = vec![0; 1000]; + + // First call: builds cache + let h1 = hash_participation_incremental(16, &values); + assert_eq!(h1, reference_hash(&values)); + + // Modify a few entries (simulating attestation flag updates) + values[42] = 7; + values[500] = 3; + values[999] = 5; + + // Second call: incremental update + let h2 = hash_participation_incremental(16, &values); + assert_eq!(h2, reference_hash(&values)); + assert_ne!(h1, h2); + } + + #[test] + fn cross_chunk_boundary() { + reset_participation_cache(15); + let mut values: Vec = vec![0; 64]; // 2 chunks + + let h1 = hash_participation_incremental(15, &values); + assert_eq!(h1, reference_hash(&values)); + + // Change last element of chunk 0 and first element of chunk 1 + values[31] = 7; + values[32] = 3; + + let h2 = hash_participation_incremental(15, &values); + assert_eq!(h2, reference_hash(&values)); + } + + #[test] + fn targeted_updates() { + reset_participation_cache(15); + let mut values: Vec = vec![0; 1000]; + + // Initialize cache via full hash + let _h1 = hash_participation_incremental(15, &values); + + // Apply targeted updates (simulating attestation flags) + let updates: Vec<(u32, u8)> = vec![(42, 7), (500, 3), (999, 5)]; + // Also update our reference copy + values[42] = 7; + 
values[500] = 3; + values[999] = 5; + + let h2 = apply_participation_updates(15, &updates, 1000, &_h1).unwrap(); + assert_eq!(h2, reference_hash(&values)); + } + + #[test] + fn targeted_updates_returns_none_when_invalid() { + reset_participation_cache(16); + let updates = vec![(0, 7u8)]; + let fake_hash = [0u8; 32]; + assert_eq!( + apply_participation_updates(16, &updates, 100, &fake_hash), + None + ); + } + + #[test] + fn separate_caches_for_prev_and_curr() { + reset_participation_cache(15); + reset_participation_cache(16); + + let prev_values: Vec = vec![7; 100]; + let curr_values: Vec = vec![3; 100]; + + let h_prev = hash_participation_incremental(15, &prev_values); + let h_curr = hash_participation_incremental(16, &curr_values); + + assert_eq!(h_prev, reference_hash(&prev_values)); + assert_eq!(h_curr, reference_hash(&curr_values)); + assert_ne!(h_prev, h_curr); + } +} diff --git a/native/ssz_nif/src/utils/randao_cache.rs b/native/ssz_nif/src/utils/randao_cache.rs new file mode 100644 index 000000000..9afc217ac --- /dev/null +++ b/native/ssz_nif/src/utils/randao_cache.rs @@ -0,0 +1,215 @@ +//! Incremental merkle tree cache for BeaconState randao_mixes (field 13). +//! +//! SSZ hashes `FixedVector` as a binary merkle tree +//! where each 32-byte entry is one leaf (no packing needed). With 65536 entries +//! (mainnet), only 1 entry changes per block, so rebuilding is wasteful. +//! +//! This module caches the tree and updates only the 16 nodes on the path from +//! the modified leaf to the root (~16 hash operations instead of ~65536). + +use std::sync::{LazyLock, Mutex}; + +use ethereum_hashing::hash32_concat; + +struct RandaoMerkleCache { + /// Flat binary tree: nodes[1] = root, leaves at [leaf_count .. 2*leaf_count). + nodes: Vec<[u8; 32]>, + /// Number of leaf positions (next power of 2 >= vector size). + leaf_count: usize, + /// Whether the cache has been initialized. + valid: bool, + /// The last computed root hash for fork validation. 
+ last_root: [u8; 32], +} + +impl RandaoMerkleCache { + fn new() -> Self { + Self { + nodes: Vec::new(), + leaf_count: 0, + valid: false, + last_root: [0u8; 32], + } + } + + /// Build the full tree from scratch. + fn initialize(&mut self, values: &[[u8; 32]]) { + let leaf_count = values.len().next_power_of_two(); + let node_count = 2 * leaf_count; + self.leaf_count = leaf_count; + + self.nodes.resize(node_count, [0u8; 32]); + + // Copy values directly as leaves (no packing needed). + for (i, v) in values.iter().enumerate() { + self.nodes[self.leaf_count + i] = *v; + } + // Zero remaining leaves. + for i in values.len()..self.leaf_count { + self.nodes[self.leaf_count + i] = [0u8; 32]; + } + + // Build internal nodes bottom-up. + for i in (1..self.leaf_count).rev() { + self.nodes[i] = hash32_concat(&self.nodes[2 * i], &self.nodes[2 * i + 1]); + } + + self.valid = true; + } + + /// Get the root (nodes[1] for FixedVector — no mix_in_length). + fn root(&mut self) -> [u8; 32] { + let result = self.nodes[1]; + self.last_root = result; + result + } +} + +static RANDAO_CACHE: LazyLock> = + LazyLock::new(|| Mutex::new(RandaoMerkleCache::new())); + +/// Compute the SSZ tree hash root of a FixedVector incrementally. +/// +/// On the first call, builds the full tree and caches it. On subsequent calls, diffs +/// against the cached values and only rehashes affected paths. +#[allow(dead_code)] +pub fn hash_randao_incremental(values: &[[u8; 32]]) -> [u8; 32] { + let mut cache = RANDAO_CACHE.lock().unwrap(); + if cache.valid && cache.leaf_count >= values.len() { + // Diff and update only changed leaves. + let leaf_count = cache.leaf_count; + for i in 0..values.len() { + let leaf_idx = leaf_count + i; + if cache.nodes[leaf_idx] != values[i] { + cache.nodes[leaf_idx] = values[i]; + // Walk up to root. 
+ let mut pos = leaf_idx >> 1; + while pos >= 1 { + cache.nodes[pos] = + hash32_concat(&cache.nodes[2 * pos], &cache.nodes[2 * pos + 1]); + pos >>= 1; + } + } + } + cache.root() + } else { + cache.initialize(values); + cache.root() + } +} + +/// Seed the cache with known-correct data and hash from the standard SSZ hash path. +/// This allows subsequent `apply_randao_update` calls to work incrementally. +pub fn seed_cache(values: &[[u8; 32]], known_hash: &[u8; 32]) { + let mut cache = RANDAO_CACHE.lock().unwrap(); + cache.initialize(values); + cache.last_root = *known_hash; +} + +/// Apply a single targeted update and return the new hash. +/// `index` is the position to update, `new_value` is the new 32-byte entry. +/// `expected_prev_hash` validates the cache matches the expected parent state. +/// +/// Returns None on cache miss (caller falls back to full hash). +pub fn apply_randao_update( + index: usize, + new_value: &[u8; 32], + total_count: usize, + expected_prev_hash: &[u8; 32], +) -> Option<[u8; 32]> { + let mut cache = RANDAO_CACHE.lock().unwrap(); + if !cache.valid || &cache.last_root != expected_prev_hash { + return None; + } + + let leaf_count = cache.leaf_count; + if index >= leaf_count || total_count > leaf_count { + return None; + } + + let leaf_idx = leaf_count + index; + cache.nodes[leaf_idx] = *new_value; + + // Walk up to root. 
+ let mut pos = leaf_idx >> 1; + while pos >= 1 { + cache.nodes[pos] = hash32_concat(&cache.nodes[2 * pos], &cache.nodes[2 * pos + 1]); + pos >>= 1; + } + + Some(cache.root()) +} + +#[cfg(test)] +mod tests { + use super::*; + use ethereum_hashing::hash32_concat; + + fn reference_hash(values: &[[u8; 32]]) -> [u8; 32] { + let leaf_count = values.len().next_power_of_two(); + let mut nodes = vec![[0u8; 32]; 2 * leaf_count]; + for (i, v) in values.iter().enumerate() { + nodes[leaf_count + i] = *v; + } + for i in (1..leaf_count).rev() { + nodes[i] = hash32_concat(&nodes[2 * i], &nodes[2 * i + 1]); + } + nodes[1] + } + + fn reset_cache() { + RANDAO_CACHE.lock().unwrap().valid = false; + } + + #[test] + fn small_values() { + reset_cache(); + let mut values = vec![[0u8; 32]; 16]; + values[0] = [1u8; 32]; + values[5] = [42u8; 32]; + + let h1 = hash_randao_incremental(&values); + assert_eq!(h1, reference_hash(&values)); + } + + #[test] + fn incremental_update() { + reset_cache(); + let mut values = vec![[0u8; 32]; 64]; + for (i, v) in values.iter_mut().enumerate() { + v[0] = i as u8; + } + + let _h1 = hash_randao_incremental(&values); + + // Modify one value. 
+ values[10] = [255u8; 32]; + let h2 = hash_randao_incremental(&values); + assert_eq!(h2, reference_hash(&values)); + } + + #[test] + fn targeted_update() { + reset_cache(); + let mut values = vec![[0u8; 32]; 32]; + for (i, v) in values.iter_mut().enumerate() { + v[0] = i as u8; + } + + let h1 = hash_randao_incremental(&values); + + let new_value = [99u8; 32]; + let h2 = apply_randao_update(10, &new_value, 32, &h1).unwrap(); + + values[10] = new_value; + assert_eq!(h2, reference_hash(&values)); + } + + #[test] + fn targeted_update_cache_miss() { + reset_cache(); + let new_value = [99u8; 32]; + let fake_hash = [0u8; 32]; + assert_eq!(apply_randao_update(10, &new_value, 32, &fake_hash), None); + } +} diff --git a/native/ssz_nif/src/utils/shuffle.rs b/native/ssz_nif/src/utils/shuffle.rs new file mode 100644 index 000000000..5455ffb05 --- /dev/null +++ b/native/ssz_nif/src/utils/shuffle.rs @@ -0,0 +1,196 @@ +use sha2::{Digest, Sha256}; + +fn sha256(data: &[u8]) -> Vec { + let mut hasher = Sha256::new(); + hasher.update(data); + hasher.finalize().to_vec() +} + +/// Compute the shuffled index for a single position (eth2 spec compute_shuffled_index). 
+///
+/// Runs `rounds` swap-or-not rounds over `index`. Returns `index` unchanged when
+/// `index_count` is zero. Only the low 8 bits of the round number are mixed into the
+/// hashes (`round as u8`), and only the low 32 bits of `position / 256`.
+/// Presumably callers guarantee `index < index_count`; this is not checked here.
+pub fn compute_shuffled_index(
+    mut index: u64,
+    index_count: u64,
+    seed: &[u8; 32],
+    rounds: u32,
+) -> u64 {
+    if index_count == 0 {
+        return index;
+    }
+
+    // One reusable hash input: seed (32 bytes) || round (1 byte) || position / 256 (4 bytes).
+    // The pivot hash covers the first 33 bytes, the source hash all 37.
+    let mut buf = [0u8; 37];
+    buf[..32].copy_from_slice(seed);
+
+    for round in 0..rounds {
+        buf[32] = round as u8;
+        let pivot_hash = sha256(&buf[..33]);
+        let pivot = u64::from_le_bytes(pivot_hash[..8].try_into().unwrap()) % index_count;
+
+        let flip = (pivot + index_count - index) % index_count;
+        let position = std::cmp::max(index, flip);
+
+        buf[33..].copy_from_slice(&((position / 256) as u32).to_le_bytes());
+        let source = sha256(&buf);
+
+        // Each source digest holds the decision bits for 256 consecutive positions.
+        let bit_index = (position % 256) as usize;
+        let bit = (source[bit_index / 8] >> (bit_index % 8)) & 1;
+
+        if bit == 1 {
+            index = flip;
+        }
+    }
+    index
+}
+
+/// Batch compute proposer indices for all slots in an epoch.
+/// For each slot, finds the first candidate whose effective balance passes
+/// the random threshold. This replaces ~2048 individual Elixir NIF calls.
+///
+/// The per-slot seed is `sha256(epoch_seed || slot as 8 little-endian bytes)` for the
+/// `slots_per_epoch` consecutive slots starting at `start_slot`. The result holds one
+/// validator index per slot, in slot order.
+///
+/// # Panics
+///
+/// Panics if `active_indices` is empty while `slots_per_epoch > 0`, or if a selected
+/// validator index is out of bounds for `effective_balances`.
+pub fn compute_proposer_indices(
+    epoch_seed: &[u8; 32],
+    start_slot: u64,
+    slots_per_epoch: u32,
+    active_indices: &[u64],
+    effective_balances: &[u64],
+    max_effective_balance: u64,
+    rounds: u32,
+) -> Vec<u64> {
+    (0..slots_per_epoch)
+        .map(|i| {
+            // Per-slot seed
+            let slot = start_slot + u64::from(i);
+            let mut seed_input = [0u8; 40];
+            seed_input[..32].copy_from_slice(epoch_seed);
+            seed_input[32..].copy_from_slice(&slot.to_le_bytes());
+            let slot_seed_vec = sha256(&seed_input);
+            let slot_seed: [u8; 32] = slot_seed_vec[..32].try_into().unwrap();
+
+            select_proposer(
+                &slot_seed,
+                active_indices,
+                effective_balances,
+                max_effective_balance,
+                rounds,
+            )
+        })
+        .collect()
+}
+
+/// Find the proposer for one slot: walk the shuffled candidates until one passes the
+/// balance-weighted random threshold, and return its validator index.
+fn select_proposer(
+    slot_seed: &[u8; 32],
+    active_indices: &[u64],
+    effective_balances: &[u64],
+    max_effective_balance: u64,
+    rounds: u32,
+) -> u64 {
+    // Largest value of a 16-bit random sample (2^16 - 1).
+    const MAX_RANDOM_VALUE: u64 = 0xFFFF;
+    // A 32-byte digest yields sixteen 2-byte samples.
+    const SAMPLES_PER_DIGEST: u64 = 16;
+
+    let total = active_indices.len() as u64;
+    assert!(
+        total > 0,
+        "cannot select a proposer from an empty active validator set"
+    );
+
+    // Hash input: slot seed (32 bytes) || candidate_iter / 16 (8 bytes, little-endian).
+    let mut rand_input = [0u8; 40];
+    rand_input[..32].copy_from_slice(slot_seed);
+    let mut random_bytes = Vec::new();
+
+    let mut candidate_iter = 0u64;
+    loop {
+        // The digest only changes every 16 candidates, so hash on the first of each run.
+        let sample = candidate_iter % SAMPLES_PER_DIGEST;
+        if sample == 0 {
+            rand_input[32..].copy_from_slice(&(candidate_iter / SAMPLES_PER_DIGEST).to_le_bytes());
+            random_bytes = sha256(&rand_input);
+        }
+
+        let shuffled = compute_shuffled_index(candidate_iter % total, total, slot_seed, rounds);
+        let candidate_index = active_indices[shuffled as usize];
+
+        let offset = (sample * 2) as usize;
+        let random_value = u64::from(u16::from_le_bytes([
+            random_bytes[offset],
+            random_bytes[offset + 1],
+        ]));
+
+        let eff_bal = effective_balances[candidate_index as usize];
+
+        if eff_bal * MAX_RANDOM_VALUE >= max_effective_balance * random_value {
+            return candidate_index;
+        }
+        candidate_iter += 1;
+    }
+}
+
+/// Perform the full eth2 shuffle in Rust with O(1) array access.
+/// This replaces the Elixir implementation that uses :atomics + Enum.reduce.
+///
+/// Algorithm: eth2 spec `compute_shuffled_index` applied as a full Fisher-Yates
+/// shuffle over all indices, using the swap-or-not network.
+pub fn shuffle_list(indices: &mut [u64], seed: &[u8; 32], rounds: u32) { + let n = indices.len(); + if n <= 1 { + return; + } + + for round in (0..rounds).rev() { + let round_byte = round as u8; + + // Compute pivot = hash(seed || round_byte) mod n + let pivot = { + let mut hasher = Sha256::new(); + hasher.update(seed); + hasher.update([round_byte]); + let hash = hasher.finalize(); + u64::from_le_bytes(hash[..8].try_into().unwrap()) % (n as u64) + } as usize; + + // First half: i in [0, mirror) + let mirror = (pivot + 1) / 2; + let mut source = { + let pos_bytes = ((pivot / 256) as u32).to_le_bytes(); + let mut hasher = Sha256::new(); + hasher.update(seed); + hasher.update([round_byte]); + hasher.update(pos_bytes); + hasher.finalize().to_vec() + }; + let mut byte_v = source[(pivot & 0xFF) / 8]; + + for i in 0..mirror { + let j = pivot - i; + + // Update source hash when crossing a 256-boundary + if (j & 0xFF) == 0xFF { + let pos_bytes = ((j / 256) as u32).to_le_bytes(); + let mut hasher = Sha256::new(); + hasher.update(seed); + hasher.update([round_byte]); + hasher.update(pos_bytes); + source = hasher.finalize().to_vec(); + } + + // Update byte_v when crossing an 8-boundary + if (j & 0x07) == 0x07 { + byte_v = source[(j & 0xFF) / 8]; + } + + // Check the bit + let bit = (byte_v >> (j & 0x07)) & 0x01; + if bit == 1 { + indices.swap(i, j); + } + } + + // Second half: i in [pivot+1, mirror2) + let mirror2 = (pivot + n + 1) / 2; + let list_end = n - 1; + source = { + let pos_bytes = ((list_end / 256) as u32).to_le_bytes(); + let mut hasher = Sha256::new(); + hasher.update(seed); + hasher.update([round_byte]); + hasher.update(pos_bytes); + hasher.finalize().to_vec() + }; + byte_v = source[(list_end & 0xFF) / 8]; + + for i in (pivot + 1)..mirror2 { + let loop_iter = i - (pivot + 1); + let j = list_end - loop_iter; + + if (j & 0xFF) == 0xFF { + let pos_bytes = ((j / 256) as u32).to_le_bytes(); + let mut hasher = Sha256::new(); + hasher.update(seed); + 
hasher.update([round_byte]); + hasher.update(pos_bytes); + source = hasher.finalize().to_vec(); + } + + if (j & 0x07) == 0x07 { + byte_v = source[(j & 0xFF) / 8]; + } + + let bit = (byte_v >> (j & 0x07)) & 0x01; + if bit == 1 { + indices.swap(i, j); + } + } + } +} diff --git a/test/spec/runners/rewards.ex b/test/spec/runners/rewards.ex index afbde0465..121b66740 100644 --- a/test/spec/runners/rewards.ex +++ b/test/spec/runners/rewards.ex @@ -53,13 +53,43 @@ defmodule RewardsTestRunner do |> Stream.map(&Enum.map(&1, fn {reward, penalty} -> reward - penalty end)) |> Enum.zip() + previous_epoch = + LambdaEthereumConsensus.StateTransition.Accessors.get_previous_epoch(pre_state) + + base_reward_per_increment = + LambdaEthereumConsensus.StateTransition.Accessors.get_base_reward_per_increment(pre_state) + calculated_deltas = Constants.participation_flag_weights() |> Stream.with_index() - |> Stream.map(fn {weight, index} -> - BeaconState.get_flag_index_deltas(pre_state, weight, index) + |> Stream.map(fn {weight, flag_index} -> + {:ok, unslashed_indices} = + LambdaEthereumConsensus.StateTransition.Accessors.get_unslashed_participating_indices( + pre_state, + flag_index, + previous_epoch + ) + + BeaconState.get_flag_index_deltas( + pre_state, + weight, + flag_index, + unslashed_indices, + base_reward_per_increment + ) end) - |> Stream.concat([BeaconState.get_inactivity_penalty_deltas(pre_state)]) + |> Stream.concat([ + ( + {:ok, target_indices} = + LambdaEthereumConsensus.StateTransition.Accessors.get_unslashed_participating_indices( + pre_state, + Constants.timely_target_flag_index(), + previous_epoch + ) + + BeaconState.get_inactivity_penalty_deltas(pre_state, target_indices) + ) + ]) |> Stream.zip() |> Enum.to_list()