From c596c36c8d22f449582059f91918ecef324f5572 Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Fri, 8 May 2026 19:18:29 +0200 Subject: [PATCH 1/9] Add direct exec capabilities of the sidecar via dynamic linker Signed-off-by: Bob Weinand --- datadog-sidecar/src/config.rs | 4 + datadog-sidecar/src/entry.rs | 7 ++ datadog-sidecar/src/unix.rs | 54 +++++++---- spawn_worker/build.rs | 21 ++-- spawn_worker/src/direct_entry.c | 70 ++++++++++++++ spawn_worker/src/unix/mod.rs | 47 +++++++++ spawn_worker/src/unix/spawn.rs | 158 +++++++++++++++++++++---------- tools/sidecar_mockgen/src/lib.rs | 102 +++++++++++++++++--- 8 files changed, 374 insertions(+), 89 deletions(-) create mode 100644 spawn_worker/src/direct_entry.c diff --git a/datadog-sidecar/src/config.rs b/datadog-sidecar/src/config.rs index aa82ce0d34..95ecf221eb 100644 --- a/datadog-sidecar/src/config.rs +++ b/datadog-sidecar/src/config.rs @@ -89,6 +89,8 @@ pub struct Config { /// Socket/pipe buffer size for IPC connections (bytes). /// 0 means use the platform default. 
pub pipe_buffer_size: usize, + #[cfg(target_os = "linux")] + pub spawn_without_trampoline: bool, } #[derive(Debug, Clone)] @@ -259,6 +261,8 @@ impl FromEnv { appsec_config: Self::appsec_config(), max_memory: Self::max_memory(), pipe_buffer_size: Self::pipe_buffer_size(), + #[cfg(target_os = "linux")] + spawn_without_trampoline: false, } } diff --git a/datadog-sidecar/src/entry.rs b/datadog-sidecar/src/entry.rs index 4da1491bd7..d800a3efd0 100644 --- a/datadog-sidecar/src/entry.rs +++ b/datadog-sidecar/src/entry.rs @@ -4,6 +4,8 @@ use anyhow::Context; #[cfg(unix)] use libdd_crashtracker; +#[cfg(target_os = "linux")] +use spawn_worker::read_pt_interp_self; use spawn_worker::{entrypoint, Stdio}; use std::fs::File; use std::future::Future; @@ -221,6 +223,11 @@ pub fn daemonize(listener: IpcServer, mut cfg: Config) -> anyhow::Result<()> { #[allow(unused_unsafe)] // the unix method is unsafe let mut spawn_cfg = unsafe { spawn_worker::SpawnWorker::new() }; + #[cfg(target_os = "linux")] + if cfg.spawn_without_trampoline && read_pt_interp_self().is_some() { + spawn_cfg.spawn_method(spawn_worker::SpawnMethod::Direct); + } + spawn_cfg.target(entrypoint!(ddog_daemon_entry_point)); match cfg.log_method { diff --git a/datadog-sidecar/src/unix.rs b/datadog-sidecar/src/unix.rs index be42b52d49..7eddbeece7 100644 --- a/datadog-sidecar/src/unix.rs +++ b/datadog-sidecar/src/unix.rs @@ -45,7 +45,11 @@ pub extern "C" fn ddog_daemon_entry_point(trampoline_data: &TrampolineData) { let _ = prctl::set_name("dd-ipc-helper"); #[cfg(target_os = "linux")] - if let Err(e) = init_crashtracker(trampoline_data.dependency_paths) { + if let Err(e) = init_crashtracker(if trampoline_data.argc > 0 { + Some(trampoline_data.dependency_paths) + } else { + None + }) { warn!("Failed to initialize crashtracker: {e}"); } @@ -214,32 +218,44 @@ fn shutdown_appsec() -> bool { } #[cfg(target_os = "linux")] -fn init_crashtracker(dependency_paths: *const *const libc::c_char) -> anyhow::Result<()> { +fn 
init_crashtracker(dependency_paths: Option<*const *const libc::c_char>) -> anyhow::Result<()> { let entrypoint = entrypoint!(ddog_crashtracker_entry_point); let entrypoint_path = match unsafe { get_dl_path_raw(entrypoint.ptr as *const libc::c_void) } { (Some(path), _) => path, _ => anyhow::bail!("Failed to find crashtracker entrypoint"), }; - - let mut receiver_args = vec![ - "crashtracker_receiver".to_string(), - "".to_string(), - entrypoint_path.into_string()?, - ]; - - unsafe { - let mut descriptors = dependency_paths; - if !descriptors.is_null() { - loop { - if (*descriptors).is_null() { - break; + let entrypoint_path_str = entrypoint_path.into_string()?; + + let mut receiver_args = vec!["crashtracker_receiver".to_string()]; + let mut receiver_env = vec![]; + let entrypoint_name = entrypoint.symbol_name.into_string()?; + + if let Some(dependency_paths) = dependency_paths { + receiver_args.push("".to_string()); + receiver_args.push(entrypoint_path_str.clone()); + unsafe { + let mut descriptors = dependency_paths; + if !descriptors.is_null() { + loop { + if (*descriptors).is_null() { + break; + } + receiver_args.push(CStr::from_ptr(*descriptors).to_string_lossy().into_owned()); + descriptors = descriptors.add(1); } - receiver_args.push(CStr::from_ptr(*descriptors).to_string_lossy().into_owned()); - descriptors = descriptors.add(1); + } + } + receiver_args.push(entrypoint_name); + } else { + // direct mode: ld.so uses argv[1] as the library to exec + receiver_args.push(entrypoint_path_str.clone()); + receiver_env.push(("_DD_SIDECAR_DIRECT_EXEC".to_string(), entrypoint_name)); + if let Ok(env) = std::env::var("_DD_SIDECAR_PATH_DEPS") { + if !env.is_empty() { + receiver_env.push(("_DD_SIDECAR_PATH_DEPS".to_string(), env)); } } } - receiver_args.push(entrypoint.symbol_name.into_string()?); let output = match &Config::get().log_method { LogMethod::Stdout => Some(format!("/proc/{}/fd/1", unsafe { libc::getpid() })), @@ -269,7 +285,7 @@ fn 
init_crashtracker(dependency_paths: *const *const libc::c_char) -> anyhow::Re config_builder.build()?, CrashtrackerReceiverConfig::new( receiver_args, - vec![], + receiver_env, format!("/proc/{}/exe", unsafe { libc::getpid() }), output, None, diff --git a/spawn_worker/build.rs b/spawn_worker/build.rs index 1d0cc069b7..9362bf7bf8 100644 --- a/spawn_worker/build.rs +++ b/spawn_worker/build.rs @@ -4,6 +4,18 @@ pub use cc_utils::cc; fn main() { + let target_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); + + // Compile the ELF entry point for the shared library (direct exec by ld.so). + if target_os == "linux" { + cc::Build::new() + .file("src/direct_entry.c") + .compile("ddtrace_direct_entry"); + println!("cargo:rerun-if-changed=src/direct_entry.c"); + // Note, users of direct mode have to add to their build flags: + // -Wl,-e,ddog_sidecar_direct_entry + } + let mut builder = cc_utils::ImprovedBuild::new(); builder .file("src/trampoline.c") @@ -13,7 +25,7 @@ fn main() { .warnings_into_errors(true) .emit_rerun_if_env_changed(true); - if !cfg!(target_os = "windows") { + if target_os != "windows" { builder.link_dynamically("dl"); if cfg!(target_os = "linux") { builder.flag("-Wl,--no-as-needed"); @@ -28,7 +40,7 @@ fn main() { builder.try_compile_executable("trampoline.bin").unwrap(); - if !cfg!(target_os = "windows") { + if target_os != "windows" { cc_utils::ImprovedBuild::new() .file("src/ld_preload_trampoline.c") .link_dynamically("dl") @@ -37,10 +49,7 @@ fn main() { .emit_rerun_if_env_changed(true) .try_compile_shared_lib("ld_preload_trampoline.shared_lib") .unwrap(); - } - - #[cfg(target_os = "windows")] - { + } else { cc_utils::ImprovedBuild::new() .file("src/crashtracking_trampoline.cpp") // Path to your C++ file .warnings(true) diff --git a/spawn_worker/src/direct_entry.c b/spawn_worker/src/direct_entry.c new file mode 100644 index 0000000000..dfa7054fce --- /dev/null +++ b/spawn_worker/src/direct_entry.c @@ -0,0 +1,70 @@ +// Copyright 2021-Present 
Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +// This file provides the ELF entry point (ddog_sidecar_direct_entry) for the +// shared library that contains spawn_worker (ddtrace.so in non-SSI builds, +// libddtrace_php.so in SSI builds). When ld.so exec's that library directly, +// it calls this function rather than the trampoline. +// +// Linked as the ELF e_entry via: +// - cargo:rustc-cdylib-link-arg=-Wl,-e,ddog_sidecar_direct_entry (cdylib / SSI) +// - -Wl,-e,ddog_sidecar_direct_entry in EXTRA_LDFLAGS (ddtrace.so / non-SSI) + +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +// All fields are null/zero when calling from Direct spawn (no deps to clean up). +typedef struct { + int argc; + const char **argv; + const char **dependency_paths; +} trampoline_data_t; + +// dlopen() each colon-separated path in _DD_SIDECAR_PATH_DEPS. +static void dlopen_path_deps(void) { + const char *deps = getenv("_DD_SIDECAR_PATH_DEPS"); + if (!deps || !*deps) return; + + // Work on a copy so we can NUL-terminate each token in place. + size_t len = strlen(deps); + char *buf = alloca(len + 1); + memcpy(buf, deps, len + 1); + + char *p = buf; + while (*p) { + char *colon = strchr(p, ':'); + if (colon) *colon = '\0'; + if (*p) dlopen(p, RTLD_LAZY | RTLD_GLOBAL); + if (!colon) break; + p = colon + 1; + } +} + +// Called by ld.so when the library is exec'd directly. +// Linked as the ELF e_entry. +// +// _DD_SIDECAR_DIRECT_EXEC must be set to the name of the symbol to call +__attribute__((visibility("default"))) +void ddog_sidecar_direct_entry(void) { + const char *sym_name = getenv("_DD_SIDECAR_DIRECT_EXEC"); + if (!sym_name || !*sym_name) { + _exit(1); + } + + // Load any path-dep libraries listed in _DD_SIDECAR_PATH_DEPS. + dlopen_path_deps(); + + // Call the requested symbol — avoids a link-time dependency on + // datadog-sidecar from spawn_worker. 
+ typedef void (*entry_fn_t)(const trampoline_data_t *); + entry_fn_t entry = (entry_fn_t)dlsym(RTLD_DEFAULT, sym_name); + if (entry) { + trampoline_data_t data = {0}; + entry(&data); + } + _exit(0); +} diff --git a/spawn_worker/src/unix/mod.rs b/spawn_worker/src/unix/mod.rs index 23998e8fd0..fb02541c8d 100644 --- a/spawn_worker/src/unix/mod.rs +++ b/spawn_worker/src/unix/mod.rs @@ -57,6 +57,53 @@ pub fn getpid() -> libc::pid_t { unsafe { libc::getpid() } } +/// Return the path to the dynamic linker (PT_INTERP) of the current process. +#[cfg(target_os = "linux")] +pub fn read_pt_interp_self() -> Option { + // Auxiliary vector entries for the current process's executable PHDRs. + // SAFETY: getauxval is signal-safe and idempotent. + let phdr_addr = unsafe { libc::getauxval(libc::AT_PHDR) } as usize; + let phent = unsafe { libc::getauxval(libc::AT_PHENT) } as usize; + let phnum = unsafe { libc::getauxval(libc::AT_PHNUM) } as usize; + + if phdr_addr == 0 || phent == 0 || phnum == 0 { + return None; + } + + // Walk the in-memory program headers once, collecting two things: + // 1. Find PT_PHDR to compute the load-base offset (PIE ASLR correction). + // 2. Find PT_INTERP to get the interpreter path's virtual address. + let mut load_base: isize = 0; + let mut interp_vaddr: usize = 0; + + for i in 0..phnum { + // SAFETY: AT_PHDR + i*phent is within the mapped PHDR table placed by the kernel. + let ph = (phdr_addr + i * phent) as *const libc::Elf64_Phdr; + let p_type = unsafe { (*ph).p_type }; + let p_vaddr = unsafe { (*ph).p_vaddr } as usize; + + if p_type == libc::PT_PHDR { + // load_base = runtime_addr_of_PHDRs − link-time vaddr of PHDRs + load_base = phdr_addr as isize - p_vaddr as isize; + } + if p_type == 3 { + // PT_INTERP = 3; the interpreter string lives at this vaddr. + interp_vaddr = p_vaddr; + } + } + + if interp_vaddr == 0 { + return None; + } + + // Compute the runtime address of the null-terminated interpreter path. 
+ let interp_ptr = load_base.checked_add(interp_vaddr as isize)? as *const libc::c_char; + // SAFETY: the interpreter path is a valid C string placed by the kernel in the mapped + // PT_INTERP segment; it is readable for the lifetime of the process. + let interp = unsafe { CStr::from_ptr(interp_ptr) }; + Some(std::path::PathBuf::from(interp.to_string_lossy().as_ref())) +} + impl Entrypoint { pub fn get_fs_path(&self) -> Option { let (path, _) = unsafe { get_dl_path_raw(self.ptr as *const libc::c_void) }; diff --git a/spawn_worker/src/unix/spawn.rs b/spawn_worker/src/unix/spawn.rs index dc0b271902..ecc998a0a3 100644 --- a/spawn_worker/src/unix/spawn.rs +++ b/spawn_worker/src/unix/spawn.rs @@ -98,6 +98,8 @@ use nix::libc; #[derive(Clone)] pub enum SpawnMethod { + #[cfg(target_os = "linux")] + Direct, #[cfg(target_os = "linux")] FdExec, #[cfg(not(target_os = "macos"))] @@ -316,13 +318,25 @@ impl SpawnWorker { } fn do_spawn(&self) -> anyhow::Result> { + #[allow(unused_mut)] + let mut spawn_method = match &self.spawn_method { + Some(m) => m.clone(), + None => self.target.detect_spawn_method()?, + }; + let mut argv = ExecVec::empty(); + + // On Linux, Direct mode uses env vars for deps instead of argv entries. 
+ #[cfg(target_os = "linux")] + let use_direct = matches!(spawn_method, SpawnMethod::Direct); + #[cfg(not(target_os = "linux"))] + let use_direct = false; + // set argv[0] and process name shown eg in `ps` let process_name = CString::new(self.process_name.as_deref().unwrap_or("spawned_worker"))?; argv.push(process_name); - argv.push(CString::new("")?); - let entrypoint_symbol_name = match &self.target { + let (entrypoint_object, entrypoint_symbol_name) = match &self.target { Target::Entrypoint(entrypoint) => { let path = match unsafe { crate::get_dl_path_raw(entrypoint.ptr as *const libc::c_void) @@ -331,16 +345,20 @@ impl SpawnWorker { _ => return Err(anyhow::format_err!("can't read symbol pointer data")), }; - argv.push(path); - entrypoint.symbol_name.clone() - } - Target::ManualTrampoline(path, symbol_name) => { - argv.push(CString::new(path.as_str())?); - CString::new(symbol_name.as_str())? + (path, entrypoint.symbol_name.clone()) } + Target::ManualTrampoline(path, symbol_name) => ( + CString::new(path.as_str())?, + CString::new(symbol_name.as_str())?, + ), Target::Noop => return Ok(None), }; + if !use_direct { + argv.push(CString::new("")?); + } + argv.push(entrypoint_object); + let mut envp = ExecVec::empty(); for (k, v) in &self.env { // reserve space for '=' and final null @@ -377,62 +395,89 @@ impl SpawnWorker { // make sure the fd_to_pass is not dropped until the end of the function let fd_to_pass = fd_to_pass.as_ref(); - // setup final spawn - - #[allow(unused_mut)] - let mut spawn_method = match &self.spawn_method { - Some(m) => m.clone(), - None => self.target.detect_spawn_method()?, - }; - let mut temp_files = vec![]; #[cfg(target_os = "linux")] let mut temp_memfds = vec![]; - for dep in &self.shared_lib_dependencies { - match dep { - LibDependency::Path(path) => { - argv.push(CString::new(path.to_string_lossy().to_string())?) 
+ + #[cfg(target_os = "linux")] + let direct_ld_cstr = if use_direct { + let mut env = b"_DD_SIDECAR_DIRECT_EXEC=".to_vec(); + env.extend_from_slice(entrypoint_symbol_name.as_bytes_with_nul()); + envp.push(CString::from_vec_with_nul(env)?); + // Binary deps (mock_php) skipped: PHP symbols are weakened in .dynsym. + let path_deps: Vec = self + .shared_lib_dependencies + .iter() + .filter_map(|dep| match dep { + LibDependency::Path(p) => Some(p.to_string_lossy().into_owned()), + LibDependency::Binary(_) => None, + }) + .collect(); + if !path_deps.is_empty() { + if let Ok(s) = + CString::new(format!("_DD_SIDECAR_PATH_DEPS={}", path_deps.join(":"))) + { + envp.push(s); } - LibDependency::Binary(bin) => { - let mut tempfile = || -> anyhow::Result<()> { - let path = CString::new( - write_to_tmp_file(bin)? - .into_temp_path() - .keep()? // ensure the file is not auto cleaned in parent process - .as_os_str() - .to_str() - .ok_or_else(|| { - anyhow::format_err!("can't convert tmp file path") - })?, - )?; - temp_files.push(path.clone()); - argv.push(CString::new("-")?); - argv.push(path); - Ok(()) - }; - #[cfg(target_os = "linux")] - if matches!(spawn_method, SpawnMethod::FdExec) { - if let Ok(memfd) = linux::write_memfd("trampoline_dependencies.so", bin) { - let basefds = if fd_to_pass.is_some() { 4 } else { 3 }; - argv.push(CString::new(format!( - "/proc/self/fd/{}", - temp_memfds.len() + basefds - ))?); - temp_memfds.push(memfd); + } + + Some(CString::new( + crate::read_pt_interp_self() + .ok_or_else(|| { + anyhow::format_err!("Direct spawn: no PT_INTERP in current process") + })? + .to_str() + .ok_or_else(|| anyhow::format_err!("non-UTF8 interp path"))?, + )?) + } else { + for dep in &self.shared_lib_dependencies { + match dep { + LibDependency::Path(path) => { + argv.push(CString::new(path.to_string_lossy().to_string())?) + } + LibDependency::Binary(bin) => { + let mut tempfile = || -> anyhow::Result<()> { + let path = CString::new( + write_to_tmp_file(bin)? 
+ .into_temp_path() + .keep()? // ensure the file is not auto cleaned in parent process + .as_os_str() + .to_str() + .ok_or_else(|| { + anyhow::format_err!("can't convert tmp file path") + })?, + )?; + temp_files.push(path.clone()); + argv.push(CString::new("-")?); + argv.push(path); + Ok(()) + }; + #[cfg(target_os = "linux")] + if matches!(spawn_method, SpawnMethod::FdExec) { + if let Ok(memfd) = linux::write_memfd("trampoline_dependencies.so", bin) + { + let basefds = if fd_to_pass.is_some() { 4 } else { 3 }; + argv.push(CString::new(format!( + "/proc/self/fd/{}", + temp_memfds.len() + basefds + ))?); + temp_memfds.push(memfd); + } else { + spawn_method = SpawnMethod::Exec; + tempfile()?; + } } else { - spawn_method = SpawnMethod::Exec; tempfile()?; } - } else { + #[cfg(not(target_os = "linux"))] tempfile()?; } - #[cfg(not(target_os = "linux"))] - tempfile()?; } } - } - argv.push(entrypoint_symbol_name); + argv.push(entrypoint_symbol_name); + None + }; // build and allocate final exec fn and its dependencies #[cfg(target_os = "linux")] @@ -441,6 +486,15 @@ impl SpawnWorker { let skip_close_fd = 0; let mut spawn: Box = match spawn_method { + #[cfg(target_os = "linux")] + SpawnMethod::Direct => { + // argv is [process_name, so_path] and ld.so itself is the execve path; envp has Direct-specific entries. + let ld_cstr = direct_ld_cstr.expect("ld_cstr built above for Direct"); + Box::new(move || unsafe { + libc::execve(ld_cstr.as_ptr(), argv.as_ptr(), envp.as_ptr()); + panic!("{}", std::io::Error::last_os_error()); + }) + } #[cfg(target_os = "linux")] SpawnMethod::FdExec => { let fd = linux::write_trampoline()?; diff --git a/tools/sidecar_mockgen/src/lib.rs b/tools/sidecar_mockgen/src/lib.rs index a194c39424..3ce8263b76 100644 --- a/tools/sidecar_mockgen/src/lib.rs +++ b/tools/sidecar_mockgen/src/lib.rs @@ -1,7 +1,7 @@ // Copyright 2021-Present Datadog, Inc. 
https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use object::{File, Object, ObjectSymbol, Symbol, SymbolFlags, SymbolKind}; +use object::{File, Object, ObjectSection, ObjectSymbol, Symbol, SymbolFlags, SymbolKind}; use std::collections::HashSet; use std::fmt::Write; use std::path::Path; @@ -23,6 +23,18 @@ fn check_and_parse<'a>( } } + +fn sym_is_definition(sym: &Symbol) -> bool { + if sym.is_definition() { + return true; + } + match sym.flags() { + // 10 == STT_GNU_IFUNC for ELF files + SymbolFlags::Elf { st_info, .. } => st_info & 0xf == 10, + _ => false, + } +} + /// args: first a shared object or executable file, then object files it is to be diffed against pub fn generate_mock_symbols(binary: &Path, objects: &[&Path]) -> Result { let mut missing_symbols = HashSet::new(); @@ -42,17 +54,6 @@ pub fn generate_mock_symbols(binary: &Path, objects: &[&Path]) -> Result bool { - if sym.is_definition() { - return true; - } - match sym.flags() { - // 10 == STT_GNU_IFUNC for ELF files - SymbolFlags::Elf { st_info, .. } => st_info & 0xf == 10, - _ => false, - } - } - let mut generated = String::new(); for sym in so_file.symbols().chain(so_file.dynamic_symbols()) { if sym_is_definition(&sym) { @@ -100,3 +101,80 @@ pub fn generate_mock_symbols(binary: &Path, objects: &[&Path]) -> Result Result<(), String> { + let data = fs::read(target) + .map_err(|e| format!("read {}: {e}", target.display()))?; + + let undefined_candidates: HashSet = File::parse(data.as_slice()) + .map_err(|e| format!("parse {}: {e}", target.display()))?.symbols() + .filter(|s| s.is_undefined() && !s.is_weak()) + .filter_map(|s| s.name().ok().map(|n| n.to_string())) + .collect(); + + // Filter symbols from binary. 
+ let symbols = { + let bin_data = fs::read(binary) + .map_err(|e| format!("read {}: {e}", binary.display()))?; + let so_file = File::parse(bin_data.as_slice()) + .map_err(|e| format!("parse {}: {e}", binary.display()))?; + let mut result = HashSet::new(); + for sym in so_file.symbols().chain(so_file.dynamic_symbols()) { + if sym_is_definition(&sym) { + if let Ok(name) = sym.name() { + if undefined_candidates.contains(name) { + #[cfg(target_os = "macos")] + let name = &name[1..]; + result.insert(name.to_string()); + } + } + } + } + result + }; + + weaken_symtab(target, &symbols) +} + +/// Weaken select symbols in the `.symtab` of an ELF relocatable object (`.o`). +/// +/// Locates each symbol by index in the `.symtab` section, then flips `st_bind` from `STB_GLOBAL(1)` +/// to `STB_WEAK(2)` for the given `symbols` set. +fn weaken_symtab(obj_path: &Path, symbols: &HashSet) -> Result<(), String> { + let mut data = fs::read(obj_path) + .map_err(|e| format!("read {}: {e}", obj_path.display()))?; + + let patches: Vec = { + let elf = File::parse(data.as_slice()) + .map_err(|e| format!("parse {}: {e}", obj_path.display()))?; + + let symtab = match elf.section_by_name(".symtab") { + Some(s) => s, + None => return Ok(()), // no .symtab — nothing to do + }; + let (symtab_off, _) = symtab + .file_range() + .ok_or_else(|| format!("{}: .symtab has no file range", obj_path.display()))?; + let entsize: u64 = 24; // sizeof(Elf64_Sym); st_info at byte offset 4 + + elf.symbols() + .filter(|sym| { + sym.is_undefined() + && !sym.is_weak() + && sym.name().map_or(false, |n| symbols.contains(n)) + }) + .map(|sym| (symtab_off + sym.index().0 as u64 * entsize + 4) as usize) + .collect() + }; + + if patches.is_empty() { + return Ok(()); + } + for st_info_pos in patches { + let old = data[st_info_pos]; + data[st_info_pos] = (2u8 << 4) | (old & 0xf); // STB_WEAK = 2 + } + fs::write(obj_path, &data) + .map_err(|e| format!("write {}: {e}", obj_path.display())) +} \ No newline at end of file From 
403e9f84a39a3f086113b116c0f1debdd40cf734 Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Sat, 9 May 2026 05:13:53 +0200 Subject: [PATCH 2/9] Run init_array Signed-off-by: Bob Weinand --- spawn_worker/src/direct_entry.c | 47 +++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/spawn_worker/src/direct_entry.c b/spawn_worker/src/direct_entry.c index dfa7054fce..ac82113d6f 100644 --- a/spawn_worker/src/direct_entry.c +++ b/spawn_worker/src/direct_entry.c @@ -16,6 +16,10 @@ #include #include #include +#ifdef __linux__ +# include +# include +#endif // All fields are null/zero when calling from Direct spawn (no deps to clean up). typedef struct { @@ -44,12 +48,55 @@ static void dlopen_path_deps(void) { } } +// Called by dl_iterate_phdr to run DT_INIT_ARRAY for our own library. +// Marked no_sanitize so it can run before ASAN's per-object init completes. +#ifdef __linux__ +static int __attribute__((no_sanitize("address"), no_sanitize("undefined"))) +run_init_array_cb(struct dl_phdr_info *info, size_t size, void *self_addr) { + for (int i = 0; i < info->dlpi_phnum; i++) { + if (info->dlpi_phdr[i].p_type != PT_LOAD) continue; + uintptr_t start = info->dlpi_addr + info->dlpi_phdr[i].p_vaddr; + uintptr_t end = start + info->dlpi_phdr[i].p_memsz; + if ((uintptr_t)self_addr < start || (uintptr_t)self_addr >= end) continue; + // Found our library — locate DT_INIT_ARRAY in its DYNAMIC segment. 
+ for (int j = 0; j < info->dlpi_phnum; j++) { + if (info->dlpi_phdr[j].p_type != PT_DYNAMIC) continue; + ElfW(Dyn) *dyn = (ElfW(Dyn) *)(info->dlpi_addr + info->dlpi_phdr[j].p_vaddr); + void (**arr)(void) = NULL; + size_t sz = 0; + for (; dyn->d_tag != DT_NULL; dyn++) { + if (dyn->d_tag == DT_INIT_ARRAY) + arr = (void (**)(void))(info->dlpi_addr + dyn->d_un.d_ptr); + if (dyn->d_tag == DT_INIT_ARRAYSZ) + sz = dyn->d_un.d_val; + } + if (arr) { + for (size_t k = 0; k < sz / sizeof(void *); k++) { + if (arr[k] && (uintptr_t)arr[k] != (uintptr_t)-1) + arr[k](); + } + } + return 1; + } + } + return 0; +} +#endif /* __linux__ */ + // Called by ld.so when the library is exec'd directly. // Linked as the ELF e_entry. // // _DD_SIDECAR_DIRECT_EXEC must be set to the name of the symbol to call __attribute__((visibility("default"))) void ddog_sidecar_direct_entry(void) { + // Run our own DT_INIT_ARRAY before any other code. + // ld.so skips DT_INIT_ARRAY for the main module in direct-exec mode, so + // ASAN's per-object global registration and other constructors never run + // unless we trigger them explicitly. 
+#ifdef __linux__ + dl_iterate_phdr(run_init_array_cb, (void *)&ddog_sidecar_direct_entry); +#endif + const char *sym_name = getenv("_DD_SIDECAR_DIRECT_EXEC"); if (!sym_name || !*sym_name) { _exit(1); From 295822b4882c4ca3094708a6b69b9cef4d55ab29 Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Sat, 9 May 2026 15:43:06 +0200 Subject: [PATCH 3/9] Fix stack misalignment on x64 Signed-off-by: Bob Weinand --- spawn_worker/src/direct_entry.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/spawn_worker/src/direct_entry.c b/spawn_worker/src/direct_entry.c index ac82113d6f..ab727ec8b5 100644 --- a/spawn_worker/src/direct_entry.c +++ b/spawn_worker/src/direct_entry.c @@ -81,7 +81,7 @@ run_init_array_cb(struct dl_phdr_info *info, size_t size, void *self_addr) { } return 0; } -#endif /* __linux__ */ +#endif // Called by ld.so when the library is exec'd directly. // Linked as the ELF e_entry. @@ -89,6 +89,11 @@ run_init_array_cb(struct dl_phdr_info *info, size_t size, void *self_addr) { // _DD_SIDECAR_DIRECT_EXEC must be set to the name of the symbol to call __attribute__((visibility("default"))) void ddog_sidecar_direct_entry(void) { +#if defined(__x86_64__) + // ensure 16 byte stack alignment + __asm__ volatile ("and $-16, %%rsp" ::: "memory", "cc"); +#endif + // Run our own DT_INIT_ARRAY before any other code. 
// ld.so skips DT_INIT_ARRAY for the main module in direct-exec mode, so // ASAN's per-object global registration and other constructors never run From 08034e29d57290d7472fe89da55bffc8baaa08d4 Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Sat, 9 May 2026 18:34:02 +0200 Subject: [PATCH 4/9] DEBUG Signed-off-by: Bob Weinand --- spawn_worker/build.rs | 26 +++++- spawn_worker/src/check_execinfo.c | 6 ++ spawn_worker/src/direct_entry.c | 149 +++++++++++++++++++++++++++++- 3 files changed, 174 insertions(+), 7 deletions(-) create mode 100644 spawn_worker/src/check_execinfo.c diff --git a/spawn_worker/build.rs b/spawn_worker/build.rs index 9362bf7bf8..c0bd7178d4 100644 --- a/spawn_worker/build.rs +++ b/spawn_worker/build.rs @@ -8,10 +8,30 @@ fn main() { // Compile the ELF entry point for the shared library (direct exec by ld.so). if target_os == "linux" { - cc::Build::new() - .file("src/direct_entry.c") - .compile("ddtrace_direct_entry"); + // Detect whether execinfo (backtrace()) is available. + // On glibc it's in libc itself; on musl a separate -lexecinfo may be needed. + // Use a fresh build to probe; don't reuse the main build that has direct_entry.c. + let probe_result = cc::Build::new() + .file("src/check_execinfo.c") + .try_compile("check_execinfo_probe"); + let have_execinfo = probe_result.is_ok(); + println!("cargo:warning=execinfo probe: {} ({:?})", have_execinfo, probe_result.err()); + + let mut build = cc::Build::new(); + build.file("src/direct_entry.c"); + if have_execinfo { + build.define("HAVE_BACKTRACE", "1"); + // On musl, backtrace() lives in libexecinfo (separate package). + // On glibc, it is already in libc — adding -lexecinfo would fail. 
+ let target_env = std::env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default(); + if target_env == "musl" { + println!("cargo:rustc-link-lib=execinfo"); + } + } + + build.compile("ddtrace_direct_entry"); println!("cargo:rerun-if-changed=src/direct_entry.c"); + println!("cargo:rerun-if-changed=src/check_execinfo.c"); // Note, users of direct mode have to add to their build flags: // -Wl,-e,ddog_sidecar_direct_entry } diff --git a/spawn_worker/src/check_execinfo.c b/spawn_worker/src/check_execinfo.c new file mode 100644 index 0000000000..fcd328455e --- /dev/null +++ b/spawn_worker/src/check_execinfo.c @@ -0,0 +1,6 @@ +/* Probe: does backtrace() exist and link successfully? */ +#include <execinfo.h> +int main(void) { + void *buf[1]; + return backtrace(buf, 1) >= 0 ? 0 : 1; +} diff --git a/spawn_worker/src/direct_entry.c b/spawn_worker/src/direct_entry.c index ab727ec8b5..c81003010b 100644 --- a/spawn_worker/src/direct_entry.c +++ b/spawn_worker/src/direct_entry.c @@ -12,6 +12,12 @@ #define _GNU_SOURCE #include +#include +#include +#ifdef __linux__ +# include +#endif +#include #include #include #include @@ -20,6 +26,10 @@ # include # include #endif +/* HAVE_BACKTRACE is defined by build.rs when execinfo is available and links */ +#ifdef HAVE_BACKTRACE +# include <execinfo.h> +#endif // All fields are null/zero when calling from Direct spawn (no deps to clean up). typedef struct { @@ -71,9 +81,11 @@ run_init_array_cb(struct dl_phdr_info *info, size_t size, void *self_addr) { sz = dyn->d_un.d_val; } if (arr) { + typedef void (*init_fn_t)(int, char **, char **); + extern char **environ; for (size_t k = 0; k < sz / sizeof(void *); k++) { if (arr[k] && (uintptr_t)arr[k] != (uintptr_t)-1) - arr[k](); + ((init_fn_t)arr[k])(0, NULL, environ); } } return 1; @@ -83,17 +95,146 @@ run_init_array_cb(struct dl_phdr_info *info, size_t size, void *self_addr) { } #endif +// Signal handler: write crash info to stderr AND /tmp/ddog_sidecar_crash_<pid>. +// Uses only async-signal-safe functions. 
+static void crash_handler(int sig, siginfo_t *si, void *ctx) { + (void)si; + char path[64]; + pid_t pid = getpid(); + const char prefix[] = "/tmp/ddog_sidecar_crash_"; + int pos = 0; + for (int i = 0; prefix[i]; i++) path[pos++] = prefix[i]; + char pidbuf[20]; int plen = 0; + unsigned long p = (unsigned long)pid; + if (!p) { pidbuf[plen++] = '0'; } + else { char tmp[20]; int tl = 0; while (p) { tmp[tl++] = '0' + (int)(p % 10); p /= 10; } + for (int i = tl-1; i >= 0; i--) pidbuf[plen++] = tmp[i]; } + for (int i = 0; i < plen; i++) path[pos++] = pidbuf[i]; + path[pos] = '\0'; + + int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + int fds[2] = { STDERR_FILENO, fd }; + + const char hdr[] = "\n=== ddog_sidecar_direct_entry: fatal signal ===\n"; + for (int i = 0; i < 2; i++) if (fds[i] >= 0) write(fds[i], hdr, sizeof(hdr) - 1); + + // Write signal number and fault/crash addresses using only async-signal-safe ops. + // backtrace() is NOT called: the and$-16 stack alignment breaks CFI, causing + // _Unwind_Backtrace to fault. Instead we extract the IP directly from ucontext. 
+ static const char hex[] = "0123456789abcdef"; + // Helper: write "label: 0xHEX\n" for an unsigned long +#define WRITE_HEX(label, val) do { \ + const char _lab[] = label ": 0x"; \ + for (int _i = 0; _i < 2; _i++) if (fds[_i] >= 0) write(fds[_i], _lab, sizeof(_lab)-1); \ + char _hbuf[18]; int _hl = 0; unsigned long _v = (unsigned long)(val); \ + if (!_v) { _hbuf[_hl++] = '0'; } \ + else { char _tmp[16]; int _tl = 0; while (_v) { _tmp[_tl++] = hex[_v&0xf]; _v>>=4; } \ + for (int _j=_tl-1;_j>=0;_j--) _hbuf[_hl++]=_tmp[_j]; } \ + _hbuf[_hl++] = '\n'; \ + for (int _i = 0; _i < 2; _i++) if (fds[_i] >= 0) write(fds[_i], _hbuf, _hl); \ + } while(0) + + { int s = sig; char sl[24] = "signal: "; int sll = 8; + char stmp[10]; int stl = 0; + if (!s) stmp[stl++] = '0'; + else { while (s > 0) { stmp[stl++] = '0' + s % 10; s /= 10; } } + for (int i = stl-1; i >= 0; i--) sl[sll++] = stmp[i]; + sl[sll++] = '\n'; + for (int i = 0; i < 2; i++) if (fds[i] >= 0) write(fds[i], sl, sll); } + + if (si) WRITE_HEX("fault_addr", si->si_addr); + +#if defined(__linux__) && defined(__x86_64__) + if (ctx) { + ucontext_t *uc = (ucontext_t *)ctx; + WRITE_HEX("rip", uc->uc_mcontext.gregs[REG_RIP]); + WRITE_HEX("rsp", uc->uc_mcontext.gregs[REG_RSP]); + } +#elif defined(__linux__) && defined(__aarch64__) + if (ctx) { + ucontext_t *uc = (ucontext_t *)ctx; + WRITE_HEX("pc", uc->uc_mcontext.pc); + WRITE_HEX("sp", uc->uc_mcontext.sp); + } +#endif +#undef WRITE_HEX + + // Dump /proc/self/maps so RIP can be attributed to a library +#ifdef __linux__ + { + const char maps_hdr[] = "\n=== /proc/self/maps ===\n"; + for (int i = 0; i < 2; i++) if (fds[i] >= 0) write(fds[i], maps_hdr, sizeof(maps_hdr)-1); + int mfd = open("/proc/self/maps", O_RDONLY); + if (mfd >= 0) { + char mbuf[4096]; + ssize_t n; + while ((n = read(mfd, mbuf, sizeof(mbuf))) > 0) + for (int i = 0; i < 2; i++) if (fds[i] >= 0) write(fds[i], mbuf, (size_t)n); + close(mfd); + } + } +#endif + + if (fd >= 0) close(fd); + + struct sigaction sa = { 
.sa_handler = SIG_DFL }; + sigemptyset(&sa.sa_mask); + sigaction(sig, &sa, NULL); + raise(sig); +} + // Called by ld.so when the library is exec'd directly. // Linked as the ELF e_entry. // // _DD_SIDECAR_DIRECT_EXEC must be set to the name of the symbol to call + +// Hidden (not static) so asm can reference it by name. +// @PLT in the call generates R_X86_64_PLT32 which old linkers accept for shared objects. +// 'used' prevents LTO from dropping it (it's only called from the naked asm below). +__attribute__((visibility("hidden"), used, noinline)) +void ddog_sidecar_direct_entry_body(void); + +// Naked wrapper: ld.so JUMPs (not calls) to e_entry, so rsp alignment is +// unpredictable. We must align the stack BEFORE the C prologue runs — doing +// it inside the function body is too late because the prologue already anchors +// rbp from the unaligned rsp, causing movaps on rbp-relative locals to fault +// with #GP (reported as SIGSEGV si_addr=0 on Linux). __attribute__((visibility("default"))) -void ddog_sidecar_direct_entry(void) { #if defined(__x86_64__) - // ensure 16 byte stack alignment - __asm__ volatile ("and $-16, %%rsp" ::: "memory", "cc"); +__attribute__((naked)) +void ddog_sidecar_direct_entry(void) { + __asm__ ( + "and $-16, %rsp\n\t" /* 16-byte align before C prologue sees rsp */ + "call ddog_sidecar_direct_entry_body@PLT\n\t" /* @PLT → R_X86_64_PLT32, valid in .so */ + "ud2" /* unreachable: body calls _exit */ + ); +} +#elif defined(__i386__) +__attribute__((naked)) +void ddog_sidecar_direct_entry(void) { + __asm__ ( + "and $-16, %esp\n\t" + "call ddog_sidecar_direct_entry_body@PLT\n\t" + "ud2" + ); +} +#else +void ddog_sidecar_direct_entry(void) { + ddog_sidecar_direct_entry_body(); +} #endif +__attribute__((visibility("hidden"), used, noinline)) +void ddog_sidecar_direct_entry_body(void) { + // Install crash handler so any fatal signal is captured in a file. 
+ struct sigaction sa = { .sa_sigaction = crash_handler, + .sa_flags = SA_SIGINFO | SA_RESETHAND }; + sigemptyset(&sa.sa_mask); + sigaction(SIGSEGV, &sa, NULL); + sigaction(SIGBUS, &sa, NULL); + sigaction(SIGABRT, &sa, NULL); + sigaction(SIGILL, &sa, NULL); + // Run our own DT_INIT_ARRAY before any other code. // ld.so skips DT_INIT_ARRAY for the main module in direct-exec mode, so // ASAN's per-object global registration and other constructors never run From 3cf03f75e4e5d05d0706d48989f2baaf8fe7a38d Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Mon, 11 May 2026 16:51:07 +0200 Subject: [PATCH 5/9] Add macos symbol weakening Signed-off-by: Bob Weinand --- tools/sidecar_mockgen/src/lib.rs | 122 +++++++++++++++++++++++++++---- 1 file changed, 107 insertions(+), 15 deletions(-) diff --git a/tools/sidecar_mockgen/src/lib.rs b/tools/sidecar_mockgen/src/lib.rs index 3ce8263b76..ee38712071 100644 --- a/tools/sidecar_mockgen/src/lib.rs +++ b/tools/sidecar_mockgen/src/lib.rs @@ -1,7 +1,9 @@ // Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use object::{File, Object, ObjectSection, ObjectSymbol, Symbol, SymbolFlags, SymbolKind}; +use object::macho::MachHeader64; +use object::read::macho::{LoadCommandVariant, MachHeader}; +use object::{Endian, Endianness, File, FileKind, Object, ObjectSection, ObjectSymbol, Symbol, SymbolFlags, SymbolKind}; use std::collections::HashSet; use std::fmt::Write; use std::path::Path; @@ -138,25 +140,40 @@ pub fn weaken_object_symbols(target: &Path, binary: &Path) -> Result<(), String> } /// Weaken select symbols in the `.symtab` of an ELF relocatable object (`.o`). -/// -/// Locates each symbol by index in the `.symtab` section, then flips `st_bind` from `STB_GLOBAL(1)` -/// to `STB_WEAK(2)` for the given `symbols` set. 
+/// +/// - ELF64: flips `st_bind` from `STB_GLOBAL(1)` → `STB_WEAK(2)` in `.symtab` +/// - Mach-O64: sets `N_WEAK_REF(0x0040)` in `n_desc` in `LC_SYMTAB` fn weaken_symtab(obj_path: &Path, symbols: &HashSet) -> Result<(), String> { let mut data = fs::read(obj_path) .map_err(|e| format!("read {}: {e}", obj_path.display()))?; + let modified = match FileKind::parse(data.as_slice()) + .map_err(|e| format!("parse {}: {e}", obj_path.display()))? + { + FileKind::Elf64 => weaken_elf(&mut data, symbols, obj_path)?, + FileKind::MachO64 => weaken_macho(&mut data, symbols, obj_path)?, + _ => false, + }; + + if modified { + fs::write(obj_path, &data) + .map_err(|e| format!("write {}: {e}", obj_path.display()))?; + } + Ok(()) +} + +fn weaken_elf(data: &mut [u8], symbols: &HashSet, obj_path: &Path) -> Result { let patches: Vec = { - let elf = File::parse(data.as_slice()) + let elf = File::parse(&*data) .map_err(|e| format!("parse {}: {e}", obj_path.display()))?; let symtab = match elf.section_by_name(".symtab") { Some(s) => s, - None => return Ok(()), // no .symtab — nothing to do + None => return Ok(false), }; let (symtab_off, _) = symtab .file_range() .ok_or_else(|| format!("{}: .symtab has no file range", obj_path.display()))?; - let entsize: u64 = 24; // sizeof(Elf64_Sym); st_info at byte offset 4 elf.symbols() .filter(|sym| { @@ -164,17 +181,92 @@ fn weaken_symtab(obj_path: &Path, symbols: &HashSet) -> Result<(), Strin && !sym.is_weak() && sym.name().map_or(false, |n| symbols.contains(n)) }) - .map(|sym| (symtab_off + sym.index().0 as u64 * entsize + 4) as usize) + .map(|sym| (symtab_off + sym.index().0 as u64 * 24 + 4) as usize) // sizeof(Elf64_Sym)=24; st_info at +4 .collect() }; if patches.is_empty() { - return Ok(()); + return Ok(false); } - for st_info_pos in patches { - let old = data[st_info_pos]; - data[st_info_pos] = (2u8 << 4) | (old & 0xf); // STB_WEAK = 2 + for pos in patches { + let old = data[pos]; + data[pos] = (2u8 << 4) | (old & 0xf); // STB_WEAK = 2 } - 
fs::write(obj_path, &data) - .map_err(|e| format!("write {}: {e}", obj_path.display())) -} \ No newline at end of file + Ok(true) +} + +fn weaken_macho(data: &mut [u8], symbols: &HashSet, obj_path: &Path) -> Result { + let patches: Vec<(usize, [u8; 2])> = { + let file = File::parse(&*data) + .map_err(|e| format!("parse macho {}: {e}", obj_path.display()))?; + + // Mach-O symbol names have a leading '_' stripped when `symbols` was built. + let indices: Vec = file + .symbols() + .filter(|sym| { + sym.is_undefined() + && !sym.is_weak() + && sym.name().map_or(false, |n| { + symbols.contains(n.strip_prefix('_').unwrap_or(n)) + }) + }) + .map(|sym| sym.index().0) + .collect(); + + if indices.is_empty() { + return Ok(false); + } + + let (symoff, is_be) = macho_find_symoff(data, obj_path)?; + + indices + .into_iter() + .filter_map(|idx| { + let abs = symoff + idx * 16 + 6; // nlist_64: 16 bytes/entry, n_desc at offset 6 + if abs + 2 > data.len() { + return None; + } + let old = if is_be { + u16::from_be_bytes(data[abs..abs + 2].try_into().ok()?) + } else { + u16::from_le_bytes(data[abs..abs + 2].try_into().ok()?) + }; + let new_val = old | 0x0040; // N_WEAK_REF + Some((abs, if is_be { new_val.to_be_bytes() } else { new_val.to_le_bytes() })) + }) + .collect() + }; + + if patches.is_empty() { + return Ok(false); + } + for (off, bytes) in patches { + data[off..off + 2].copy_from_slice(&bytes); + } + Ok(true) +} + +/// Walk `LC_SYMTAB` load commands to find the symbol table file offset. +/// Returns `(symoff, is_big_endian)`. 
+fn macho_find_symoff(data: &[u8], obj_path: &Path) -> Result<(usize, bool), String> { + let header = MachHeader64::::parse(data, 0) + .map_err(|e| format!("parse mach header {}: {e}", obj_path.display()))?; + let endian = header + .endian() + .map_err(|e| format!("mach endian {}: {e}", obj_path.display()))?; + let mut cmds = header + .load_commands(endian, data, 0) + .map_err(|e| format!("load commands {}: {e}", obj_path.display()))?; + loop { + match cmds.next() { + Ok(Some(cmd)) => { + if let Ok(LoadCommandVariant::Symtab(sc)) = cmd.variant() { + return Ok((sc.symoff.get(endian) as usize, endian.is_big_endian())); + } + } + Ok(None) => break, + Err(e) => return Err(format!("{}: load cmd: {e}", obj_path.display())), + } + } + Err(format!("{}: no LC_SYMTAB found", obj_path.display())) +} From 0b30c6df0b9e70ee2b954ecbdbd21a6594326ca2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gustavo=20Andr=C3=A9=20dos=20Santos=20Lopes?= Date: Mon, 11 May 2026 18:39:55 +0100 Subject: [PATCH 6/9] Don't overweaken .o symbols When searching for .o symbols to weaken cross reference only with EXPORTED symbols from the php binary. 
--- tools/sidecar_mockgen/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/sidecar_mockgen/src/lib.rs b/tools/sidecar_mockgen/src/lib.rs index ee38712071..cc46bbdc13 100644 --- a/tools/sidecar_mockgen/src/lib.rs +++ b/tools/sidecar_mockgen/src/lib.rs @@ -122,7 +122,7 @@ pub fn weaken_object_symbols(target: &Path, binary: &Path) -> Result<(), String> let so_file = File::parse(bin_data.as_slice()) .map_err(|e| format!("parse {}: {e}", binary.display()))?; let mut result = HashSet::new(); - for sym in so_file.symbols().chain(so_file.dynamic_symbols()) { + for sym in so_file.dynamic_symbols() { if sym_is_definition(&sym) { if let Ok(name) = sym.name() { if undefined_candidates.contains(name) { From c929471276ab3088bf4a3719d361d407e56e78fc Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Tue, 12 May 2026 14:12:40 +0200 Subject: [PATCH 7/9] Dummy Signed-off-by: Bob Weinand --- datadog-sidecar/src/entry.rs | 21 +++++++++++++ spawn_worker/src/direct_entry.c | 56 +++++++++++++++++++++++++++------ spawn_worker/src/unix/spawn.rs | 14 +++++++++ 3 files changed, 81 insertions(+), 10 deletions(-) diff --git a/datadog-sidecar/src/entry.rs b/datadog-sidecar/src/entry.rs index d800a3efd0..08277b818b 100644 --- a/datadog-sidecar/src/entry.rs +++ b/datadog-sidecar/src/entry.rs @@ -263,6 +263,27 @@ pub fn daemonize(listener: IpcServer, mut cfg: Config) -> anyhow::Result<()> { } spawn_cfg.append_env("LSAN_OPTIONS", "detect_leaks=0"); + // In ASAN builds ddtrace.so is the "main object" when exec'd directly by + // ld.so, so libclang_rt.asan lands behind libc in the link map. ASAN + // would otherwise abort with "does not come first in initial library list." + // set_env replaces any inherited ASAN_OPTIONS so getenv in the child finds + // our value first. 
+ #[cfg(target_os = "linux")] + { + let asan_init = unsafe { + libc::dlsym(libc::RTLD_DEFAULT, b"__asan_init\0".as_ptr() as *const _) + }; + if !asan_init.is_null() { + let existing = std::env::var("ASAN_OPTIONS").unwrap_or_default(); + let asan_opts = if existing.is_empty() { + "verify_asan_link_order=0".to_owned() + } else { + format!("{}:verify_asan_link_order=0", existing) + }; + spawn_cfg.set_env("ASAN_OPTIONS", asan_opts); + } + } + setup_daemon_process(listener, &mut spawn_cfg)?; let mut lib_deps = cfg.library_dependencies; diff --git a/spawn_worker/src/direct_entry.c b/spawn_worker/src/direct_entry.c index c81003010b..7cd1a6cd42 100644 --- a/spawn_worker/src/direct_entry.c +++ b/spawn_worker/src/direct_entry.c @@ -59,6 +59,10 @@ static void dlopen_path_deps(void) { } // Called by dl_iterate_phdr to run DT_INIT_ARRAY for our own library. +// Returns 1 when our library is found and processed (stopping iteration), +// 0 otherwise. On musl, dl_iterate_phdr may not include the exec'd library +// (the main object) in its DSO list; in that case dl_iterate_phdr returns 0 +// and we fall back to the AT_PHDR approach below. // Marked no_sanitize so it can run before ASAN's per-object init completes. #ifdef __linux__ static int __attribute__((no_sanitize("address"), no_sanitize("undefined"))) @@ -93,6 +97,7 @@ run_init_array_cb(struct dl_phdr_info *info, size_t size, void *self_addr) { } return 0; } + #endif // Signal handler: write crash info to stderr AND /tmp/ddog_sidecar_crash_. @@ -204,8 +209,16 @@ __attribute__((visibility("default"))) __attribute__((naked)) void ddog_sidecar_direct_entry(void) { __asm__ ( + /* ld.so jumps here (no call), so there is no return address and no + * previous frame to unwind into. .cfi_undefined rip tells + * _Unwind_Backtrace to stop here rather than walking into garbage, + * which would produce a null _Unwind_Context → SIGSEGV at 0x1 in + * libgcc_s (masking the real ASAN error). 
+ * Note: clang emits .cfi_startproc/.cfi_endproc around naked functions, + * so we must NOT add our own startproc/endproc here. */ + ".cfi_undefined rip\n\t" "and $-16, %rsp\n\t" /* 16-byte align before C prologue sees rsp */ - "call ddog_sidecar_direct_entry_body@PLT\n\t" /* @PLT → R_X86_64_PLT32, valid in .so */ + "call ddog_sidecar_direct_entry_body@PLT\n\t" "ud2" /* unreachable: body calls _exit */ ); } @@ -213,11 +226,28 @@ void ddog_sidecar_direct_entry(void) { __attribute__((naked)) void ddog_sidecar_direct_entry(void) { __asm__ ( + ".cfi_undefined eip\n\t" "and $-16, %esp\n\t" "call ddog_sidecar_direct_entry_body@PLT\n\t" "ud2" ); } +#elif defined(__aarch64__) +/* ld.so branches (not calls) to e_entry on aarch64, so x30 (LR) has no valid + * return address and SP may not be 16-byte aligned. Align SP before the C + * prologue can execute its first `stp x29, x30, [sp, #-16]!` (SIGBUS if + * SP%16 != 0 on aarch64). */ +__attribute__((naked)) +void ddog_sidecar_direct_entry(void) { + __asm__ ( + ".cfi_undefined x30\n\t" /* no valid return address — bottom of stack */ + "mov x9, sp\n\t" + "and x9, x9, #~15\n\t" + "mov sp, x9\n\t" + "bl ddog_sidecar_direct_entry_body\n\t" + "brk #0" /* unreachable: body calls _exit */ + ); +} #else void ddog_sidecar_direct_entry(void) { ddog_sidecar_direct_entry_body(); @@ -226,23 +256,29 @@ void ddog_sidecar_direct_entry(void) { __attribute__((visibility("hidden"), used, noinline)) void ddog_sidecar_direct_entry_body(void) { - // Install crash handler so any fatal signal is captured in a file. - struct sigaction sa = { .sa_sigaction = crash_handler, - .sa_flags = SA_SIGINFO | SA_RESETHAND }; - sigemptyset(&sa.sa_mask); - sigaction(SIGSEGV, &sa, NULL); - sigaction(SIGBUS, &sa, NULL); - sigaction(SIGABRT, &sa, NULL); - sigaction(SIGILL, &sa, NULL); - // Run our own DT_INIT_ARRAY before any other code. 
// ld.so skips DT_INIT_ARRAY for the main module in direct-exec mode, so // ASAN's per-object global registration and other constructors never run // unless we trigger them explicitly. + // IMPORTANT: crash handler installation must happen AFTER this call. + // During DT_INIT_ARRAY, ASAN may attempt to collect a backtrace via + // _Unwind_Backtrace. That unwind walks through the naked + // ddog_sidecar_direct_entry frame (no CFI), hits a null _Unwind_Context, + // and raises SIGSEGV. If we have already installed our SA_RESETHAND crash + // handler, it fires instead of ASAN's handler, breaking ASAN entirely. #ifdef __linux__ dl_iterate_phdr(run_init_array_cb, (void *)&ddog_sidecar_direct_entry); #endif + // Install crash handler now that the Rust/ASAN runtime is fully up. + struct sigaction sa = { .sa_sigaction = crash_handler, + .sa_flags = SA_SIGINFO | SA_RESETHAND }; + sigemptyset(&sa.sa_mask); + sigaction(SIGSEGV, &sa, NULL); + sigaction(SIGBUS, &sa, NULL); + sigaction(SIGABRT, &sa, NULL); + sigaction(SIGILL, &sa, NULL); + const char *sym_name = getenv("_DD_SIDECAR_DIRECT_EXEC"); if (!sym_name || !*sym_name) { _exit(1); diff --git a/spawn_worker/src/unix/spawn.rs b/spawn_worker/src/unix/spawn.rs index ecc998a0a3..5fd8981fff 100644 --- a/spawn_worker/src/unix/spawn.rs +++ b/spawn_worker/src/unix/spawn.rs @@ -296,6 +296,20 @@ impl SpawnWorker { self } + /// Set an env var, removing any existing entry with the same key first. + /// Use this instead of `append_env` when the parent process may already + /// have the variable set and the child must use the new value. 
+ pub fn set_env, V: Into>( + &mut self, + key: K, + value: V, + ) -> &mut Self { + let key = key.into(); + self.env.retain(|(k, _)| k != &key); + self.env.push((key, value.into())); + self + } + fn wait_pid(pid: Option) -> anyhow::Result<()> { let pid = match pid { Some(pid) => Pid::from_raw(pid), From 4ca7d965eb9d037423a2010d886967acdca28a6e Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Tue, 12 May 2026 19:12:39 +0200 Subject: [PATCH 8/9] Cleanup Signed-off-by: Bob Weinand --- datadog-sidecar/src/entry.rs | 9 +- spawn_worker/build.rs | 33 +--- spawn_worker/src/check_execinfo.c | 6 - spawn_worker/src/direct_entry.c | 301 +++++++++--------------------- spawn_worker/src/unix/mod.rs | 4 +- spawn_worker/src/unix/spawn.rs | 13 +- tools/sidecar_mockgen/src/lib.rs | 57 +++--- 7 files changed, 146 insertions(+), 277 deletions(-) delete mode 100644 spawn_worker/src/check_execinfo.c diff --git a/datadog-sidecar/src/entry.rs b/datadog-sidecar/src/entry.rs index 08277b818b..0919f6e649 100644 --- a/datadog-sidecar/src/entry.rs +++ b/datadog-sidecar/src/entry.rs @@ -263,16 +263,15 @@ pub fn daemonize(listener: IpcServer, mut cfg: Config) -> anyhow::Result<()> { } spawn_cfg.append_env("LSAN_OPTIONS", "detect_leaks=0"); - // In ASAN builds ddtrace.so is the "main object" when exec'd directly by - // ld.so, so libclang_rt.asan lands behind libc in the link map. ASAN + // In ASAN builds the sidecar is the "main object" when exec'd directly by + // ld.so, so libclang_rt.asan lands behind libc in the link map. ASAN // would otherwise abort with "does not come first in initial library list." // set_env replaces any inherited ASAN_OPTIONS so getenv in the child finds // our value first. 
#[cfg(target_os = "linux")] { - let asan_init = unsafe { - libc::dlsym(libc::RTLD_DEFAULT, b"__asan_init\0".as_ptr() as *const _) - }; + let asan_init = + unsafe { libc::dlsym(libc::RTLD_DEFAULT, c"__asan_init".as_ptr() as *const _) }; if !asan_init.is_null() { let existing = std::env::var("ASAN_OPTIONS").unwrap_or_default(); let asan_opts = if existing.is_empty() { diff --git a/spawn_worker/build.rs b/spawn_worker/build.rs index c0bd7178d4..bd363e6ed8 100644 --- a/spawn_worker/build.rs +++ b/spawn_worker/build.rs @@ -8,32 +8,15 @@ fn main() { // Compile the ELF entry point for the shared library (direct exec by ld.so). if target_os == "linux" { - // Detect whether execinfo (backtrace()) is available. - // On glibc it's in libc itself; on musl a separate -lexecinfo may be needed. - // Use a fresh build to probe; don't reuse the main build that has direct_entry.c. - let probe_result = cc::Build::new() - .file("src/check_execinfo.c") - .try_compile("check_execinfo_probe"); - let have_execinfo = probe_result.is_ok(); - println!("cargo:warning=execinfo probe: {} ({:?})", have_execinfo, probe_result.err()); - - let mut build = cc::Build::new(); - build.file("src/direct_entry.c"); - if have_execinfo { - build.define("HAVE_BACKTRACE", "1"); - // On musl, backtrace() lives in libexecinfo (separate package). - // On glibc, it is already in libc — adding -lexecinfo would fail. 
- let target_env = std::env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default(); - if target_env == "musl" { - println!("cargo:rustc-link-lib=execinfo"); - } - } - - build.compile("ddtrace_direct_entry"); - println!("cargo:rerun-if-changed=src/direct_entry.c"); - println!("cargo:rerun-if-changed=src/check_execinfo.c"); + let mut builder = cc::Build::new(); + builder + .file("src/direct_entry.c") + .warnings(true) + .flag("-g") + .emit_rerun_if_env_changed(true) + .compile("ddog_spawn_direct_entry"); // Note, users of direct mode have to add to their build flags: - // -Wl,-e,ddog_sidecar_direct_entry + // -Wl,-e,ddog_spawn_direct_entry } let mut builder = cc_utils::ImprovedBuild::new(); diff --git a/spawn_worker/src/check_execinfo.c b/spawn_worker/src/check_execinfo.c deleted file mode 100644 index fcd328455e..0000000000 --- a/spawn_worker/src/check_execinfo.c +++ /dev/null @@ -1,6 +0,0 @@ -/* Probe: does backtrace() exist and link successfully? */ -#include -int main(void) { - void *buf[1]; - return backtrace(buf, 1) >= 0 ? 0 : 1; -} diff --git a/spawn_worker/src/direct_entry.c b/spawn_worker/src/direct_entry.c index 7cd1a6cd42..80fcb3b095 100644 --- a/spawn_worker/src/direct_entry.c +++ b/spawn_worker/src/direct_entry.c @@ -1,37 +1,35 @@ // Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -// This file provides the ELF entry point (ddog_sidecar_direct_entry) for the -// shared library that contains spawn_worker (ddtrace.so in non-SSI builds, -// libddtrace_php.so in SSI builds). When ld.so exec's that library directly, -// it calls this function rather than the trampoline. -// -// Linked as the ELF e_entry via: -// - cargo:rustc-cdylib-link-arg=-Wl,-e,ddog_sidecar_direct_entry (cdylib / SSI) -// - -Wl,-e,ddog_sidecar_direct_entry in EXTRA_LDFLAGS (ddtrace.so / non-SSI) +// This file provides the ELF entry point (ddog_spawn_direct_entry) for the +// shared library that contains spawn_worker. 
When ld.so exec's that library +// directly, it calls this function rather than the trampoline. +// _DD_SIDECAR_DIRECT_EXEC must be set to the name of the symbol to call #define _GNU_SOURCE #include #include #include -#ifdef __linux__ -# include -#endif #include #include #include #include #include -#ifdef __linux__ -# include -# include -#endif -/* HAVE_BACKTRACE is defined by build.rs when execinfo is available and links */ -#ifdef HAVE_BACKTRACE -# include -#endif +#include +#include + +static inline FILE *error_fd() { + char *log_env = getenv("DD_TRACE_LOG_FILE"); + if (log_env) { + FILE *file = fopen(log_env, "a"); + if (file) { + return file; + } + } + return stderr; +} -// All fields are null/zero when calling from Direct spawn (no deps to clean up). +// All fields are zero here (no deps to clean up). typedef struct { int argc; const char **argv; @@ -41,247 +39,131 @@ typedef struct { // dlopen() each colon-separated path in _DD_SIDECAR_PATH_DEPS. static void dlopen_path_deps(void) { const char *deps = getenv("_DD_SIDECAR_PATH_DEPS"); - if (!deps || !*deps) return; + if (!deps || !*deps) { + return; + } // Work on a copy so we can NUL-terminate each token in place. size_t len = strlen(deps); char *buf = alloca(len + 1); memcpy(buf, deps, len + 1); - char *p = buf; - while (*p) { - char *colon = strchr(p, ':'); - if (colon) *colon = '\0'; - if (*p) dlopen(p, RTLD_LAZY | RTLD_GLOBAL); - if (!colon) break; - p = colon + 1; + char *path = buf; + while (*path) { + char *colon = strchr(path, ':'); + if (colon) { + *colon = '\0'; + } + if (*path) { + if (!dlopen(path, RTLD_LAZY | RTLD_GLOBAL)) { + fputs(dlerror(), error_fd()); + _exit(11); + } + } + if (!colon) { + break; + } + path = colon + 1; } } // Called by dl_iterate_phdr to run DT_INIT_ARRAY for our own library. // Returns 1 when our library is found and processed (stopping iteration), -// 0 otherwise. 
On musl, dl_iterate_phdr may not include the exec'd library -// (the main object) in its DSO list; in that case dl_iterate_phdr returns 0 -// and we fall back to the AT_PHDR approach below. +// 0 otherwise. // Marked no_sanitize so it can run before ASAN's per-object init completes. -#ifdef __linux__ static int __attribute__((no_sanitize("address"), no_sanitize("undefined"))) run_init_array_cb(struct dl_phdr_info *info, size_t size, void *self_addr) { + (void)size; for (int i = 0; i < info->dlpi_phnum; i++) { - if (info->dlpi_phdr[i].p_type != PT_LOAD) continue; + if (info->dlpi_phdr[i].p_type != PT_LOAD) { + continue; + } + uintptr_t start = info->dlpi_addr + info->dlpi_phdr[i].p_vaddr; - uintptr_t end = start + info->dlpi_phdr[i].p_memsz; - if ((uintptr_t)self_addr < start || (uintptr_t)self_addr >= end) continue; + uintptr_t end = start + info->dlpi_phdr[i].p_memsz; + if ((uintptr_t)self_addr < start || (uintptr_t)self_addr >= end) { + continue; + } + // Found our library — locate DT_INIT_ARRAY in its DYNAMIC segment. 
for (int j = 0; j < info->dlpi_phnum; j++) { - if (info->dlpi_phdr[j].p_type != PT_DYNAMIC) continue; - ElfW(Dyn) *dyn = (ElfW(Dyn) *)(info->dlpi_addr + info->dlpi_phdr[j].p_vaddr); - void (**arr)(void) = NULL; - size_t sz = 0; - for (; dyn->d_tag != DT_NULL; dyn++) { - if (dyn->d_tag == DT_INIT_ARRAY) - arr = (void (**)(void))(info->dlpi_addr + dyn->d_un.d_ptr); - if (dyn->d_tag == DT_INIT_ARRAYSZ) - sz = dyn->d_un.d_val; - } - if (arr) { - typedef void (*init_fn_t)(int, char **, char **); - extern char **environ; - for (size_t k = 0; k < sz / sizeof(void *); k++) { - if (arr[k] && (uintptr_t)arr[k] != (uintptr_t)-1) - ((init_fn_t)arr[k])(0, NULL, environ); + if (info->dlpi_phdr[j].p_type == PT_DYNAMIC) { + void (**arr)(void) = NULL; + size_t sz = 0; + for (ElfW(Dyn) *dyn = (ElfW(Dyn) *)(info->dlpi_addr + info->dlpi_phdr[j].p_vaddr); dyn->d_tag != DT_NULL; dyn++) { + if (dyn->d_tag == DT_INIT_ARRAY) { + arr = (void (**)(void))(info->dlpi_addr + dyn->d_un.d_ptr); + } + if (dyn->d_tag == DT_INIT_ARRAYSZ) { + sz = dyn->d_un.d_val; + } + } + if (arr) { + typedef void (*init_fn_t)(int, char **, char **); + extern char **environ; + for (size_t k = 0; k < sz / sizeof(void *); k++) { + if (arr[k] && (uintptr_t)arr[k] != (uintptr_t)-1) { + ((init_fn_t)arr[k])(0, NULL, environ); + } + } } + return 1; } - return 1; } } return 0; } -#endif - -// Signal handler: write crash info to stderr AND /tmp/ddog_sidecar_crash_. -// Uses only async-signal-safe functions. 
-static void crash_handler(int sig, siginfo_t *si, void *ctx) { - (void)si; - char path[64]; - pid_t pid = getpid(); - const char prefix[] = "/tmp/ddog_sidecar_crash_"; - int pos = 0; - for (int i = 0; prefix[i]; i++) path[pos++] = prefix[i]; - char pidbuf[20]; int plen = 0; - unsigned long p = (unsigned long)pid; - if (!p) { pidbuf[plen++] = '0'; } - else { char tmp[20]; int tl = 0; while (p) { tmp[tl++] = '0' + (int)(p % 10); p /= 10; } - for (int i = tl-1; i >= 0; i--) pidbuf[plen++] = tmp[i]; } - for (int i = 0; i < plen; i++) path[pos++] = pidbuf[i]; - path[pos] = '\0'; - - int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644); - int fds[2] = { STDERR_FILENO, fd }; - - const char hdr[] = "\n=== ddog_sidecar_direct_entry: fatal signal ===\n"; - for (int i = 0; i < 2; i++) if (fds[i] >= 0) write(fds[i], hdr, sizeof(hdr) - 1); - - // Write signal number and fault/crash addresses using only async-signal-safe ops. - // backtrace() is NOT called: the and$-16 stack alignment breaks CFI, causing - // _Unwind_Backtrace to fault. Instead we extract the IP directly from ucontext. 
- static const char hex[] = "0123456789abcdef"; - // Helper: write "label: 0xHEX\n" for an unsigned long -#define WRITE_HEX(label, val) do { \ - const char _lab[] = label ": 0x"; \ - for (int _i = 0; _i < 2; _i++) if (fds[_i] >= 0) write(fds[_i], _lab, sizeof(_lab)-1); \ - char _hbuf[18]; int _hl = 0; unsigned long _v = (unsigned long)(val); \ - if (!_v) { _hbuf[_hl++] = '0'; } \ - else { char _tmp[16]; int _tl = 0; while (_v) { _tmp[_tl++] = hex[_v&0xf]; _v>>=4; } \ - for (int _j=_tl-1;_j>=0;_j--) _hbuf[_hl++]=_tmp[_j]; } \ - _hbuf[_hl++] = '\n'; \ - for (int _i = 0; _i < 2; _i++) if (fds[_i] >= 0) write(fds[_i], _hbuf, _hl); \ - } while(0) - - { int s = sig; char sl[24] = "signal: "; int sll = 8; - char stmp[10]; int stl = 0; - if (!s) stmp[stl++] = '0'; - else { while (s > 0) { stmp[stl++] = '0' + s % 10; s /= 10; } } - for (int i = stl-1; i >= 0; i--) sl[sll++] = stmp[i]; - sl[sll++] = '\n'; - for (int i = 0; i < 2; i++) if (fds[i] >= 0) write(fds[i], sl, sll); } - - if (si) WRITE_HEX("fault_addr", si->si_addr); - -#if defined(__linux__) && defined(__x86_64__) - if (ctx) { - ucontext_t *uc = (ucontext_t *)ctx; - WRITE_HEX("rip", uc->uc_mcontext.gregs[REG_RIP]); - WRITE_HEX("rsp", uc->uc_mcontext.gregs[REG_RSP]); - } -#elif defined(__linux__) && defined(__aarch64__) - if (ctx) { - ucontext_t *uc = (ucontext_t *)ctx; - WRITE_HEX("pc", uc->uc_mcontext.pc); - WRITE_HEX("sp", uc->uc_mcontext.sp); - } -#endif -#undef WRITE_HEX - - // Dump /proc/self/maps so RIP can be attributed to a library -#ifdef __linux__ - { - const char maps_hdr[] = "\n=== /proc/self/maps ===\n"; - for (int i = 0; i < 2; i++) if (fds[i] >= 0) write(fds[i], maps_hdr, sizeof(maps_hdr)-1); - int mfd = open("/proc/self/maps", O_RDONLY); - if (mfd >= 0) { - char mbuf[4096]; - ssize_t n; - while ((n = read(mfd, mbuf, sizeof(mbuf))) > 0) - for (int i = 0; i < 2; i++) if (fds[i] >= 0) write(fds[i], mbuf, (size_t)n); - close(mfd); - } - } -#endif - - if (fd >= 0) close(fd); - - struct sigaction sa = { 
.sa_handler = SIG_DFL }; - sigemptyset(&sa.sa_mask); - sigaction(sig, &sa, NULL); - raise(sig); -} - -// Called by ld.so when the library is exec'd directly. -// Linked as the ELF e_entry. -// -// _DD_SIDECAR_DIRECT_EXEC must be set to the name of the symbol to call - // Hidden (not static) so asm can reference it by name. // @PLT in the call generates R_X86_64_PLT32 which old linkers accept for shared objects. // 'used' prevents LTO from dropping it (it's only called from the naked asm below). __attribute__((visibility("hidden"), used, noinline)) -void ddog_sidecar_direct_entry_body(void); +void ddog_spawn_direct_entry_body(void); -// Naked wrapper: ld.so JUMPs (not calls) to e_entry, so rsp alignment is -// unpredictable. We must align the stack BEFORE the C prologue runs — doing -// it inside the function body is too late because the prologue already anchors -// rbp from the unaligned rsp, causing movaps on rbp-relative locals to fault -// with #GP (reported as SIGSEGV si_addr=0 on Linux). +// Naked wrapper: ld.so JUMPs (not calls) to e_entry, so stack pointer alignment is +// unpredictable. We must align the stack BEFORE the C prologue runs: doing +// it inside the function body is too late because the prologue will already alter it. __attribute__((visibility("default"))) #if defined(__x86_64__) __attribute__((naked)) -void ddog_sidecar_direct_entry(void) { +void ddog_spawn_direct_entry(void) { __asm__ ( - /* ld.so jumps here (no call), so there is no return address and no - * previous frame to unwind into. .cfi_undefined rip tells - * _Unwind_Backtrace to stop here rather than walking into garbage, - * which would produce a null _Unwind_Context → SIGSEGV at 0x1 in - * libgcc_s (masking the real ASAN error). - * Note: clang emits .cfi_startproc/.cfi_endproc around naked functions, - * so we must NOT add our own startproc/endproc here. 
*/ - ".cfi_undefined rip\n\t" - "and $-16, %rsp\n\t" /* 16-byte align before C prologue sees rsp */ - "call ddog_sidecar_direct_entry_body@PLT\n\t" - "ud2" /* unreachable: body calls _exit */ - ); -} -#elif defined(__i386__) -__attribute__((naked)) -void ddog_sidecar_direct_entry(void) { - __asm__ ( - ".cfi_undefined eip\n\t" - "and $-16, %esp\n\t" - "call ddog_sidecar_direct_entry_body@PLT\n\t" - "ud2" + ".cfi_undefined rip\n\t" /* no valid return address: bottom of stack */ + "and $-16, %rsp\n\t" + "call ddog_spawn_direct_entry_body@PLT\n\t" + "ud2" /* unreachable: body calls _exit */ ); } #elif defined(__aarch64__) -/* ld.so branches (not calls) to e_entry on aarch64, so x30 (LR) has no valid - * return address and SP may not be 16-byte aligned. Align SP before the C - * prologue can execute its first `stp x29, x30, [sp, #-16]!` (SIGBUS if - * SP%16 != 0 on aarch64). */ __attribute__((naked)) -void ddog_sidecar_direct_entry(void) { +void ddog_spawn_direct_entry(void) { __asm__ ( - ".cfi_undefined x30\n\t" /* no valid return address — bottom of stack */ + ".cfi_undefined x30\n\t" /* no valid return address: bottom of stack */ "mov x9, sp\n\t" "and x9, x9, #~15\n\t" "mov sp, x9\n\t" - "bl ddog_sidecar_direct_entry_body\n\t" - "brk #0" /* unreachable: body calls _exit */ + "bl ddog_spawn_direct_entry_body\n\t" + "brk #0" /* unreachable: body calls _exit */ ); } #else -void ddog_sidecar_direct_entry(void) { - ddog_sidecar_direct_entry_body(); +void ddog_spawn_direct_entry(void) { + ddog_spawn_direct_entry_body(); } #endif __attribute__((visibility("hidden"), used, noinline)) -void ddog_sidecar_direct_entry_body(void) { +void ddog_spawn_direct_entry_body(void) { // Run our own DT_INIT_ARRAY before any other code. // ld.so skips DT_INIT_ARRAY for the main module in direct-exec mode, so // ASAN's per-object global registration and other constructors never run // unless we trigger them explicitly. - // IMPORTANT: crash handler installation must happen AFTER this call. 
- // During DT_INIT_ARRAY, ASAN may attempt to collect a backtrace via - // _Unwind_Backtrace. That unwind walks through the naked - // ddog_sidecar_direct_entry frame (no CFI), hits a null _Unwind_Context, - // and raises SIGSEGV. If we have already installed our SA_RESETHAND crash - // handler, it fires instead of ASAN's handler, breaking ASAN entirely. -#ifdef __linux__ - dl_iterate_phdr(run_init_array_cb, (void *)&ddog_sidecar_direct_entry); -#endif - - // Install crash handler now that the Rust/ASAN runtime is fully up. - struct sigaction sa = { .sa_sigaction = crash_handler, - .sa_flags = SA_SIGINFO | SA_RESETHAND }; - sigemptyset(&sa.sa_mask); - sigaction(SIGSEGV, &sa, NULL); - sigaction(SIGBUS, &sa, NULL); - sigaction(SIGABRT, &sa, NULL); - sigaction(SIGILL, &sa, NULL); + dl_iterate_phdr(run_init_array_cb, (void *)&ddog_spawn_direct_entry); - const char *sym_name = getenv("_DD_SIDECAR_DIRECT_EXEC"); - if (!sym_name || !*sym_name) { - _exit(1); + const char *symbol_name = getenv("_DD_SIDECAR_DIRECT_EXEC"); + if (!symbol_name || !*symbol_name) { + fputs("_DD_SIDECAR_DIRECT_EXEC is not set. Aborting.", error_fd()); + _exit(2); } // Load any path-dep libraries listed in _DD_SIDECAR_PATH_DEPS. @@ -290,10 +172,13 @@ void ddog_sidecar_direct_entry_body(void) { // Call the requested symbol — avoids a link-time dependency on // datadog-sidecar from spawn_worker. 
typedef void (*entry_fn_t)(const trampoline_data_t *); - entry_fn_t entry = (entry_fn_t)dlsym(RTLD_DEFAULT, sym_name); + entry_fn_t entry = (entry_fn_t)dlsym(RTLD_DEFAULT, symbol_name); if (entry) { trampoline_data_t data = {0}; entry(&data); + } else { + fprintf(error_fd(), "fn was not found; missing %s in binary", symbol_name); + _exit(12); } _exit(0); } diff --git a/spawn_worker/src/unix/mod.rs b/spawn_worker/src/unix/mod.rs index fb02541c8d..d7aa026044 100644 --- a/spawn_worker/src/unix/mod.rs +++ b/spawn_worker/src/unix/mod.rs @@ -59,7 +59,7 @@ pub fn getpid() -> libc::pid_t { /// Return the path to the dynamic linker (PT_INTERP) of the current process. #[cfg(target_os = "linux")] -pub fn read_pt_interp_self() -> Option { +pub fn read_pt_interp_self() -> Option { // Auxiliary vector entries for the current process's executable PHDRs. // SAFETY: getauxval is signal-safe and idempotent. let phdr_addr = unsafe { libc::getauxval(libc::AT_PHDR) } as usize; @@ -101,7 +101,7 @@ pub fn read_pt_interp_self() -> Option { // SAFETY: the interpreter path is a valid C string placed by the kernel in the mapped // PT_INTERP segment; it is readable for the lifetime of the process. let interp = unsafe { CStr::from_ptr(interp_ptr) }; - Some(std::path::PathBuf::from(interp.to_string_lossy().as_ref())) + Some(PathBuf::from(interp.to_string_lossy().as_ref())) } impl Entrypoint { diff --git a/spawn_worker/src/unix/spawn.rs b/spawn_worker/src/unix/spawn.rs index 5fd8981fff..4e4c32e56f 100644 --- a/spawn_worker/src/unix/spawn.rs +++ b/spawn_worker/src/unix/spawn.rs @@ -299,11 +299,7 @@ impl SpawnWorker { /// Set an env var, removing any existing entry with the same key first. /// Use this instead of `append_env` when the parent process may already /// have the variable set and the child must use the new value. 
- pub fn set_env, V: Into>( - &mut self, - key: K, - value: V, - ) -> &mut Self { + pub fn set_env, V: Into>(&mut self, key: K, value: V) -> &mut Self { let key = key.into(); self.env.retain(|(k, _)| k != &key); self.env.push((key, value.into())); @@ -444,6 +440,10 @@ impl SpawnWorker { .ok_or_else(|| anyhow::format_err!("non-UTF8 interp path"))?, )?) } else { + None + }; + + if !use_direct { for dep in &self.shared_lib_dependencies { match dep { LibDependency::Path(path) => { @@ -490,8 +490,7 @@ impl SpawnWorker { } argv.push(entrypoint_symbol_name); - None - }; + } // build and allocate final exec fn and its dependencies #[cfg(target_os = "linux")] diff --git a/tools/sidecar_mockgen/src/lib.rs b/tools/sidecar_mockgen/src/lib.rs index cc46bbdc13..486f5eb2e0 100644 --- a/tools/sidecar_mockgen/src/lib.rs +++ b/tools/sidecar_mockgen/src/lib.rs @@ -3,7 +3,10 @@ use object::macho::MachHeader64; use object::read::macho::{LoadCommandVariant, MachHeader}; -use object::{Endian, Endianness, File, FileKind, Object, ObjectSection, ObjectSymbol, Symbol, SymbolFlags, SymbolKind}; +use object::{ + Endian, Endianness, File, FileKind, Object, ObjectSection, ObjectSymbol, Symbol, SymbolFlags, + SymbolKind, +}; use std::collections::HashSet; use std::fmt::Write; use std::path::Path; @@ -25,7 +28,6 @@ fn check_and_parse<'a>( } } - fn sym_is_definition(sym: &Symbol) -> bool { if sym.is_definition() { return true; @@ -106,19 +108,18 @@ pub fn generate_mock_symbols(binary: &Path, objects: &[&Path]) -> Result Result<(), String> { - let data = fs::read(target) - .map_err(|e| format!("read {}: {e}", target.display()))?; + let data = fs::read(target).map_err(|e| format!("read {}: {e}", target.display()))?; let undefined_candidates: HashSet = File::parse(data.as_slice()) - .map_err(|e| format!("parse {}: {e}", target.display()))?.symbols() - .filter(|s| s.is_undefined() && !s.is_weak()) - .filter_map(|s| s.name().ok().map(|n| n.to_string())) - .collect(); + .map_err(|e| format!("parse {}: 
{e}", target.display()))? + .symbols() + .filter(|s| s.is_undefined() && !s.is_weak()) + .filter_map(|s| s.name().ok().map(|n| n.to_string())) + .collect(); // Filter symbols from binary. let symbols = { - let bin_data = fs::read(binary) - .map_err(|e| format!("read {}: {e}", binary.display()))?; + let bin_data = fs::read(binary).map_err(|e| format!("read {}: {e}", binary.display()))?; let so_file = File::parse(bin_data.as_slice()) .map_err(|e| format!("parse {}: {e}", binary.display()))?; let mut result = HashSet::new(); @@ -144,8 +145,7 @@ pub fn weaken_object_symbols(target: &Path, binary: &Path) -> Result<(), String> /// - ELF64: flips `st_bind` from `STB_GLOBAL(1)` → `STB_WEAK(2)` in `.symtab` /// - Mach-O64: sets `N_WEAK_REF(0x0040)` in `n_desc` in `LC_SYMTAB` fn weaken_symtab(obj_path: &Path, symbols: &HashSet) -> Result<(), String> { - let mut data = fs::read(obj_path) - .map_err(|e| format!("read {}: {e}", obj_path.display()))?; + let mut data = fs::read(obj_path).map_err(|e| format!("read {}: {e}", obj_path.display()))?; let modified = match FileKind::parse(data.as_slice()) .map_err(|e| format!("parse {}: {e}", obj_path.display()))? 
@@ -156,16 +156,14 @@ fn weaken_symtab(obj_path: &Path, symbols: &HashSet) -> Result<(), Strin }; if modified { - fs::write(obj_path, &data) - .map_err(|e| format!("write {}: {e}", obj_path.display()))?; + fs::write(obj_path, &data).map_err(|e| format!("write {}: {e}", obj_path.display()))?; } Ok(()) } fn weaken_elf(data: &mut [u8], symbols: &HashSet, obj_path: &Path) -> Result { let patches: Vec = { - let elf = File::parse(&*data) - .map_err(|e| format!("parse {}: {e}", obj_path.display()))?; + let elf = File::parse(&*data).map_err(|e| format!("parse {}: {e}", obj_path.display()))?; let symtab = match elf.section_by_name(".symtab") { Some(s) => s, @@ -179,7 +177,7 @@ fn weaken_elf(data: &mut [u8], symbols: &HashSet, obj_path: &Path) -> Re .filter(|sym| { sym.is_undefined() && !sym.is_weak() - && sym.name().map_or(false, |n| symbols.contains(n)) + && sym.name().is_ok_and(|n| symbols.contains(n)) }) .map(|sym| (symtab_off + sym.index().0 as u64 * 24 + 4) as usize) // sizeof(Elf64_Sym)=24; st_info at +4 .collect() @@ -195,10 +193,14 @@ fn weaken_elf(data: &mut [u8], symbols: &HashSet, obj_path: &Path) -> Re Ok(true) } -fn weaken_macho(data: &mut [u8], symbols: &HashSet, obj_path: &Path) -> Result { +fn weaken_macho( + data: &mut [u8], + symbols: &HashSet, + obj_path: &Path, +) -> Result { let patches: Vec<(usize, [u8; 2])> = { - let file = File::parse(&*data) - .map_err(|e| format!("parse macho {}: {e}", obj_path.display()))?; + let file = + File::parse(&*data).map_err(|e| format!("parse macho {}: {e}", obj_path.display()))?; // Mach-O symbol names have a leading '_' stripped when `symbols` was built. 
let indices: Vec = file @@ -206,9 +208,9 @@ fn weaken_macho(data: &mut [u8], symbols: &HashSet, obj_path: &Path) -> .filter(|sym| { sym.is_undefined() && !sym.is_weak() - && sym.name().map_or(false, |n| { - symbols.contains(n.strip_prefix('_').unwrap_or(n)) - }) + && sym + .name() + .is_ok_and(|n| symbols.contains(n.strip_prefix('_').unwrap_or(n))) }) .map(|sym| sym.index().0) .collect(); @@ -232,7 +234,14 @@ fn weaken_macho(data: &mut [u8], symbols: &HashSet, obj_path: &Path) -> u16::from_le_bytes(data[abs..abs + 2].try_into().ok()?) }; let new_val = old | 0x0040; // N_WEAK_REF - Some((abs, if is_be { new_val.to_be_bytes() } else { new_val.to_le_bytes() })) + Some(( + abs, + if is_be { + new_val.to_be_bytes() + } else { + new_val.to_le_bytes() + }, + )) }) .collect() }; From 3944df6e45d423f28356636835b84f9c711317f4 Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Wed, 13 May 2026 17:35:14 +0200 Subject: [PATCH 9/9] Use constant Signed-off-by: Bob Weinand --- spawn_worker/src/unix/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/spawn_worker/src/unix/mod.rs b/spawn_worker/src/unix/mod.rs index d7aa026044..4a48a1103c 100644 --- a/spawn_worker/src/unix/mod.rs +++ b/spawn_worker/src/unix/mod.rs @@ -86,8 +86,7 @@ pub fn read_pt_interp_self() -> Option { // load_base = runtime_addr_of_PHDRs − link-time vaddr of PHDRs load_base = phdr_addr as isize - p_vaddr as isize; } - if p_type == 3 { - // PT_INTERP = 3; the interpreter string lives at this vaddr. + if p_type == libc::PT_INTERP { interp_vaddr = p_vaddr; } }