CodSpeedHQ · not-matthias · Jul 3, 2026 · Jul 3, 2026 · greptile-apps · Jul 3, 2026
diff --git a/src/executor/shared/fifo.rs b/src/executor/shared/fifo.rs
@@ -1,3 +1,4 @@
+use crate::executor::shared::proc_maps::{read_proc_maps, ProcMapping};
 use crate::prelude::*;
 use anyhow::Context;
 use futures::StreamExt;
@@ -7,7 +8,10 @@ use runner_shared::fifo::{RUNNER_ACK_FIFO, RUNNER_CTL_FIFO};
 use std::cmp::Ordering;
 use std::os::unix::fs::OpenOptionsExt;
 use std::path::{Path, PathBuf};
-use std::{collections::HashSet, time::Duration};
+use std::{
+    collections::{HashMap, HashSet},
+    time::Duration,
+};
 use tokio::io::AsyncWriteExt;
 use tokio::net::unix::pid_t;
 use tokio::net::unix::pipe::Receiver as TokioPipeReader;
@@ -69,6 +73,11 @@ pub struct FifoBenchmarkData {
     /// Name and version of the integration
     pub integration: Option<(String, String)>,
     pub bench_pids: HashSet<pid_t>,
+    /// Executable file mappings snapshotted from `/proc/<pid>/maps` the first time
+    /// each benchmark pid announced itself over the FIFO (while it was still alive).
+    /// Backfills code mappings that perf's `MMAP2` records miss for processes that
+    /// exec()'d while sampling was disabled.
+    pub maps_by_pid: HashMap<pid_t, Vec<ProcMapping>>,
 }
 
 impl FifoBenchmarkData {
@@ -171,6 +180,7 @@ impl RunnerFifo {
     )> {
         let mut bench_order_by_timestamp = Vec::<(u64, String)>::new();
         let mut bench_pids = HashSet::<pid_t>::new();
+        let mut maps_by_pid = HashMap::<pid_t, Vec<ProcMapping>>::new();
         let mut markers = Vec::<MarkerType>::new();
 
         let mut integration = None;
@@ -207,7 +217,15 @@ impl RunnerFifo {
                 match &cmd {
                     FifoCommand::CurrentBenchmark { pid, uri } => {
                         bench_order_by_timestamp.push((get_current_time(), uri.to_string()));
-                        bench_pids.insert(*pid);
+                        // Snapshot the process's code mappings the first time we see it,
+                        // while it is still alive, to recover mappings perf drops for
+                        // processes that exec()'d while sampling was disabled.
+                        if bench_pids.insert(*pid) {
+                            let mappings = read_proc_maps(*pid);
+                            if !mappings.is_empty() {
+                                maps_by_pid.insert(*pid, mappings);
+                            }
+                        }
                         self.send_cmd(FifoCommand::Ack).await?;
                     }
                     FifoCommand::StartProfiler => {
@@ -278,6 +296,7 @@ impl RunnerFifo {
                     let fifo_data = FifoBenchmarkData {
                         integration,
                         bench_pids,
+                        maps_by_pid,
                     };
                     return Ok((marker_result, fifo_data, exit_status));
                 }

diff --git a/src/executor/shared/mod.rs b/src/executor/shared/mod.rs
@@ -1 +1,2 @@
 pub mod fifo;
+pub mod proc_maps;
diff --git a/src/executor/shared/proc_maps.rs b/src/executor/shared/proc_maps.rs
@@ -0,0 +1,136 @@
+use crate::prelude::*;
+use std::path::PathBuf;
+use tokio::net::unix::pid_t;
+
+/// One executable file mapping of a live process, read from `/proc/<pid>/maps`.
+#[derive(Debug, Clone)]
+pub struct ProcMapping {
+    /// Absolute path of the mapped ELF file.
+    pub path: PathBuf,
+    /// Runtime start address of the mapping.
+    pub start: u64,
+    /// Runtime end address of the mapping.
+    pub end: u64,
+    /// File offset of the mapping (matches perf's `MMAP2` page offset).
+    pub offset: u64,
+}
+
+/// Best-effort snapshot of a live process's executable file mappings.
+///
+/// perf only emits `MMAP2`/`COMM` while its event is enabled, so any benchmark
+/// process that exec()'d while sampling was disabled (every one after the first in
+/// a run) is missing its code mappings in the perf data. Reading `/proc/<pid>/maps`
+/// while the process is alive recovers those load addresses so its samples can be
+/// symbolized. Returns empty on any error rather than failing the run.
+pub fn read_proc_maps(pid: pid_t) -> Vec<ProcMapping> {
+    if !cfg!(target_os = "linux") {
+        return Vec::new();
+    }
+
+    let path = format!("/proc/{pid}/maps");
+    match std::fs::read_to_string(&path) {
+        Ok(content) => parse_proc_maps(&content),
+        Err(e) => {
+            warn!("Failed to read {path} for benchmark pid {pid}: {e}");
+            Vec::new()
+        }
+    }
+}
+
+/// Parse the executable file mappings out of `/proc/<pid>/maps` content.
+///
+/// Each line is `start-end perms offset dev inode pathname`. We keep only
+/// executable (`x`) file-backed mappings; anonymous/special (`[vdso]`, `[heap]`, …)
+/// and deleted files are skipped. The numeric fields never contain `/` or `[`, so
+/// the pathname is taken from the first such character, which tolerates spaces in
+/// the path.
+fn parse_proc_maps(content: &str) -> Vec<ProcMapping> {
+    let mut mappings = Vec::new();
+
+    for line in content.lines() {
+        let mut fields = line.split_whitespace();
+        let (Some(range), Some(perms), Some(offset)) =
+            (fields.next(), fields.next(), fields.next())
+        else {
+            continue;
+        };
+
+        if !perms.contains('x') {
+            continue;
+        }
+
+        let Some(path_start) = line.find(['/', '[']) else {
+            continue;
+        };
+        let pathname = line[path_start..].trim();
+        if pathname.starts_with('[') {
+            continue;
+        }
+        let pathname = pathname.strip_suffix(" (deleted)").unwrap_or(pathname);
+
+        let Some((start, end)) = range.split_once('-') else {
+            continue;
+        };
+        let (Ok(start), Ok(end), Ok(offset)) = (
+            u64::from_str_radix(start, 16),
+            u64::from_str_radix(end, 16),
+            u64::from_str_radix(offset, 16),
+        ) else {
+            continue;
+        };
+
+        mappings.push(ProcMapping {
+            path: PathBuf::from(pathname),
+            start,
+            end,
+            offset,
+        });
+    }
+
+    mappings
+}
+
+#[cfg(all(test, target_os = "linux"))]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parse_proc_maps_keeps_only_executable_file_mappings() {
+        let content = "\
+55a0f0000000-55a0f0001000 r--p 00000000 fe:01 100 /bin/bench
+55a0f0001000-55a0f0100000 r-xp 00001000 fe:01 100 /bin/bench
+55a0f0100000-55a0f0200000 r--p 00100000 fe:01 100 /bin/bench
+7f00aa000000-7f00aa100000 r-xp 00002000 fe:01 200 /usr/lib/libc.so.6
+7f00bb000000-7f00bb001000 rw-p 00000000 00:00 0
+7ffd00000000-7ffd00021000 rw-p 00000000 00:00 0 [stack]
+7ffd00021000-7ffd00023000 r-xp 00000000 00:00 0 [vdso]
+7f00cc000000-7f00cc010000 r-xp 00000000 fe:01 300 /tmp/old.so (deleted)
+";
+        let maps = parse_proc_maps(content);
+
+        // Only the two live executable file mappings (bench .text, libc .text) and the
+        // deleted one (path stripped) are kept; non-exec, anon, [stack], [vdso] dropped.
+        let paths: Vec<_> = maps
+            .iter()
+            .map(|m| m.path.to_string_lossy().into_owned())
+            .collect();
+        assert_eq!(paths, vec!["/bin/bench", "/usr/lib/libc.so.6", "/tmp/old.so"]);
+
+        let bench = &maps[0];
+        assert_eq!(bench.start, 0x55a0f0001000);
+        assert_eq!(bench.end, 0x55a0f0100000);
+        assert_eq!(bench.offset, 0x1000);
+    }
+
+    #[test]
+    fn parse_proc_maps_handles_paths_with_spaces() {
+        let content =
+            "400000-401000 r-xp 00000000 fe:01 100 /home/user/my bench/target/deps/thing\n";
+        let maps = parse_proc_maps(content);
+        assert_eq!(maps.len(), 1);
+        assert_eq!(
+            maps[0].path.to_string_lossy(),
+            "/home/user/my bench/target/deps/thing"
+        );
+    }
+}
diff --git a/src/executor/wall_time/profiler/perf/mod.rs b/src/executor/wall_time/profiler/perf/mod.rs
@@ -268,13 +268,39 @@ impl BenchmarkData<'_> {
         debug!("Pid filter for perf file parsing: {pid_filter:?}");
         debug!("Reading perf data from file for mmap extraction");
         let MemmapRecordsOutput {
-            loaded_modules_by_path,
+            mut loaded_modules_by_path,
             tracked_pids,
         } = parse_perf_file::parse_for_memmap2(perf_file_path, pid_filter).map_err(|e| {
             error!("Failed to parse perf file: {e}");
             BenchmarkDataSaveError::FailedToParsePerfFile
         })?;
 
+        // perf only emits MMAP2 while its event is enabled, so benchmark processes
+        // that exec()'d while sampling was disabled (every one after the first) are
+        // missing their executable mappings above. Backfill them from the
+        // /proc/<pid>/maps snapshots taken live when each pid announced itself.
+        for (pid, mappings) in &self.fifo_data.maps_by_pid {
+            for mapping in mappings {
+                let already_mapped = loaded_modules_by_path
+                    .get(&mapping.path)
+                    .is_some_and(|module| module.process_loaded_modules.contains_key(pid));
+                if already_mapped {
+                    continue;
+                }
+
+                let path_string = mapping.path.to_string_lossy();
+                parse_perf_file::add_module_mapping(
+                    &mut loaded_modules_by_path,
+                    &mapping.path,
+                    &path_string,
+                    mapping.start,
+                    mapping.end,
+                    mapping.offset,
+                    *pid,
+                );
+            }
+        }
+
         // Harvest the perf maps generated by python. This will copy the perf
         // maps from /tmp to the profile folder. We have to write our own perf
         // maps to these files AFTERWARDS, otherwise it'll be overwritten!

diff --git a/src/executor/wall_time/profiler/perf/parse_perf_file.rs b/src/executor/wall_time/profiler/perf/parse_perf_file.rs
@@ -245,31 +245,49 @@ fn process_mmap2_record(
         record.protection,
     );
 
-    let load_bias = match ModuleSymbols::compute_load_bias(
+    add_module_mapping(
+        loaded_modules_by_path,
         &record_path,
+        &record_path_string,
         record.address,
         end_addr,
         record.page_offset,
-    ) {
-        Ok(load_bias) => load_bias,
-        Err(e) => {
-            debug!("Failed to compute load bias for {record_path_string}: {e}");
-            return;
-        }
-    };
+        record.pid,
+    );
+}
+
+/// Record one executable module mapping for `pid`: compute its load bias, extract
+/// the module's ELF symbols (once per module) and its per-process unwind data.
+///
+/// Shared by the perf `MMAP2` path and the `/proc/<pid>/maps` fallback, which both
+/// supply the same `(path, start, end, file offset)` for an executable mapping.
+pub(super) fn add_module_mapping(
+    loaded_modules_by_path: &mut HashMap<PathBuf, LoadedModule>,
+    record_path: &Path,
+    record_path_string: &str,
+    start_addr: u64,
+    end_addr: u64,
+    page_offset: u64,
+    pid: pid_t,
+) {
+    let load_bias =
+        match ModuleSymbols::compute_load_bias(record_path, start_addr, end_addr, page_offset) {
+            Ok(load_bias) => load_bias,
+            Err(e) => {
+                debug!("Failed to compute load bias for {record_path_string}: {e}");
+                return;
+            }
+        };
 
     let loaded_module = loaded_modules_by_path
-        .entry(record_path.clone())
+        .entry(record_path.to_path_buf())
         .or_default();
 
-    let process_loaded_module = loaded_module
-        .process_loaded_modules
-        .entry(record.pid)
-        .or_default();
+    let process_loaded_module = loaded_module.process_loaded_modules.entry(pid).or_default();
 
     // Extract module symbols if it's no module symbol from path
     if loaded_module.module_symbols.is_none() {
-        match ModuleSymbols::from_elf(&record_path) {
+        match ModuleSymbols::from_elf(record_path) {
             Ok(symbols) => loaded_module.module_symbols = Some(symbols),
             Err(error) => {
                 debug!("Failed to load symbols for module {record_path_string}: {error}");
@@ -283,7 +301,7 @@ fn process_mmap2_record(
     // Extract unwind_data
     match unwind_data_from_elf(
         record_path_string.as_bytes(),
-        record.address,
+        start_addr,
         end_addr,
         None,
         load_bias,