From 7bbe86294563101df97ee241b2c86a1715455570 Mon Sep 17 00:00:00 2001
From: TheArtificialQ <TheArtificialQ@pm.me>
Date: Wed, 1 Jul 2026 10:27:54 +0200
Subject: [PATCH] feat: add support for setup script execution before scans

---
 README.md                                     |   3 +
 docs/usage/cli.mdx                            |  10 +
 strix/core/runner.py                          |   4 +
 strix/interface/assets/tui_styles.tcss        |   3 +
 strix/interface/cli.py                        |   1 +
 strix/interface/main.py                       |  38 ++++
 strix/interface/tui/app.py                    |  29 +++
 strix/interface/tui/live_view.py              |  46 ++++
 .../interface/tui/renderers/shell_renderer.py |  56 +++++
 strix/runtime/session_manager.py              | 121 ++++++++++
 tests/test_setup_script.py                    | 208 ++++++++++++++++++
 11 files changed, 519 insertions(+)
 create mode 100644 tests/test_setup_script.py
diff --git a/README.md b/README.md
index 5c93f1076..f9e6b3549 100644
--- a/README.md
+++ b/README.md
@@ -177,6 +177,9 @@ strix --target api.your-app.com --instruction "Focus on business logic flaws and
 # Provide detailed instructions through file (e.g., rules of engagement, scope, exclusions)
 strix --target api.your-app.com --instruction-file ./instruction.md
 
+# Prepare the sandbox before scanning (install dependencies, seed data, connect VPN, etc.)
+strix --target ./app-directory --setup-script ./scripts/prepare-sandbox.sh
+
 # Force PR diff-scope against a specific base branch
 strix -n --target ./ --scan-mode quick --scope-mode diff --diff-base origin/main
 ```
diff --git a/docs/usage/cli.mdx b/docs/usage/cli.mdx
index 58d3e99df..96dd66436 100644
--- a/docs/usage/cli.mdx
+++ b/docs/usage/cli.mdx
@@ -29,6 +29,13 @@ strix --target <target> [options]
   </Note>
 </ParamField>
 
+<ParamField path="--setup-script" type="string">
+  Path to a bash script to execute inside the Docker container as the first step before the scan begins.
+  Use this to install dependencies, seed databases, establish VPN connections, or perform other environment preparation.
+
+  The script is bind-mounted read-only into the sandbox and run with `bash`. If it exits unsuccessfully, sandbox setup fails with an error.
+</ParamField>
+
 <ParamField path="--instruction" type="string">
   Custom instructions for the scan. Use for credentials, focus areas, or specific testing approaches.
 </ParamField>
@@ -98,6 +105,9 @@ strix -t https://github.com/org/app -t https://staging.example.com
 
 # Large local repository — bind-mount instead of copying it in
 strix --mount ./huge-monorepo
+
+# Prepare the sandbox before scanning
+strix --target ./app --setup-script ./scripts/prepare-sandbox.sh
 ```
 
 ## Exit Codes
diff --git a/strix/core/runner.py b/strix/core/runner.py
index 90becf870..e5b8bd236 100644
--- a/strix/core/runner.py
+++ b/strix/core/runner.py
@@ -49,6 +49,7 @@
 logger = logging.getLogger(__name__)
 
 StreamEventSink = Callable[[str, Any], None]
+SetupScriptEventSink = Callable[[dict[str, Any]], None]
 
 
 async def run_strix_scan(
@@ -64,6 +65,7 @@ async def run_strix_scan(
     model: str | None = None,
     cleanup_on_exit: bool = True,
     event_sink: StreamEventSink | None = None,
+    setup_script_event_sink: SetupScriptEventSink | None = None,
 ) -> RunResultBase | None:
     """Run or resume one Strix scan against a sandbox."""
     if scan_id is None:
@@ -144,6 +146,8 @@ async def run_strix_scan(
         scan_id,
         image=image,
         local_sources=local_sources or [],
+        setup_script=scan_config.get("setup_script"),
+        setup_script_event_sink=setup_script_event_sink,
     )
     logger.info("Sandbox ready for scan %s", scan_id)
 
diff --git a/strix/interface/assets/tui_styles.tcss b/strix/interface/assets/tui_styles.tcss
index d2984c8bc..6e309cbc2 100644
--- a/strix/interface/assets/tui_styles.tcss
+++ b/strix/interface/assets/tui_styles.tcss
@@ -386,6 +386,7 @@ VulnerabilityDetailScreen {
 
 .browser-tool,
 .terminal-tool,
+.setup-script-tool,
 .agents-graph-tool,
 .file-edit-tool,
 .proxy-tool,
@@ -410,6 +411,8 @@ VulnerabilityDetailScreen {
 .browser-tool.status-running,
 .terminal-tool.status-completed,
 .terminal-tool.status-running,
+.setup-script-tool.status-completed,
+.setup-script-tool.status-running,
 .agents-graph-tool.status-completed,
 .agents-graph-tool.status-running,
 .file-edit-tool.status-completed,
diff --git a/strix/interface/cli.py b/strix/interface/cli.py
index f50791203..d5402bece 100644
--- a/strix/interface/cli.py
+++ b/strix/interface/cli.py
@@ -94,6 +94,7 @@ async def run_cli(args: Any) -> None:  # noqa: PLR0915
         "scope_mode": getattr(args, "scope_mode", "auto"),
         "diff_base": getattr(args, "diff_base", None),
         "resume_instruction": getattr(args, "user_explicit_instruction", None) or "",
+        "setup_script": getattr(args, "setup_script", None),
     }
 
     report_state = ReportState(args.run_name)
diff --git a/strix/interface/main.py b/strix/interface/main.py
index 25df61882..344d54285 100644
--- a/strix/interface/main.py
+++ b/strix/interface/main.py
@@ -315,6 +315,25 @@ def _positive_budget(value: str) -> float:
     return budget
 
 
+def _resolve_setup_script_path(
+    value: str | None,
+    parser: argparse.ArgumentParser,
+) -> str | None:
+    """Resolve ``--setup-script`` to an absolute file path or exit via ``parser.error``."""
+    if value is None:
+        return None
+    try:
+        # Python < 3.13 reports symlink loops as RuntimeError rather than OSError.
+        resolved = Path(value).expanduser().resolve(strict=True)
+    except (OSError, RuntimeError) as exc:
+        parser.error(f"--setup-script path '{value}' is not readable: {exc}")
+
+    if not resolved.is_file():
+        parser.error(f"--setup-script requires a path to a file: {value}")
+
+    return str(resolved)
+
+
 def parse_arguments() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         description="Strix Multi-Agent Cybersecurity Penetration Testing Tool",
@@ -334,6 +353,9 @@ def parse_arguments() -> argparse.Namespace:
   # Large local repository (bind-mounted read-only instead of copied)
   strix --mount ./huge-monorepo
 
+  # Prepare the sandbox before scanning
+  strix --target ./my-project --setup-script ./scripts/prepare-sandbox.sh
+
   # Domain penetration test
   strix --target example.com
 
@@ -378,6 +400,17 @@ def parse_arguments() -> argparse.Namespace:
         "copying it file-by-file. Use this for large repositories that are too big to "
         "stream into the container. Can be specified multiple times.",
     )
+    parser.add_argument(
+        "--setup-script",
+        type=str,
+        metavar="PATH",
+        help=(
+            "Path to a bash script to execute inside the Docker container as the "
+            "first step before the scan begins. Useful for installing dependencies, "
+            "seeding databases, establishing VPN connections, or other environment "
+            "preparation."
+        ),
+    )
     parser.add_argument(
         "--instruction",
         type=str,
@@ -548,6 +581,8 @@ def parse_arguments() -> argparse.Namespace:
                 "--mount <path> to bind-mount the directory instead of copying it."
             )
 
+    args.setup_script = _resolve_setup_script_path(args.setup_script, parser)
+
     return args
 
 
@@ -568,6 +603,7 @@ def _persist_run_record(args: argparse.Namespace) -> None:
         "diff_scope": getattr(args, "diff_scope", {"active": False}),
         "scope_mode": args.scope_mode,
         "diff_base": args.diff_base,
+        "setup_script": args.setup_script,
     }
     write_run_record(run_dir, run_record)
 
@@ -612,6 +648,8 @@ def _load_resume_state(args: argparse.Namespace, parser: argparse.ArgumentParser
         args.local_sources = state.get("local_sources")
     if state.get("diff_scope"):
         args.diff_scope = state.get("diff_scope")
+    if args.setup_script is None and state.get("setup_script"):
+        args.setup_script = state.get("setup_script")
     persisted_scan_mode = state.get("scan_mode")
     if persisted_scan_mode and args.scan_mode == "deep":
         args.scan_mode = persisted_scan_mode
diff --git a/strix/interface/tui/app.py b/strix/interface/tui/app.py
index 4e43d9795..3a02908ab 100644
--- a/strix/interface/tui/app.py
+++ b/strix/interface/tui/app.py
@@ -745,6 +745,7 @@ def _build_scan_config(self, args: argparse.Namespace) -> dict[str, Any]:
             "scope_mode": getattr(args, "scope_mode", "auto"),
             "diff_base": getattr(args, "diff_base", None),
             "resume_instruction": getattr(args, "user_explicit_instruction", None) or "",
+            "setup_script": getattr(args, "setup_script", None),
         }
 
     def _setup_cleanup_handlers(self) -> None:
@@ -1127,6 +1128,19 @@ def keymap_styled(keys: list[tuple[str, str]]) -> Text:
             "completed": ("Agent completed", ""),
         }
 
+        if agent_data.get("kind") == "setup_script":
+            if status == "running":
+                text = self._get_animated_verb_text(agent_id, "Running setup script")
+                return (text, keymap_styled([("ctrl-q", "quit")]), True)
+            if status == "completed":
+                text = Text()
+                text.append("Setup script completed")
+                return (text, Text(), False)
+            if status == "failed":
+                text = Text()
+                text.append("Setup script failed", style="red")
+                return (text, Text(), False)
+
         if status in simple_statuses:
             msg, _ = simple_statuses[status]
             text = Text()
@@ -1372,6 +1386,7 @@ def scan_target() -> None:
                                 interactive=True,
                                 max_budget_usd=getattr(self.args, "max_budget_usd", None),
                                 event_sink=self._capture_sdk_event,
+                                setup_script_event_sink=self._capture_setup_script_event,
                             ),
                         )
 
@@ -1415,6 +1430,18 @@ def _capture_sdk_event(self, agent_id: str, event: Any) -> None:
     def _record_sdk_event(self, agent_id: str, event: Any) -> None:
         self.live_view.ingest_sdk_event(agent_id, event)
 
+    def _capture_setup_script_event(self, event: dict[str, Any]) -> None:
+        try:  # hand the event to the app via call_from_thread
+            self.call_from_thread(self._record_setup_script_event, event)
+        except RuntimeError:  # NOTE(review): presumably raised when already on the app thread
+            self._record_setup_script_event(event)
+
+    def _record_setup_script_event(self, event: dict[str, Any]) -> None:
+        self.live_view.record_setup_script_event(event)
+        self._displayed_events.clear()  # presumably forces a full chat re-render — verify
+        self._update_chat_view()
+        self._update_agent_status_display()
+
     def _add_agent_node(self, agent_data: dict[str, Any]) -> None:
         if len(self.screen_stack) > 1 or self.show_splash:
             return
@@ -1685,6 +1712,8 @@ def _validate_agent_for_stopping(self) -> tuple[str, bool]:
             if self.selected_agent_id in self.live_view.agents:
                 agent_data = self.live_view.agents[self.selected_agent_id]
                 agent_name = agent_data.get("name", "Unknown Agent")
+                if agent_data.get("kind") == "setup_script":
+                    return agent_name, False
 
                 agent_status = agent_data.get("status", "running")
                 if agent_status not in ["running", "waiting"]:
diff --git a/strix/interface/tui/live_view.py b/strix/interface/tui/live_view.py
index 993074d67..66fd9ebef 100644
--- a/strix/interface/tui/live_view.py
+++ b/strix/interface/tui/live_view.py
@@ -14,6 +14,10 @@
 from strix.interface.tui.history import load_session_history
 
 
+SETUP_SCRIPT_AGENT_ID = "setup-script"
+_SETUP_SCRIPT_CALL_ID = "setup-script"
+
+
 class TuiLiveView:
     def __init__(self) -> None:
         self.agents: dict[str, dict[str, Any]] = {}
@@ -97,6 +101,48 @@ def record_user_message(self, agent_id: str, content: str) -> None:
             },
         )
 
+    def record_setup_script_event(self, data: dict[str, Any]) -> None:
+        """Mirror a setup-script lifecycle event into the agent and tool-call records.
+
+        Every event upserts the "Setup Script" agent and its tool call; terminal
+        statuses (``completed``/``failed``/``error``) additionally record the output.
+        """
+        status = str(data.get("status") or "running")
+        errored = status in {"failed", "error"}
+        self.upsert_agent(
+            SETUP_SCRIPT_AGENT_ID,
+            name="Setup Script",
+            parent_id=None,
+            status="failed" if errored else status,
+        )
+        self.agents[SETUP_SCRIPT_AGENT_ID]["kind"] = "setup_script"
+
+        # The tool call is (re)recorded on every event, before any output.
+        call_args = {
+            "script": data.get("source_path"),
+            "container_path": data.get("container_path"),
+            "command": data.get("command"),
+        }
+        call = {"call_id": _SETUP_SCRIPT_CALL_ID, "tool_name": "setup_script", "args": call_args}
+        self._record_tool_call_data(SETUP_SCRIPT_AGENT_ID, call)
+
+        # Non-terminal statuses carry no output yet.
+        finished = status == "completed"
+        if not (finished or errored):
+            return
+
+        output = {
+            "success": finished,
+            "stdout": data.get("stdout", ""),
+            "stderr": data.get("stderr", ""),
+            "exit_code": data.get("exit_code"),
+            "duration_seconds": data.get("duration_seconds"),
+        }
+        self._record_tool_output_data(
+            SETUP_SCRIPT_AGENT_ID,
+            {"call_id": _SETUP_SCRIPT_CALL_ID, "tool_name": "setup_script", "output": output},
+        )
+
     def ingest_sdk_event(self, agent_id: str, event: Any) -> None:
         event_type = getattr(event, "type", "")
         if event_type == "raw_response_event":
diff --git a/strix/interface/tui/renderers/shell_renderer.py b/strix/interface/tui/renderers/shell_renderer.py
index 131bd6cd0..8b4b3afe4 100644
--- a/strix/interface/tui/renderers/shell_renderer.py
+++ b/strix/interface/tui/renderers/shell_renderer.py
@@ -64,6 +64,26 @@ def _parse_sdk_shell_result(result: Any) -> dict[str, Any]:
     return parsed
 
 
+def _parse_setup_script_result(result: Any) -> dict[str, Any]:
+    """Convert a setup-script result into the ``content``/``exit_code`` terminal form.
+
+    Non-dict results are delegated to ``_parse_sdk_shell_result``.
+    """
+    if not isinstance(result, dict):
+        return _parse_sdk_shell_result(result)
+
+    streams = {name: str(result.get(name) or "").strip() for name in ("stdout", "stderr")}
+    # Only streams that produced text get a labelled section.
+    sections = [f"{name}:\n{text}" for name, text in streams.items() if text]
+    content = "\n\n".join(sections) if sections else "(no output)"
+
+    # Omit the key entirely when no exit code was reported.
+    exit_code = result.get("exit_code")
+    if exit_code is None:
+        return {"content": content}
+    return {"content": content, "exit_code": exit_code}
+
+
 def _truncate_line(line: str) -> str:
     if len(line) > MAX_LINE_LENGTH:
         return line[: MAX_LINE_LENGTH - 3] + "..."
@@ -237,6 +257,42 @@ def render(cls, tool_data: dict[str, Any]) -> Static:
         return Static(content, classes=cls.get_css_classes(status))
 
 
+@register_tool_renderer
+class SetupScriptRenderer(BaseToolRenderer):
+    """Render the ``setup_script`` tool call as a terminal-style block."""
+
+    tool_name: ClassVar[str] = "setup_script"
+    css_classes: ClassVar[list[str]] = ["tool-call", "terminal-tool", "setup-script-tool"]
+
+    @classmethod
+    def render(cls, tool_data: dict[str, Any]) -> Static:
+        args = tool_data.get("args", {})
+        status = tool_data.get("status", "unknown")
+        result = tool_data.get("result")
+        command = str(args.get("command") or "bash /tmp/strix-setup-script.sh")
+        script = args.get("script")
+        duration = result.get("duration_seconds") if isinstance(result, dict) else None
+        # Meta line: host script path plus the duration when the result reports one.
+        meta_parts: list[str] = []
+        if script:
+            meta_parts.append(f"script:{script}")
+        if isinstance(duration, int | float):
+            meta_parts.append(f"{duration:.1f}s")
+        meta = ", ".join(meta_parts) if meta_parts else None
+        # With no result recorded yet, only the command is rendered.
+        parsed = _parse_setup_script_result(result) if result is not None else None
+        content = _build_terminal_content(
+            prompt="setup",
+            prompt_style="#22c55e",
+            command=command,
+            parsed_result=parsed,
+            tool_status=status,
+            meta=meta,
+        )
+
+        return Static(content, classes=cls.get_css_classes(status))
+
+
 @register_tool_renderer
 class WriteStdinRenderer(BaseToolRenderer):
     tool_name: ClassVar[str] = "write_stdin"
diff --git a/strix/runtime/session_manager.py b/strix/runtime/session_manager.py
index a19495cc5..1413ebaf3 100644
--- a/strix/runtime/session_manager.py
+++ b/strix/runtime/session_manager.py
@@ -2,7 +2,10 @@
 
 from __future__ import annotations
 
+import contextlib
 import logging
+import time
+from collections.abc import Callable
 from pathlib import Path
 from typing import Any
 
@@ -15,6 +18,7 @@
 
 
 logger = logging.getLogger(__name__)
+SetupScriptEventSink = Callable[[dict[str, Any]], None]
 
 
 # In-container Caido sidecar port (matches the image's caido-cli bind).
@@ -25,6 +29,8 @@
 
 # Manifest root inside the container; entry keys hang off this path.
 _WORKSPACE_ROOT = "/workspace"
+_SETUP_SCRIPT_CONTAINER_PATH = "/tmp/strix-setup-script.sh"
+_SETUP_SCRIPT_TIMEOUT_SECONDS = 3600
 
 
 def build_session_entries(
@@ -58,11 +64,116 @@ def build_session_entries(
     return entries, bind_mounts
 
 
+def build_setup_script_mount(setup_script: str | None) -> dict[str, Any] | None:
+    """Describe the read-only bind mount for the host setup script.
+
+    Returns ``None`` when no script is configured. Raises ``FileNotFoundError``
+    when the resolved path is not an existing file.
+    """
+    if not setup_script:
+        return None
+
+    host_path = Path(setup_script).expanduser().resolve()
+    if host_path.is_file():
+        source, target = str(host_path), _SETUP_SCRIPT_CONTAINER_PATH
+        return {"source": source, "target": target, "read_only": True}
+
+    message = f"Setup script does not exist or is not a file: {setup_script}"
+    raise FileNotFoundError(message)
+
+
+def _exec_text(value: Any) -> str:
+    """Coerce exec output to ``str``: bytes decode as UTF-8, falsy values become empty."""
+    decoded = value.decode("utf-8", errors="replace") if isinstance(value, bytes) else value
+    return str(decoded or "")
+
+
+def _emit_setup_script_event(
+    event_sink: SetupScriptEventSink | None,
+    data: dict[str, Any],
+) -> None:
+    """Pass ``data`` to ``event_sink`` when set; sink errors are logged, never raised."""
+    if event_sink is not None:
+        try:
+            event_sink(data)
+        except Exception:  # a failing listener must not abort sandbox setup
+            logger.exception("Setup script event sink failed")
+
+
+async def execute_setup_script(
+    session: Any,
+    *,
+    source_path: str,
+    event_sink: SetupScriptEventSink | None = None,
+) -> None:
+    """Run the configured setup script inside the already-started sandbox.
+
+    A ``running`` event is always followed by exactly one ``completed`` or
+    ``failed`` event, even when ``session.exec`` itself raises (for example on
+    a timeout), so listeners such as the TUI never stay stuck on "running".
+
+    Raises:
+        RuntimeError: if the script exits unsuccessfully.
+        Exception: any error raised by ``session.exec``, re-raised unchanged.
+    """
+    logger.info("Running setup script inside sandbox: %s", _SETUP_SCRIPT_CONTAINER_PATH)
+    base_event: dict[str, Any] = {
+        "source_path": source_path,
+        "container_path": _SETUP_SCRIPT_CONTAINER_PATH,
+        "command": f"bash {_SETUP_SCRIPT_CONTAINER_PATH}",
+    }
+
+    def emit(status: str, **extra: Any) -> None:
+        _emit_setup_script_event(event_sink, {"status": status, **base_event, **extra})
+
+    started = time.perf_counter()
+    emit("running")
+    try:
+        result = await session.exec(
+            "bash",
+            _SETUP_SCRIPT_CONTAINER_PATH,
+            timeout=_SETUP_SCRIPT_TIMEOUT_SECONDS,
+        )
+    except Exception as exc:
+        # The script never produced a result; report that before propagating.
+        emit(
+            "failed",
+            stdout="",
+            stderr=f"{type(exc).__name__}: {exc}",
+            exit_code=None,
+            duration_seconds=time.perf_counter() - started,
+        )
+        raise
+    elapsed = time.perf_counter() - started
+    stdout = _exec_text(getattr(result, "stdout", "")).strip()
+    stderr = _exec_text(getattr(result, "stderr", "")).strip()
+    exit_code = getattr(result, "exit_code", "unknown")
+    succeeded = bool(result.ok())
+    emit(
+        "completed" if succeeded else "failed",
+        stdout=stdout,
+        stderr=stderr,
+        exit_code=exit_code,
+        duration_seconds=elapsed,
+    )
+    if not succeeded:
+        raise RuntimeError(
+            "Setup script failed inside sandbox "
+            f"(exit {exit_code}). stdout: {stdout[-2000:]!r} stderr: {stderr[-2000:]!r}"
+        )
+    if stdout:
+        logger.info("Setup script completed successfully: %s", stdout[-1000:])
+    else:
+        logger.info("Setup script completed successfully")
+
+
 async def create_or_reuse(
     scan_id: str,
     *,
     image: str,
     local_sources: list[dict[str, Any]],
+    setup_script: str | None = None,
+    setup_script_event_sink: SetupScriptEventSink | None = None,
 ) -> dict[str, Any]:
     """Return the existing session bundle for ``scan_id`` or create a new one.
 
@@ -76,6 +187,9 @@ async def create_or_reuse(
         return cached
 
     entries, bind_mounts = build_session_entries(local_sources)
+    setup_script_mount = build_setup_script_mount(setup_script)
+    if setup_script_mount is not None:
+        bind_mounts.append(setup_script_mount)
 
     # Caido runs as an in-container sidecar; HTTP(S) traffic from any
     # process started via ``session.exec`` (the SDK's Shell tool, etc.)
@@ -123,6 +237,13 @@ async def create_or_reuse(
         container_url=container_caido_url,
     )
 
+    if setup_script_mount is not None:
+        await execute_setup_script(
+            session,
+            source_path=str(setup_script_mount["source"]),
+            event_sink=setup_script_event_sink,
+        )
+
     bundle = {
         "client": client,
         "session": session,
diff --git a/tests/test_setup_script.py b/tests/test_setup_script.py
new file mode 100644
index 000000000..bd07c3fc1
--- /dev/null
+++ b/tests/test_setup_script.py
@@ -0,0 +1,208 @@
+"""Tests for sandbox setup-script mounting and execution."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from typing import TYPE_CHECKING, Any
+
+import pytest
+
+from strix.interface.tui.live_view import SETUP_SCRIPT_AGENT_ID, TuiLiveView
+from strix.runtime import session_manager
+from strix.runtime.session_manager import (
+    build_setup_script_mount,
+    execute_setup_script,
+)
+
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+
+class FakeExecResult:  # test double exposing exit_code/stdout/stderr and ok()
+    def __init__(
+        self,
+        *,
+        exit_code: int = 0,
+        stdout: str | bytes = "",  # bytes are accepted as well as str
+        stderr: str | bytes = "",
+    ) -> None:
+        self.exit_code = exit_code
+        self.stdout = stdout
+        self.stderr = stderr
+
+    def ok(self) -> bool:  # success means a zero exit code
+        return self.exit_code == 0
+
+
+class FakeSession:  # test double that records exec() calls and returns a canned result
+    def __init__(self, result: FakeExecResult | None = None) -> None:
+        self.result = result or FakeExecResult()
+        self.exec_calls: list[tuple[tuple[Any, ...], dict[str, Any]]] = []  # (args, kwargs)
+
+    async def exec(self, *args: Any, **kwargs: Any) -> FakeExecResult:
+        self.exec_calls.append((args, kwargs))
+        return self.result
+
+    async def resolve_exposed_port(self, _port: int) -> SimpleNamespace:
+        return SimpleNamespace(host="127.0.0.1", port=12345)  # fixed fake endpoint
+
+
+def test_build_setup_script_mount_resolves_file(tmp_path: Path) -> None:
+    """An existing script yields a read-only mount at the fixed container path."""
+    script = tmp_path / "setup.sh"
+    script.write_text("#!/usr/bin/env bash\n", encoding="utf-8")
+    assert build_setup_script_mount(str(script)) == {
+        "source": str(script.resolve()),
+        "target": "/tmp/strix-setup-script.sh",
+        "read_only": True,
+    }
+
+
+def test_build_setup_script_mount_rejects_missing_file(tmp_path: Path) -> None:
+    with pytest.raises(FileNotFoundError, match="Setup script does not exist"):
+        build_setup_script_mount(str(tmp_path / "missing.sh"))  # path is never created
+
+
+def test_live_view_records_setup_script_output() -> None:
+    """A running event followed by a completed event is recorded on the setup agent."""
+    live_view = TuiLiveView()
+    live_view.record_setup_script_event(
+        {
+            "status": "running",
+            "source_path": "/host/setup.sh",
+            "container_path": "/tmp/strix-setup-script.sh",
+            "command": "bash /tmp/strix-setup-script.sh",
+        }
+    )
+    live_view.record_setup_script_event(
+        {
+            "status": "completed",
+            "source_path": "/host/setup.sh",
+            "container_path": "/tmp/strix-setup-script.sh",
+            "command": "bash /tmp/strix-setup-script.sh",
+            "stdout": "seeded database",
+            "stderr": "",
+            "exit_code": 0,
+            "duration_seconds": 1.25,
+        }
+    )
+    # The running and completed events are expected to merge into one tool event.
+    assert live_view.agents[SETUP_SCRIPT_AGENT_ID]["status"] == "completed"
+    assert live_view.agents[SETUP_SCRIPT_AGENT_ID]["kind"] == "setup_script"
+    events = live_view.events_for_agent(SETUP_SCRIPT_AGENT_ID)
+    assert len(events) == 1
+    assert events[0]["data"]["tool_name"] == "setup_script"
+    assert events[0]["data"]["status"] == "completed"
+    assert events[0]["data"]["result"]["stdout"] == "seeded database"
+
+
+@pytest.mark.asyncio
+async def test_execute_setup_script_runs_bash() -> None:
+    """A successful run execs bash once and emits running then completed events."""
+    session = FakeSession(FakeExecResult(stdout="ready\n"))
+    events: list[dict[str, Any]] = []
+    await execute_setup_script(
+        session,
+        source_path="/host/setup.sh",
+        event_sink=events.append,
+    )
+
+    assert session.exec_calls == [
+        (("bash", "/tmp/strix-setup-script.sh"), {"timeout": 3600}),
+    ]
+    assert [event["status"] for event in events] == ["running", "completed"]
+    assert events[0]["source_path"] == "/host/setup.sh"
+    assert events[0]["command"] == "bash /tmp/strix-setup-script.sh"
+    assert events[1]["stdout"] == "ready"
+    assert events[1]["stderr"] == ""
+    assert events[1]["exit_code"] == 0
+
+
+@pytest.mark.asyncio
+async def test_execute_setup_script_raises_on_failure() -> None:
+    """A non-zero exit emits a failed event (bytes stderr decoded) and raises."""
+    events: list[dict[str, Any]] = []
+    session = FakeSession(FakeExecResult(exit_code=12, stderr=b"boom"))
+    with pytest.raises(RuntimeError, match="Setup script failed inside sandbox"):
+        await execute_setup_script(
+            session,
+            source_path="/host/setup.sh",
+            event_sink=events.append,
+        )
+
+    assert [event["status"] for event in events] == ["running", "failed"]
+    assert events[1]["stderr"] == "boom"
+    assert events[1]["exit_code"] == 12
+
+
+@pytest.mark.asyncio
+async def test_create_or_reuse_runs_setup_after_runtime_bootstrap(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """The script is bind-mounted and run after the backend and Caido bootstrap."""
+    script = tmp_path / "setup.sh"
+    script.write_text("#!/usr/bin/env bash\n", encoding="utf-8")
+    session = FakeSession()
+    events: list[str] = []
+    captured_mounts: list[dict[str, Any]] = []
+
+    async def fake_backend(
+        *,
+        image: str,
+        manifest: Any,
+        exposed_ports: tuple[int, ...],
+        bind_mounts: list[dict[str, Any]] | None = None,
+    ) -> tuple[object, FakeSession]:
+        del image, manifest, exposed_ports
+        events.append("backend")
+        captured_mounts.extend(bind_mounts or [])
+        return object(), session
+
+    async def fake_bootstrap_caido(
+        sandbox_session: FakeSession,
+        *,
+        host_url: str,
+        container_url: str,
+    ) -> object:
+        del sandbox_session, host_url, container_url
+        events.append("caido")
+        return object()
+
+    original_execute_setup_script = session_manager.execute_setup_script
+
+    async def record_execute_setup_script(
+        sandbox_session: FakeSession,
+        *,
+        source_path: str,
+        event_sink: Any,
+    ) -> None:
+        del event_sink
+        events.append("setup")
+        await original_execute_setup_script(sandbox_session, source_path=source_path)
+
+    monkeypatch.setattr(session_manager, "_SESSION_CACHE", {})
+    monkeypatch.setattr(
+        session_manager,
+        "load_settings",
+        lambda: SimpleNamespace(runtime=SimpleNamespace(backend="docker")),
+    )
+    monkeypatch.setattr(session_manager, "get_backend", lambda _name: fake_backend)
+    monkeypatch.setattr(session_manager, "bootstrap_caido", fake_bootstrap_caido)
+    monkeypatch.setattr(session_manager, "execute_setup_script", record_execute_setup_script)
+    await session_manager.create_or_reuse(
+        "scan-with-setup",
+        image="strix-test:latest",
+        local_sources=[],
+        setup_script=str(script),
+    )
+    # Ordering check: setup runs only after the backend and Caido bootstrap.
+    assert events == ["backend", "caido", "setup"]
+    assert captured_mounts == [
+        {
+            "source": str(script.resolve()),
+            "target": "/tmp/strix-setup-script.sh",
+            "read_only": True,
+        }
+    ]