working singlefile

This commit is contained in:
Nick Sweeting 2026-01-19 03:05:49 -08:00
parent b5bbc3b549
commit bef67760db
17 changed files with 498 additions and 54 deletions

View File

@ -1020,14 +1020,14 @@ class Process(models.Model):
# Debug logging
import sys
print(f"DEBUG _find_parent_process: my_pid={os.getpid()}, ppid={ppid}", file=sys.stderr)
# print(f"DEBUG _find_parent_process: my_pid={os.getpid()}, ppid={ppid}", file=sys.stderr)
# Get parent process start time from OS
try:
os_parent = psutil.Process(ppid)
os_parent_start = os_parent.create_time()
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
print(f"DEBUG _find_parent_process: Parent process {ppid} not accessible", file=sys.stderr)
# print(f"DEBUG _find_parent_process: Parent process {ppid} not accessible", file=sys.stderr)
return None # Parent process doesn't exist
# Find matching Process record
@ -1038,18 +1038,18 @@ class Process(models.Model):
started_at__gte=timezone.now() - PID_REUSE_WINDOW,
).order_by('-started_at')
print(f"DEBUG _find_parent_process: Found {candidates.count()} candidates for ppid={ppid}", file=sys.stderr)
# print(f"DEBUG _find_parent_process: Found {candidates.count()} candidates for ppid={ppid}", file=sys.stderr)
for candidate in candidates:
if candidate.started_at:
db_start_time = candidate.started_at.timestamp()
time_diff = abs(db_start_time - os_parent_start)
print(f"DEBUG _find_parent_process: Checking candidate id={candidate.id} time_diff={time_diff:.2f}s tolerance={START_TIME_TOLERANCE}s", file=sys.stderr)
# print(f"DEBUG _find_parent_process: Checking candidate id={candidate.id} time_diff={time_diff:.2f}s tolerance={START_TIME_TOLERANCE}s", file=sys.stderr)
if time_diff < START_TIME_TOLERANCE:
print(f"DEBUG _find_parent_process: MATCH! Returning parent id={candidate.id} pid={candidate.pid}", file=sys.stderr)
# print(f"DEBUG _find_parent_process: MATCH! Returning parent id={candidate.id} pid={candidate.pid}", file=sys.stderr)
return candidate
print(f"DEBUG _find_parent_process: No matching parent found for ppid={ppid}", file=sys.stderr)
# print(f"DEBUG _find_parent_process: No matching parent found for ppid={ppid}", file=sys.stderr)
return None # No matching ArchiveBox parent process
@classmethod
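Note: the lookup above defends against PID reuse. A PID alone is ambiguous, so a DB row only counts as the parent if its recorded started_at agrees with the OS-reported start time for that PID. A minimal standalone sketch of the same check (names mirror the diff; the tolerance value is illustrative):

    import psutil

    START_TIME_TOLERANCE = 5.0  # seconds, illustrative

    def matches_live_process(candidate, ppid: int) -> bool:
        """True if the DB row plausibly describes the live process with pid=ppid."""
        try:
            os_start = psutil.Process(ppid).create_time()  # epoch seconds
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            return False  # parent is gone or unreadable
        if not candidate.started_at:
            return False
        # A recycled PID virtually never starts at the same instant as the
        # original process, so a tight start-time match disambiguates.
        return abs(candidate.started_at.timestamp() - os_start) < START_TIME_TOLERANCE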
@ -1519,7 +1519,7 @@ class Process(models.Model):
stdout_path = self.stdout_file
stderr_path = self.stderr_file
with open(stdout_path, 'w') as out, open(stderr_path, 'w') as err:
with open(stdout_path, 'a') as out, open(stderr_path, 'a') as err:
proc = subprocess.Popen(
self.cmd,
cwd=working_dir,

View File

@ -10,6 +10,7 @@ Shows a comprehensive dashboard with:
__package__ = 'archivebox.misc'
from datetime import datetime, timezone
import os
import re
from typing import List, Optional, Any
from collections import deque
@ -23,6 +24,7 @@ from rich.panel import Panel
from rich.text import Text
from rich.table import Table
from rich.tree import Tree
from rich.cells import cell_len
from archivebox.config import VERSION
@ -533,7 +535,23 @@ class CrawlQueueTreePanel:
is_pending = hook.get('is_pending', False)
icon, color = self._hook_style(status, is_bg=is_bg, is_running=is_running, is_pending=is_pending)
stats = self._hook_stats(size=size, elapsed=elapsed, timeout=timeout, status=status)
snap_node.add(Text(f"{icon} {path}{stats}", style=color))
line = Text(f"{icon} {path}{stats}", style=color)
stderr_tail = hook.get('stderr', '')
if stderr_tail:
left_str = f"{icon} {path}{stats}"
avail = self._available_width(left_str, indent=16)
trunc = getattr(self, "_truncate_tail", self._truncate_to_width)
stderr_tail = trunc(stderr_tail, avail)
if not stderr_tail:
snap_node.add(line)
continue
row = Table.grid(expand=True)
row.add_column(justify="left", ratio=1)
row.add_column(justify="right")
row.add_row(line, Text(stderr_tail, style="grey70"))
snap_node.add(row)
else:
snap_node.add(line)
trees.append(crawl_tree)
content = Group(*trees)
@ -561,7 +579,7 @@ class CrawlQueueTreePanel:
if status == 'succeeded':
return '', 'green'
if status == 'failed':
return '⚠️', 'yellow'
return '', 'red'
if status == 'skipped':
return '', 'grey53'
if is_pending:
@ -595,6 +613,37 @@ class CrawlQueueTreePanel:
return f" ({size_part} | {time_part})" if time_part else f" ({size_part})"
return ''
@staticmethod
def _terminal_width() -> int:
try:
return os.get_terminal_size().columns
except OSError:
return 120
@staticmethod
def _truncate_to_width(text: str, max_width: int) -> str:
if not text or max_width <= 0:
return ''
t = Text(text)
t.truncate(max_width, overflow="ellipsis")
return t.plain
@staticmethod
def _truncate_tail(text: str, max_width: int) -> str:
if not text or max_width <= 0:
return ''
if cell_len(text) <= max_width:
return text
if max_width <= 1:
return ''
return f"{text[-(max_width - 1):]}"
def _available_width(self, left_text: str, indent: int = 0) -> int:
width = self._terminal_width()
base = max(0, width - cell_len(left_text) - indent - 6)
cap = max(0, (width * 2) // 5)
return max(0, min(base, cap))
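Note: a worked example of the width budget (numbers illustrative): on a 120-column terminal with a 40-cell hook label and 16 cells of tree indentation:

    # base  = 120 - 40 - 16 - 6 = 58   (cells left after label, indent, margin)
    # cap   = (120 * 2) // 5    = 48   (stderr never takes more than ~40% of the row)
    # avail = min(base, cap)    = 48
    # _truncate_tail(stderr, 48) then keeps the *last* 47 characters of the line,
    # since the end of a stderr line is usually the most diagnostic part.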
class ArchiveBoxProgressLayout:
"""
@ -631,7 +680,7 @@ class ArchiveBoxProgressLayout:
# Top-level split: crawl_queue, crawl_tree, processes
layout.split(
Layout(name="crawl_queue", size=3),
Layout(name="crawl_tree", size=14),
Layout(name="crawl_tree", size=20),
Layout(name="processes", ratio=1),
)
@ -671,6 +720,8 @@ class ArchiveBoxProgressLayout:
cmd = getattr(process, 'cmd', [])
hook_path = Path(cmd[1]) if len(cmd) > 1 else None
hook_name = hook_path.name if hook_path else ''
if '.bg.' in hook_name:
continue
if '.bg.' not in hook_name:
fg_running = True
break
@ -684,6 +735,8 @@ class ArchiveBoxProgressLayout:
cmd = getattr(process, 'cmd', [])
hook_path = Path(cmd[1]) if len(cmd) > 1 else None
hook_name = hook_path.name if hook_path else ''
if '.bg.' in hook_name:
continue
if '.bg.' not in hook_name:
fg_pending = True
break
@ -701,6 +754,10 @@ class ArchiveBoxProgressLayout:
is_bg = '.bg.' in hook_name
except Exception:
is_bg = False
if is_hook and is_bg:
continue
if not self._has_log_lines(process):
continue
is_pending = getattr(process, 'status', '') in ('queued', 'pending', 'backoff') or (is_hook and not getattr(process, 'pid', None))
max_lines = 2 if is_pending else (4 if is_bg else 7)
panels.append(ProcessLogPanel(process, max_lines=max_lines, compact=is_bg, bg_terminating=bg_terminating))
@ -718,6 +775,17 @@ class ArchiveBoxProgressLayout:
def update_crawl_tree(self, crawls: list[dict[str, Any]]) -> None:
"""Update the crawl queue tree panel."""
self.crawl_queue_tree.update_crawls(crawls)
# Auto-size crawl tree panel to content
line_count = 0
for crawl in crawls:
line_count += 1
for snap in crawl.get('snapshots', []) or []:
line_count += 1
if snap.get('output_path'):
line_count += 1
for _ in snap.get('hooks', []) or []:
line_count += 1
self.layout["crawl_tree"].size = max(4, line_count + 2)
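Note: for example (illustrative numbers), one crawl with two snapshots, each showing an output_path line and three hooks, yields line_count = 1 + 2 * (1 + 1 + 3) = 11, so the panel is sized to 13 rows; the +2 covers the panel border.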
def log_event(self, message: str, style: str = "white") -> None:
"""Add an event to the orchestrator log."""
@ -767,8 +835,28 @@ class ArchiveBoxProgressLayout:
timeout=hook.get('timeout', ''),
status=status,
)
stderr_tail = hook.get('stderr', '')
hook_line = f" {icon} {path}{stats}".strip()
if stderr_tail:
avail = self.crawl_queue_tree._available_width(hook_line, indent=16)
trunc = getattr(self.crawl_queue_tree, "_truncate_tail", self.crawl_queue_tree._truncate_to_width)
stderr_tail = trunc(stderr_tail, avail)
if stderr_tail:
hook_line = f"{hook_line} {stderr_tail}"
if hook_line:
lines.append(("crawl_tree", hook_line))
return lines
@staticmethod
def _has_log_lines(process: Any) -> bool:
try:
stdout_lines = list(process.tail_stdout(lines=1, follow=False))
if any(line.strip() for line in stdout_lines):
return True
stderr_lines = list(process.tail_stderr(lines=1, follow=False))
if any(line.strip() for line in stderr_lines):
return True
except Exception:
return False
return False

View File

@ -2,7 +2,7 @@
"""
Submit a URL to archive.org for archiving.
Usage: on_Snapshot__archivedotorg.py --url=<url> --snapshot-id=<uuid>
Usage: on_Snapshot__archivedotorg.bg.py --url=<url> --snapshot-id=<uuid>
Output: Writes archive.org.txt to $PWD with the archived URL
Environment variables:

View File

@ -803,9 +803,16 @@ try {
* @returns {string} - 32-character extension ID
*/
function getExtensionId(unpacked_path) {
let resolved_path = unpacked_path;
try {
resolved_path = fs.realpathSync(unpacked_path);
} catch (err) {
// Use the provided path if realpath fails
resolved_path = unpacked_path;
}
// Chrome uses a SHA256 hash of the unpacked extension directory path
const hash = crypto.createHash('sha256');
hash.update(Buffer.from(unpacked_path, 'utf-8'));
hash.update(Buffer.from(resolved_path, 'utf-8'));
// Convert first 32 hex chars to characters in the range 'a'-'p'
const detected_extension_id = Array.from(hash.digest('hex'))
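Note: the same derivation is easy to sanity-check outside Node. A minimal Python sketch of Chrome's unpacked-extension ID scheme (SHA-256 of the resolved directory path, first 32 hex digits remapped 0-f to a-p), mirroring the UTF-8 encoding used above:

    import hashlib
    import os

    def chrome_extension_id(unpacked_path: str) -> str:
        # Chrome hashes the resolved absolute path of the unpacked extension dir
        path = os.path.realpath(unpacked_path)
        digest = hashlib.sha256(path.encode('utf-8')).hexdigest()
        # each hex digit 0-f maps to a letter a-p
        return ''.join(chr(ord('a') + int(ch, 16)) for ch in digest[:32])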
@ -978,6 +985,8 @@ async function isTargetExtension(target) {
let extension_id = null;
let manifest_version = null;
let manifest = null;
let manifest_name = null;
const target_is_extension = is_chrome_extension || target_is_bg;
if (target_is_extension) {
@ -985,8 +994,9 @@ async function isTargetExtension(target) {
extension_id = target_url?.split('://')[1]?.split('/')[0] || null;
if (target_ctx) {
const manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest());
manifest = await target_ctx.evaluate(() => chrome.runtime.getManifest());
manifest_version = manifest?.manifest_version || null;
manifest_name = manifest?.name || null;
}
} catch (err) {
// Failed to get extension metadata
@ -1001,6 +1011,8 @@ async function isTargetExtension(target) {
target_url,
extension_id,
manifest_version,
manifest,
manifest_name,
};
}
@ -1053,14 +1065,23 @@ async function loadExtensionFromTarget(extensions, target) {
// Trigger extension toolbar button click
dispatchAction: async (tab) => {
return await target_ctx.evaluate((tabId) => {
return new Promise((resolve) => {
chrome.action.onClicked.addListener((tab) => {
resolve({ success: true, tab });
});
chrome.action.openPopup();
});
}, tab?.id || null);
return await target_ctx.evaluate(async (tab) => {
tab = tab || (await new Promise((resolve) =>
chrome.tabs.query({ currentWindow: true, active: true }, ([tab]) => resolve(tab))
));
// Manifest V3: chrome.action
if (chrome.action?.onClicked?.dispatch) {
return await chrome.action.onClicked.dispatch(tab);
}
// Manifest V2: chrome.browserAction
if (chrome.browserAction?.onClicked?.dispatch) {
return await chrome.browserAction.onClicked.dispatch(tab);
}
throw new Error('Extension action dispatch not available');
}, tab || null);
},
// Send message to extension

View File

@ -118,9 +118,7 @@ process.on('SIGTERM', () => cleanup('SIGTERM'));
process.on('SIGINT', () => cleanup('SIGINT'));
// Try to find the crawl's Chrome session
function findCrawlChromeSession(crawlId) {
if (!crawlId) return null;
function findCrawlChromeSession() {
// Use CRAWL_OUTPUT_DIR env var set by get_config() in configset.py
const crawlOutputDir = getEnv('CRAWL_OUTPUT_DIR', '');
if (!crawlOutputDir) return null;
@ -301,7 +299,7 @@ async function main() {
const args = parseArgs();
const url = args.url;
const snapshotId = args.snapshot_id;
const crawlId = args.crawl_id;
const crawlId = args.crawl_id || getEnv('CRAWL_ID', '');
if (!url || !snapshotId) {
console.error('Usage: on_Snapshot__10_chrome_tab.bg.js --url=<url> --snapshot-id=<uuid> [--crawl-id=<uuid>]');
@ -332,15 +330,14 @@ async function main() {
}
// Try to use existing crawl Chrome session
const crawlSession = findCrawlChromeSession(crawlId);
const crawlSession = findCrawlChromeSession();
let result;
if (crawlSession) {
console.log(`[*] Found existing Chrome session from crawl ${crawlId}`);
result = await createTabInExistingChrome(crawlSession.cdpUrl, url, crawlSession.pid);
} else {
console.log(`[*] No crawl Chrome session found, launching new Chrome`);
result = await launchNewChrome(url, binary);
result = { success: false, error: 'No crawl Chrome session found (CRAWL_OUTPUT_DIR missing or chrome not running)' };
}
if (result.success) {
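Note: a sketch of the session-discovery convention this relies on, assuming the crawl's Chrome hook writes its DevTools endpoint to a cdp_url.txt under a chrome/ subdirectory of CRAWL_OUTPUT_DIR (the exact file layout is inferred from the hooks below, so treat it as an assumption):

    import os
    from pathlib import Path

    def find_crawl_cdp_url() -> str | None:
        crawl_dir = os.environ.get('CRAWL_OUTPUT_DIR', '')
        if not crawl_dir:
            return None  # no crawl context, so no shared Chrome session to reuse
        cdp_file = Path(crawl_dir) / 'chrome' / 'cdp_url.txt'
        if cdp_file.is_file():
            return cdp_file.read_text().strip() or None
        return None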

View File

@ -2,7 +2,7 @@
"""
Extract favicon from a URL.
Usage: on_Snapshot__favicon.py --url=<url> --snapshot-id=<uuid>
Usage: on_Snapshot__favicon.bg.py --url=<url> --snapshot-id=<uuid>
Output: Writes favicon.ico to $PWD
Environment variables:

View File

@ -17,6 +17,7 @@ Environment variables:
import json
import os
import shutil
import subprocess
import sys
import threading
@ -87,6 +88,27 @@ def get_env_array(name: str, default: list[str] | None = None) -> list[str]:
return default if default is not None else []
def get_binary_shebang(binary_path: str) -> str | None:
"""Return interpreter from shebang line if present (e.g., /path/to/python)."""
try:
with open(binary_path, 'r', encoding='utf-8') as f:
first_line = f.readline().strip()
if first_line.startswith('#!'):
return first_line[2:].strip().split(' ')[0]
except Exception:
pass
return None
def resolve_binary_path(binary: str) -> str | None:
"""Resolve binary to an absolute path if possible."""
if not binary:
return None
if Path(binary).is_file():
return binary
return shutil.which(binary)
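Note: why the shebang matters here (hypothetical paths for illustration): forum-dl is typically installed as a console script whose shebang names the interpreter that actually has its dependencies, so running the wrapper under that interpreter, rather than whatever sys.executable happens to be, keeps forum-dl's own environment importable:

    # resolved = resolve_binary_path('forum-dl')   -> '/home/user/.local/bin/forum-dl'        (hypothetical)
    # get_binary_shebang(resolved)                 -> '/home/user/.venvs/forumdl/bin/python3' (hypothetical)
    # cmd = [that_python, str(wrapper_path), ...]  -> wrapper runs with forum-dl's deps available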
def save_forum(url: str, binary: str) -> tuple[bool, str | None, str]:
"""
@ -118,10 +140,12 @@ def save_forum(url: str, binary: str) -> tuple[bool, str | None, str]:
# Use our Pydantic v2 compatible wrapper if available, otherwise fall back to binary
wrapper_path = Path(__file__).parent / 'forum-dl-wrapper.py'
resolved_binary = resolve_binary_path(binary) or binary
if wrapper_path.exists():
cmd = [sys.executable, str(wrapper_path), *forumdl_args, '-f', output_format, '-o', str(output_file)]
forumdl_python = get_binary_shebang(resolved_binary) or sys.executable
cmd = [forumdl_python, str(wrapper_path), *forumdl_args, '-f', output_format, '-o', str(output_file)]
else:
cmd = [binary, *forumdl_args, '-f', output_format, '-o', str(output_file)]
cmd = [resolved_binary, *forumdl_args, '-f', output_format, '-o', str(output_file)]
if not check_ssl:
cmd.append('--no-check-certificate')
@ -187,7 +211,7 @@ def save_forum(url: str, binary: str) -> tuple[bool, str | None, str]:
if 'unable to extract' in stderr_lower:
return False, None, 'Unable to extract forum info'
return False, None, f'forum-dl error: {stderr[:200]}'
return False, None, f'forum-dl error: {stderr}'
except subprocess.TimeoutExpired:
return False, None, f'Timed out after {timeout} seconds'

View File

@ -196,7 +196,7 @@ def save_gallery(url: str, binary: str) -> tuple[bool, str | None, str]:
if 'unable to extract' in stderr_lower:
return False, None, 'Unable to extract gallery info'
return False, None, f'gallery-dl error: {stderr[:200]}'
return False, None, f'gallery-dl error: {stderr}'
except subprocess.TimeoutExpired:
return False, None, f'Timed out after {timeout} seconds'

View File

@ -82,6 +82,9 @@ def extract_mercury(url: str, binary: str) -> tuple[bool, str | None, str]:
# Get text version
cmd_text = [binary, *mercury_args, *mercury_args_extra, url, '--format=text']
result_text = subprocess.run(cmd_text, stdout=subprocess.PIPE, timeout=timeout, text=True)
if result_text.stdout:
sys.stderr.write(result_text.stdout)
sys.stderr.flush()
if result_text.returncode != 0:
return False, None, f'postlight-parser failed (exit={result_text.returncode})'
@ -101,6 +104,9 @@ def extract_mercury(url: str, binary: str) -> tuple[bool, str | None, str]:
# Get HTML version
cmd_html = [binary, *mercury_args, *mercury_args_extra, url, '--format=html']
result_html = subprocess.run(cmd_html, stdout=subprocess.PIPE, timeout=timeout, text=True)
if result_html.stdout:
sys.stderr.write(result_html.stdout)
sys.stderr.flush()
try:
html_json = json.loads(result_html.stdout)
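Note: the stdout mirroring follows a convention visible throughout these hooks: stdout stays machine-readable for parsing (here, the JSON payload), while stderr is what the orchestrator tails into the live progress panel (see _tail_stderr_line further down), so anything worth showing mid-run gets echoed to stderr as well.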

View File

@ -109,6 +109,10 @@ def extract_readability(url: str, binary: str) -> tuple[bool, str | None, str]:
cmd = [binary, *readability_args, *readability_args_extra, html_source]
result = subprocess.run(cmd, stdout=subprocess.PIPE, timeout=timeout, text=True)
if result.stdout:
sys.stderr.write(result.stdout)
sys.stderr.flush()
if result.returncode != 0:
return False, None, f'readability-extractor failed (exit={result.returncode})'

View File

@ -116,7 +116,19 @@ async function saveSinglefileWithExtension(page, extension, options = {}) {
// Trigger the extension's action (toolbar button click)
console.error('[singlefile] Dispatching extension action...');
await extension.dispatchAction();
try {
const actionTimeoutMs = options.actionTimeoutMs || 5000;
const actionPromise = extension.dispatchAction();
const actionResult = await Promise.race([
actionPromise,
wait(actionTimeoutMs).then(() => 'timeout'),
]);
if (actionResult === 'timeout') {
console.error(`[singlefile] Extension action did not resolve within ${actionTimeoutMs}ms, continuing...`);
}
} catch (err) {
console.error(`[singlefile] Extension action error: ${err.message || err}`);
}
// Wait for file to appear in downloads directory
const check_delay = 3000; // 3 seconds
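Note: the Promise.race guard above bounds how long the hook waits on dispatchAction without cancelling it, then falls through to polling the downloads directory. An illustrative Python asyncio analog of the same guard (the hook itself is Node; names here are hypothetical):

    import asyncio

    async def dispatch_with_grace(action_coro, timeout_s: float = 5.0) -> None:
        task = asyncio.ensure_future(action_coro)
        done, pending = await asyncio.wait({task}, timeout=timeout_s)
        if task in pending:
            # like Promise.race: stop waiting but let the action keep running;
            # success is decided later by watching for the output file, not here
            print(f'action did not resolve within {timeout_s}s, continuing...')
        elif task.exception() is not None:
            print(f'action error: {task.exception()}')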

View File

@ -27,6 +27,7 @@ import threading
import time
from urllib.request import urlopen
from pathlib import Path
import shutil
import rich_click as click
@ -142,6 +143,7 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]:
Returns: (success, output_path, error_message)
"""
print(f'[singlefile] CLI mode start url={url}', file=sys.stderr)
# Get config from env (with SINGLEFILE_ prefix, x-fallback handled by config loader)
timeout = get_env_int('SINGLEFILE_TIMEOUT') or get_env_int('TIMEOUT', 120)
user_agent = get_env('SINGLEFILE_USER_AGENT') or get_env('USER_AGENT', '')
@ -172,8 +174,10 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]:
cdp_remote_url = None
if cdp_remote_url:
print(f'[singlefile] Using existing Chrome session: {cdp_remote_url}', file=sys.stderr)
cmd.extend(['--browser-server', cdp_remote_url])
elif chrome:
print(f'[singlefile] Launching Chrome binary: {chrome}', file=sys.stderr)
cmd.extend(['--browser-executable-path', chrome])
# Pass Chrome arguments (only when launching a new browser)
@ -200,6 +204,7 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]:
output_path = output_dir / OUTPUT_FILE
cmd.extend([url, str(output_path)])
print(f'[singlefile] CLI command: {" ".join(cmd[:6])} ...', file=sys.stderr)
try:
output_lines: list[str] = []
@ -258,36 +263,93 @@ def save_singlefile(url: str, binary: str) -> tuple[bool, str | None, str]:
def save_singlefile_with_extension(url: str, timeout: int) -> tuple[bool, str | None, str]:
"""Save using the SingleFile Chrome extension via existing Chrome session."""
print(f'[singlefile] Extension mode start url={url}', file=sys.stderr)
# Only attempt if chrome session exists
cdp_url = get_cdp_url(wait_seconds=min(5, max(1, timeout // 10)))
if not cdp_url:
print('[singlefile] No chrome session (cdp_url.txt missing)', file=sys.stderr)
return False, None, 'No Chrome session available'
if not EXTENSION_SAVE_SCRIPT.exists():
print(f'[singlefile] Missing helper script: {EXTENSION_SAVE_SCRIPT}', file=sys.stderr)
return False, None, 'SingleFile extension helper script missing'
node_binary = get_env('SINGLEFILE_NODE_BINARY') or get_env('NODE_BINARY', 'node')
downloads_dir = get_env('CHROME_DOWNLOADS_DIR', '')
extensions_dir = get_env('CHROME_EXTENSIONS_DIR', '')
cmd = [node_binary, str(EXTENSION_SAVE_SCRIPT), f'--url={url}']
print(f'[singlefile] cdp_url={cdp_url}', file=sys.stderr)
print(f'[singlefile] node={node_binary}', file=sys.stderr)
node_resolved = shutil.which(node_binary) if node_binary else None
print(f'[singlefile] node_resolved={node_resolved}', file=sys.stderr)
print(f'[singlefile] PATH={os.environ.get("PATH","")}', file=sys.stderr)
if downloads_dir:
print(f'[singlefile] CHROME_DOWNLOADS_DIR={downloads_dir}', file=sys.stderr)
if extensions_dir:
print(f'[singlefile] CHROME_EXTENSIONS_DIR={extensions_dir}', file=sys.stderr)
print(f'[singlefile] helper_cmd={" ".join(cmd)}', file=sys.stderr)
try:
result = subprocess.run(cmd, capture_output=True, timeout=timeout)
except subprocess.TimeoutExpired:
return False, None, f'Timed out after {timeout} seconds'
output_lines: list[str] = []
error_lines: list[str] = []
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
bufsize=1,
)
def _read_stream(stream, sink, label: str) -> None:
if not stream:
return
for line in stream:
sink.append(line)
sys.stderr.write(line)
sys.stderr.flush()
stdout_thread = threading.Thread(target=_read_stream, args=(process.stdout, output_lines, 'stdout'), daemon=True)
stderr_thread = threading.Thread(target=_read_stream, args=(process.stderr, error_lines, 'stderr'), daemon=True)
stdout_thread.start()
stderr_thread.start()
try:
process.wait(timeout=timeout)
except subprocess.TimeoutExpired:
process.kill()
stdout_thread.join(timeout=1)
stderr_thread.join(timeout=1)
print(f'[singlefile] Extension helper timed out after {timeout}s', file=sys.stderr)
return False, None, f'Timed out after {timeout} seconds'
stdout_thread.join(timeout=1)
stderr_thread.join(timeout=1)
result_stdout = ''.join(output_lines).encode('utf-8', errors='replace')
result_stderr = ''.join(error_lines).encode('utf-8', errors='replace')
result_returncode = process.returncode
except Exception as e:
print(f'[singlefile] Extension helper error: {type(e).__name__}: {e}', file=sys.stderr)
return False, None, f'{type(e).__name__}: {e}'
if result.returncode == 0:
print(f'[singlefile] helper_returncode={result_returncode}', file=sys.stderr)
print(f'[singlefile] helper_stdout_len={len(result_stdout or b"")}', file=sys.stderr)
print(f'[singlefile] helper_stderr_len={len(result_stderr or b"")}', file=sys.stderr)
if result_returncode == 0:
# Prefer explicit stdout path, fall back to the local output file
out_text = result.stdout.decode('utf-8', errors='replace').strip()
out_text = result_stdout.decode('utf-8', errors='replace').strip()
if out_text and Path(out_text).exists():
print(f'[singlefile] Extension output: {out_text}', file=sys.stderr)
return True, out_text, ''
output_path = Path(OUTPUT_DIR) / OUTPUT_FILE
if output_path.exists() and output_path.stat().st_size > 0:
print(f'[singlefile] Extension output: {output_path}', file=sys.stderr)
return True, str(output_path), ''
return False, None, 'SingleFile extension completed but no output file found'
stderr = result.stderr.decode('utf-8', errors='replace').strip()
stdout = result.stdout.decode('utf-8', errors='replace').strip()
stderr = result_stderr.decode('utf-8', errors='replace').strip()
stdout = result_stdout.decode('utf-8', errors='replace').strip()
detail = stderr or stdout
return False, None, detail or 'SingleFile extension failed'
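Note: the Popen-plus-reader-threads shape above is the live-logging variant of subprocess.run(capture_output=True): each daemon thread tees one stream line by line into the hook's own stderr as it arrives (so the orchestrator panel updates in real time) while also buffering it for the final result; on timeout the child is killed first, then the threads are joined briefly so no tee writes race the return.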
@ -298,6 +360,7 @@ def save_singlefile_with_extension(url: str, timeout: int) -> tuple[bool, str |
def main(url: str, snapshot_id: str):
"""Archive a URL using SingleFile."""
print(f'[singlefile] Hook starting pid={os.getpid()} url={url}', file=sys.stderr)
output = None
status = 'failed'
error = ''
@ -318,11 +381,6 @@ def main(url: str, snapshot_id: str):
# Prefer SingleFile extension via existing Chrome session
timeout = get_env_int('SINGLEFILE_TIMEOUT') or get_env_int('TIMEOUT', 120)
success, output, error = save_singlefile_with_extension(url, timeout)
# Fallback to single-file-cli if extension path failed
if not success:
binary = get_env('SINGLEFILE_BINARY', 'single-file')
success, output, error = save_singlefile(url, binary)
status = 'succeeded' if success else 'failed'
except Exception as e:

View File

@ -0,0 +1,207 @@
#!/usr/bin/env node
/**
* Save a page using the SingleFile Chrome extension via an existing Chrome session.
*
* Usage: singlefile_extension_save.js --url=<url>
* Output: prints saved file path on success
*/
const fs = require('fs');
const path = require('path');
const CHROME_SESSION_DIR = '../chrome';
const DOWNLOADS_DIR = process.env.CHROME_DOWNLOADS_DIR ||
path.join(process.env.DATA_DIR || './data', 'personas', process.env.ACTIVE_PERSONA || 'Default', 'chrome_downloads');
process.env.CHROME_DOWNLOADS_DIR = DOWNLOADS_DIR;
async function setDownloadDir(page, downloadDir) {
try {
await fs.promises.mkdir(downloadDir, { recursive: true });
const client = await page.target().createCDPSession();
try {
await client.send('Page.setDownloadBehavior', {
behavior: 'allow',
downloadPath: downloadDir,
});
} catch (err) {
// Fallback for newer protocol versions
await client.send('Browser.setDownloadBehavior', {
behavior: 'allow',
downloadPath: downloadDir,
});
}
} catch (err) {
console.error(`[⚠️] Failed to set download directory: ${err.message || err}`);
}
}
function parseArgs() {
const args = {};
process.argv.slice(2).forEach((arg) => {
if (arg.startsWith('--')) {
const [key, ...valueParts] = arg.slice(2).split('=');
args[key.replace(/-/g, '_')] = valueParts.join('=') || true;
}
});
return args;
}
async function main() {
const args = parseArgs();
const url = args.url;
if (!url) {
console.error('Usage: singlefile_extension_save.js --url=<url>');
process.exit(1);
}
console.error(`[singlefile] helper start url=${url}`);
console.error(`[singlefile] downloads_dir=${DOWNLOADS_DIR}`);
if (process.env.CHROME_EXTENSIONS_DIR) {
console.error(`[singlefile] extensions_dir=${process.env.CHROME_EXTENSIONS_DIR}`);
}
try {
console.error('[singlefile] loading dependencies...');
const puppeteer = require('puppeteer-core');
const chromeUtils = require('../chrome/chrome_utils.js');
const {
EXTENSION,
saveSinglefileWithExtension,
} = require('./on_Crawl__82_singlefile_install.js');
console.error('[singlefile] dependencies loaded');
// Ensure extension is installed and metadata is cached
console.error('[singlefile] ensuring extension cache...');
const extension = await chromeUtils.installExtensionWithCache(
EXTENSION,
{ extensionsDir: process.env.CHROME_EXTENSIONS_DIR }
);
if (!extension) {
console.error('[❌] SingleFile extension not installed');
process.exit(2);
}
if (extension.unpacked_path) {
const runtimeId = chromeUtils.getExtensionId(extension.unpacked_path);
if (runtimeId) {
extension.id = runtimeId;
}
}
console.error(`[singlefile] extension ready id=${extension.id} version=${extension.version}`);
// Connect to existing Chrome session
console.error('[singlefile] connecting to chrome session...');
const { browser, page } = await chromeUtils.connectToPage({
chromeSessionDir: CHROME_SESSION_DIR,
timeoutMs: 60000,
puppeteer,
});
console.error('[singlefile] connected to chrome');
try {
// Ensure CDP target discovery is enabled so service_worker targets appear
try {
const client = await page.createCDPSession();
await client.send('Target.setDiscoverTargets', { discover: true });
await client.send('Target.setAutoAttach', { autoAttach: true, waitForDebuggerOnStart: false, flatten: true });
} catch (err) {
console.error(`[singlefile] failed to enable target discovery: ${err.message || err}`);
}
// Wait for extension target to be available, then attach dispatchAction
console.error('[singlefile] waiting for extension target...');
const deadline = Date.now() + 30000;
let matchTarget = null;
let matchInfo = null;
let lastLog = 0;
const wantedName = (extension.name || 'singlefile').toLowerCase();
while (Date.now() < deadline && !matchTarget) {
const targets = browser.targets();
for (const target of targets) {
const info = await chromeUtils.isTargetExtension(target);
if (!info?.target_is_extension || !info?.extension_id) {
continue;
}
const manifestName = (info.manifest_name || '').toLowerCase();
const targetUrl = (info.target_url || '').toLowerCase();
const nameMatches = manifestName.includes(wantedName) || manifestName.includes('singlefile') || manifestName.includes('single-file');
const urlMatches = targetUrl.includes('singlefile') || targetUrl.includes('single-file') || targetUrl.includes('single-file-extension');
if (nameMatches || urlMatches) {
matchTarget = target;
matchInfo = info;
break;
}
}
if (!matchTarget) {
if (Date.now() - lastLog > 5000) {
const targetsSummary = [];
for (const target of targets) {
const info = await chromeUtils.isTargetExtension(target);
if (!info?.target_is_extension) {
continue;
}
targetsSummary.push({
type: info.target_type,
url: info.target_url,
extensionId: info.extension_id,
manifestName: info.manifest_name,
});
}
console.error(`[singlefile] waiting... targets total=${targets.length} extensions=${targetsSummary.length} details=${JSON.stringify(targetsSummary)}`);
lastLog = Date.now();
}
await new Promise(r => setTimeout(r, 500));
}
}
if (!matchTarget || !matchInfo) {
const targets = chromeUtils.getExtensionTargets(browser);
console.error(`[singlefile] extension target not found (name=${extension.name})`);
console.error(`[singlefile] available targets: ${JSON.stringify(targets)}`);
await browser.disconnect();
process.exit(5);
}
// Use the runtime extension id from the matched target
extension.id = matchInfo.extension_id;
console.error('[singlefile] loading extension from target...');
await chromeUtils.loadExtensionFromTarget([extension], matchTarget);
if (typeof extension.dispatchAction !== 'function') {
const targets = chromeUtils.getExtensionTargets(browser);
console.error(`[singlefile] extension dispatchAction missing for id=${extension.id}`);
console.error(`[singlefile] available targets: ${JSON.stringify(targets)}`);
await browser.disconnect();
process.exit(6);
}
console.error('[singlefile] setting download dir...');
await setDownloadDir(page, DOWNLOADS_DIR);
console.error('[singlefile] triggering save via extension...');
const output = await saveSinglefileWithExtension(page, extension, { downloadsDir: DOWNLOADS_DIR });
if (output && fs.existsSync(output)) {
console.error(`[singlefile] saved: ${output}`);
console.log(output);
await browser.disconnect();
process.exit(0);
}
console.error('[❌] SingleFile extension did not produce output');
await browser.disconnect();
process.exit(3);
} catch (err) {
await browser.disconnect();
throw err;
}
} catch (err) {
console.error(`[❌] ${err.message || err}`);
process.exit(4);
}
}
if (require.main === module) {
main();
}

View File

@ -483,8 +483,7 @@ const puppeteer = require('puppeteer-core');
result = subprocess.run(
['node', str(script_path)],
cwd=str(tmpdir,
env=get_test_env()),
cwd=str(tmpdir),
capture_output=True,
text=True,
env=env,

View File

@ -144,6 +144,8 @@ def save_wget(url: str, binary: str) -> tuple[bool, str | None, str]:
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=timeout * 2, # Allow extra time for large downloads
)
@ -166,7 +168,8 @@ def save_wget(url: str, binary: str) -> tuple[bool, str | None, str]:
output_path = str(html_files[0]) if html_files else str(downloaded_files[0])
# Parse download stats from wget output
output_tail = result.stderr.decode('utf-8', errors='replace').strip().split('\n')[-3:]
stderr_text = (result.stderr or '')
output_tail = stderr_text.strip().split('\n')[-3:] if stderr_text else []
files_count = len(downloaded_files)
return True, output_path, ''

View File

@ -201,7 +201,7 @@ def save_ytdlp(url: str, binary: str) -> tuple[bool, str | None, str]:
if 'Unable to extract' in stderr:
return False, None, 'Unable to extract media info'
return False, None, f'yt-dlp error: {stderr[:200]}'
return False, None, f'yt-dlp error: {stderr}'
except subprocess.TimeoutExpired:
return False, None, f'Timed out after {timeout} seconds'

View File

@ -459,7 +459,6 @@ class Orchestrator:
# Enable progress layout only in TTY + foreground mode
show_progress = IS_TTY and self.exit_on_idle
plain_output = not IS_TTY
self.on_startup()
if not show_progress:
@ -520,7 +519,6 @@ class Orchestrator:
def _run_orchestrator_loop(self, progress_layout, plain_output: bool = False):
"""Run the main orchestrator loop with optional progress display."""
last_queue_sizes = {}
last_snapshot_count = None
tick_count = 0
last_plain_lines: set[tuple[str, str]] = set()
@ -611,6 +609,21 @@ class Orchestrator:
seconds = max(0.0, float(total_seconds))
return f"{seconds:.1f}s"
def _tail_stderr_line(proc) -> str:
try:
path = getattr(proc, 'stderr_file', None)
if not path or not path.exists():
return ''
with open(path, 'rb') as f:
f.seek(0, os.SEEK_END)
size = f.tell()
f.seek(max(0, size - 4096))
data = f.read().decode('utf-8', errors='ignore')
lines = [ln.strip() for ln in data.splitlines() if ln.strip()]
return lines[-1] if lines else ''
except Exception:
return ''
tree_data: list[dict] = []
for crawl in crawls:
urls = crawl.get_urls_list()
@ -684,7 +697,10 @@ class Orchestrator:
elapsed = ''
timeout = ''
size = ''
stderr_tail = ''
if ar:
if ar.process_id and ar.process:
stderr_tail = _tail_stderr_line(ar.process)
if ar.status == ArchiveResult.StatusChoices.STARTED:
status = 'started'
is_running = True
@ -700,6 +716,8 @@ class Orchestrator:
timeout = _format_seconds(hook_timeout)
else:
status = ar.status
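# exit code 137 = 128 + SIGKILL(9): the hook process was killed rather than exiting cleanly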
if ar.process_id and ar.process and ar.process.exit_code == 137:
status = 'failed'
is_pending = False
start_ts = ar.start_ts or (ar.process.started_at if ar.process_id and ar.process else None)
end_ts = ar.end_ts or (ar.process.ended_at if ar.process_id and ar.process else None)
@ -724,6 +742,7 @@ class Orchestrator:
'is_running': is_running,
'is_pending': is_pending,
'hook_name': hook_name,
'stderr': stderr_tail,
})
hooks = []
@ -734,6 +753,7 @@ class Orchestrator:
any_succeeded = any(h['status'] == ArchiveResult.StatusChoices.SUCCEEDED for h in hook_entries)
any_skipped = any(h['status'] == ArchiveResult.StatusChoices.SKIPPED for h in hook_entries)
stderr_tail = ''
if running:
status = 'started'
is_running = True
@ -741,6 +761,7 @@ class Orchestrator:
is_bg = running['is_bg']
elapsed = running.get('elapsed', '')
timeout = running.get('timeout', '')
stderr_tail = running.get('stderr', '')
size = ''
elif pending:
status = 'pending'
@ -749,6 +770,7 @@ class Orchestrator:
is_bg = pending['is_bg']
elapsed = pending.get('elapsed', '') or _format_seconds(0)
timeout = pending.get('timeout', '')
stderr_tail = pending.get('stderr', '')
size = ''
else:
is_running = False
@ -762,6 +784,10 @@ class Orchestrator:
status = 'skipped'
else:
status = 'skipped'
for h in hook_entries:
if h.get('stderr'):
stderr_tail = h['stderr']
break
total_elapsed = 0.0
has_elapsed = False
for h in hook_entries:
@ -793,6 +819,7 @@ class Orchestrator:
'is_bg': is_bg,
'is_running': is_running,
'is_pending': is_pending,
'stderr': stderr_tail,
})
snap_label = _abbrev(f"{str(snap.id)[-8:]} {snap.url or ''}".strip(), max_len=80)
@ -857,8 +884,6 @@ class Orchestrator:
progress_layout.update_process_panels(running_processes, pending=pending_processes)
last_queue_sizes = queue_sizes.copy()
# Update snapshot progress
from archivebox.core.models import Snapshot