mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-01-15 16:33:30 +08:00
151 lines
5.2 KiB
Python
Executable File
151 lines
5.2 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
__package__ = 'archivebox.cli'
|
|
|
|
import os
|
|
import sys
|
|
import shutil
|
|
|
|
import rich_click as click
|
|
from rich import print
|
|
|
|
from archivebox.misc.util import docstring, enforce_types
|
|
|
|
|
|
@enforce_types
def install(binaries: tuple[str, ...] = (), binproviders: str = '*', dry_run: bool = False) -> None:
    """Detect and install ArchiveBox dependencies by running a dependency-check crawl

    Examples:
        archivebox install                            # Install all dependencies
        archivebox install wget curl                  # Install only wget and curl
        archivebox install --binproviders=pip yt-dlp  # Install yt-dlp using only pip
        archivebox install --binproviders=brew,apt    # Install all deps using only brew or apt
    """
    # NOTE: the docstring above is also handed to `@docstring(install.__doc__)`
    # on `main`, so the argument documentation lives in this comment instead:
    #   binaries:     names of specific binaries to install; empty means all.
    #   binproviders: comma-separated provider names, or '*' for all providers.
    #   dry_run:      only report what would happen; nothing is initialized,
    #                 no crawl is created and the orchestrator is not started.

    from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
    from archivebox.config.paths import ARCHIVE_DIR
    from archivebox.misc.logging import stderr
    from archivebox.cli.archivebox_init import init

    # A db is needed to store Binary entries in, so a full index must exist
    # before the crawl runs. A dry run must stay side-effect free, so it only
    # reports that initialization would be needed instead of performing it.
    index_ready = os.access(ARCHIVE_DIR, os.R_OK) and ARCHIVE_DIR.is_dir()
    if not index_ready and not dry_run:
        init()

    # Show what we're installing
    if binaries:
        print(f'\n[green][+] Installing specific binaries: {", ".join(binaries)}[/green]')
    else:
        print('\n[green][+] Detecting and installing all ArchiveBox dependencies...[/green]')

    if binproviders != '*':
        print(f'[green][+] Using providers: {binproviders}[/green]')

    if IS_ROOT:
        euid = os.geteuid()
        print()
        print(f'[yellow]:warning: Running as UID=[blue]{euid}[/blue].[/yellow]')
        print(f' DATA_DIR will be owned by [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue].')
        print()

    if dry_run:
        if not index_ready:
            print('[dim]Dry run - would initialize the ArchiveBox index first[/dim]')
        print('[dim]Dry run - would create a crawl to detect dependencies[/dim]')
        return

    # Set up Django (must happen before any model imports below)
    from archivebox.config.django import setup_django
    setup_django()

    from django.utils import timezone
    from archivebox.crawls.models import Crawl
    from archivebox.base_models.models import get_or_create_system_user_pk

    # Create a crawl for dependency detection
    # Using a minimal crawl that will trigger on_Crawl hooks
    created_by_id = get_or_create_system_user_pk()

    # Combine binary names and provider names into the PLUGINS filter.
    # Provider names are stripped and empty entries dropped so that input such
    # as "pip, npm" or a trailing comma does not produce malformed names.
    plugins = list(binaries)
    if binproviders != '*':
        plugins.extend(
            name.strip() for name in binproviders.split(',') if name.strip()
        )

    # Build config for this crawl using the existing PLUGINS filter;
    # an empty config means "no filter".
    crawl_config = {'PLUGINS': ','.join(plugins)} if plugins else {}

    crawl, created = Crawl.objects.get_or_create(
        urls='archivebox://install',
        defaults={
            'label': 'Dependency detection',
            'created_by_id': created_by_id,
            'max_depth': 0,
            'status': 'queued',
            'config': crawl_config,
        },
    )

    # If crawl already existed, reset it to queued state so it can be processed again
    if not created:
        crawl.status = 'queued'
        crawl.retry_at = timezone.now()
        crawl.config = crawl_config  # Update config
        crawl.save()

    # Report accurately whether the crawl row is new or was re-queued.
    action = 'Created' if created else 'Re-queued existing'
    print(f'[+] {action} dependency detection crawl: {crawl.id}')
    if crawl_config:
        print(f'[+] Crawl config: {crawl_config}')
    print(f'[+] Crawl status: {crawl.status}, retry_at: {crawl.retry_at}')

    # Verify the crawl is in the queue. The queryset is evaluated once so the
    # count and the listing come from a single query and cannot disagree.
    queued_crawls = list(
        Crawl.objects
        .filter(retry_at__lte=timezone.now())
        .exclude(status__in=Crawl.FINAL_STATES)
    )
    print(f'[+] Crawls in queue: {len(queued_crawls)}')
    for queued in queued_crawls:
        print(f' - Crawl {queued.id}: status={queued.status}, retry_at={queued.retry_at}')

    print('[+] Running crawl to detect binaries via on_Crawl hooks...')
    print()

    # Run the crawl synchronously (this triggers on_Crawl hooks)
    from archivebox.workers.orchestrator import Orchestrator
    orchestrator = Orchestrator(exit_on_idle=True)
    orchestrator.runloop()

    print()

    # Check for superuser: remind the user if no superuser other than
    # 'system' exists yet.
    from django.contrib.auth import get_user_model
    User = get_user_model()

    if not User.objects.filter(is_superuser=True).exclude(username='system').exists():
        stderr('\n[+] Don\'t forget to create a new admin user for the Web UI...', color='green')
        stderr(' archivebox manage createsuperuser')

    print()

    # Show version to display full status including installed binaries
    # Django is already loaded, so just import and call the function directly
    from archivebox.cli.archivebox_version import version as show_version
    show_version(quiet=False)
|
|
|
|
|
|
# CLI entry point: click collects the positional BINARIES arguments plus the
# --binproviders / --dry-run options and forwards them unchanged to install().
@click.command()
@click.argument('binaries', nargs=-1, type=str, required=False)
@click.option(
    '--binproviders',
    '-p',
    default='*',
    show_default=True,
    help='Comma-separated list of providers to use (pip,npm,brew,apt,env,custom) or * for all',
)
@click.option(
    '--dry-run',
    '-d',
    is_flag=True,
    default=False,
    help='Show what would happen without actually running',
)
# Passes install()'s docstring to the `docstring` helper (presumably so it
# becomes this command's help text -- confirm against archivebox.misc.util).
@docstring(install.__doc__)
def main(**cli_options) -> None:
    install(**cli_options)
|
|
|
|
|
|
# Invoke the click command only when this file is executed directly as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|