diff --git a/archivebox/abid_utils/abid.py b/archivebox/abid_utils/abid.py index 23d6dec5..3c90e83c 100644 --- a/archivebox/abid_utils/abid.py +++ b/archivebox/abid_utils/abid.py @@ -36,6 +36,8 @@ class ABID(NamedTuple): uri: str # e.g. E4A5CCD9 subtype: str # e.g. 01 rand: str # e.g. ZYEBQE + + # salt: str = DEFAULT_ABID_URI_SALT def __getattr__(self, attr: str) -> Any: return getattr(self.ulid, attr) @@ -72,6 +74,10 @@ class ABID(NamedTuple): subtype=suffix[18:20].upper(), rand=suffix[20:26].upper(), ) + + @property + def uri_salt(self) -> str: + return DEFAULT_ABID_URI_SALT @property def suffix(self): @@ -180,7 +186,7 @@ def abid_part_from_rand(rand: Union[str, UUID, None, int]) -> str: return str(rand)[-ABID_RAND_LEN:].upper() -def abid_from_values(prefix, ts, uri, subtype, rand) -> ABID: +def abid_from_values(prefix, ts, uri, subtype, rand, salt=DEFAULT_ABID_URI_SALT) -> ABID: """ Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src). """ @@ -188,7 +194,7 @@ def abid_from_values(prefix, ts, uri, subtype, rand) -> ABID: abid = ABID( prefix=abid_part_from_prefix(prefix), ts=abid_part_from_ts(ts), - uri=abid_part_from_uri(uri), + uri=abid_part_from_uri(uri, salt=salt), subtype=abid_part_from_subtype(subtype), rand=abid_part_from_rand(rand), ) diff --git a/archivebox/abid_utils/models.py b/archivebox/abid_utils/models.py index de8b3c87..9d0ab1d5 100644 --- a/archivebox/abid_utils/models.py +++ b/archivebox/abid_utils/models.py @@ -26,6 +26,7 @@ from .abid import ( ABID_RAND_LEN, ABID_SUFFIX_LEN, DEFAULT_ABID_PREFIX, + DEFAULT_ABID_URI_SALT, abid_part_from_prefix, abid_from_values ) @@ -69,8 +70,8 @@ class ABIDModel(models.Model): abid_subtype_src = 'None' # e.g. 'self.extractor' abid_rand_src = 'None' # e.g. 
'self.uuid' or 'self.id' - id = models.UUIDField(primary_key=True, default=uuid4, editable=True) - uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True) + # id = models.UUIDField(primary_key=True, default=uuid4, editable=True) + # uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True) abid = ABIDField(prefix=abid_prefix) created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk) @@ -132,6 +133,7 @@ class ABIDModel(models.Model): uri=uri, subtype=subtype, rand=rand, + salt=DEFAULT_ABID_URI_SALT, ) assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}' return abid diff --git a/archivebox/api/v1_api.py b/archivebox/api/v1_api.py index 4fa5d94b..546ef8a0 100644 --- a/archivebox/api/v1_api.py +++ b/archivebox/api/v1_api.py @@ -63,7 +63,7 @@ api = NinjaAPIWithIOCapture( version='1.0.0', csrf=False, auth=API_AUTH_METHODS, - urls_namespace="api", + urls_namespace="api-1", docs=Swagger(settings={"persistAuthorization": True}), # docs_decorator=login_required, # renderer=ORJSONRenderer(), diff --git a/archivebox/api/v1_core.py b/archivebox/api/v1_core.py index 9046c361..0c701104 100644 --- a/archivebox/api/v1_core.py +++ b/archivebox/api/v1_core.py @@ -1,14 +1,17 @@ __package__ = 'archivebox.api' +import math from uuid import UUID -from typing import List, Optional +from typing import List, Optional, Union, Any from datetime import datetime from django.db.models import Q from django.shortcuts import get_object_or_404 +from django.core.exceptions import ValidationError +from django.contrib.auth import get_user_model from ninja import Router, Schema, FilterSchema, Field, Query -from ninja.pagination import paginate +from ninja.pagination import paginate, PaginationBase from core.models import Snapshot, ArchiveResult, Tag from abid_utils.abid import ABID @@ -17,23 +20,61 @@ router = Router(tags=['Core Models']) +class 
CustomPagination(PaginationBase): + class Input(Schema): + limit: int = 200 + offset: int = 0 + page: int = 0 + + + class Output(Schema): + total_items: int + total_pages: int + page: int + limit: int + offset: int + num_items: int + items: List[Any] + + def paginate_queryset(self, queryset, pagination: Input, **params): + limit = min(pagination.limit, 500) + offset = pagination.offset or (pagination.page * limit) + total = queryset.count() + total_pages = math.ceil(total / limit) + current_page = math.ceil(offset / (limit + 1)) + items = queryset[offset : offset + limit] + return { + 'total_items': total, + 'total_pages': total_pages, + 'page': current_page, + 'limit': limit, + 'offset': offset, + 'num_items': len(items), + 'items': items, + } + ### ArchiveResult ######################################################################### class ArchiveResultSchema(Schema): + TYPE: str = 'core.models.ArchiveResult' + + id: UUID + old_id: int abid: str - uuid: UUID - pk: str + modified: datetime created: datetime created_by_id: str + created_by_username: str snapshot_abid: str + snapshot_timestamp: str snapshot_url: str snapshot_tags: str extractor: str - cmd_version: str + cmd_version: Optional[str] cmd: List[str] pwd: str status: str @@ -42,6 +83,11 @@ class ArchiveResultSchema(Schema): @staticmethod def resolve_created_by_id(obj): return str(obj.created_by_id) + + @staticmethod + def resolve_created_by_username(obj): + User = get_user_model() + return User.objects.get(id=obj.created_by_id).username @staticmethod def resolve_pk(obj): @@ -59,6 +105,10 @@ class ArchiveResultSchema(Schema): def resolve_created(obj): return obj.start_ts + @staticmethod + def resolve_snapshot_timestamp(obj): + return obj.snapshot.timestamp + @staticmethod def resolve_snapshot_url(obj): return obj.snapshot.url @@ -73,11 +123,10 @@ class ArchiveResultSchema(Schema): class ArchiveResultFilterSchema(FilterSchema): - uuid: Optional[UUID] = Field(None, q='uuid') - # abid: Optional[str] = 
Field(None, q='abid') + id: Optional[str] = Field(None, q=['id__startswith', 'abid__icontains', 'old_id__startswith', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith']) - search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains']) - snapshot_uuid: Optional[UUID] = Field(None, q='snapshot_uuid__icontains') + search: Optional[str] = Field(None, q=['snapshot__url__icontains', 'snapshot__title__icontains', 'snapshot__tags__name__icontains', 'extractor', 'output__icontains', 'id__startswith', 'abid__icontains', 'old_id__startswith', 'snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith']) + snapshot_id: Optional[str] = Field(None, q=['snapshot__id__startswith', 'snapshot__abid__icontains', 'snapshot__timestamp__startswith']) snapshot_url: Optional[str] = Field(None, q='snapshot__url__icontains') snapshot_tag: Optional[str] = Field(None, q='snapshot__tags__name__icontains') @@ -93,19 +142,19 @@ class ArchiveResultFilterSchema(FilterSchema): created__lt: Optional[datetime] = Field(None, q='updated__lt') -@router.get("/archiveresults", response=List[ArchiveResultSchema]) -@paginate -def list_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)): +@router.get("/archiveresults", response=List[ArchiveResultSchema], url_name="get_archiveresult") +@paginate(CustomPagination) +def get_archiveresults(request, filters: ArchiveResultFilterSchema = Query(...)): """List all ArchiveResult entries matching these filters.""" qs = ArchiveResult.objects.all() - results = filters.filter(qs) + results = filters.filter(qs).distinct() return results -@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema) +@router.get("/archiveresult/{archiveresult_id}", response=ArchiveResultSchema, url_name="get_archiveresult") def get_archiveresult(request, archiveresult_id: str): - 
"""Get a specific ArchiveResult by abid, uuid, or pk.""" - return ArchiveResult.objects.get(Q(pk__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id) | Q(uuid__icontains=archiveresult_id)) + """Get a specific ArchiveResult by pk, abid, or old_id.""" + return ArchiveResult.objects.get(Q(id__icontains=archiveresult_id) | Q(abid__icontains=archiveresult_id) | Q(old_id__icontains=archiveresult_id)) # @router.post("/archiveresult", response=ArchiveResultSchema) @@ -137,12 +186,16 @@ def get_archiveresult(request, archiveresult_id: str): class SnapshotSchema(Schema): + TYPE: str = 'core.models.Snapshot' + + id: UUID + old_id: UUID abid: str - uuid: UUID - pk: str + modified: datetime created: datetime created_by_id: str + created_by_username: str url: str tags: str @@ -160,6 +213,11 @@ class SnapshotSchema(Schema): @staticmethod def resolve_created_by_id(obj): return str(obj.created_by_id) + + @staticmethod + def resolve_created_by_username(obj): + User = get_user_model() + return User.objects.get(id=obj.created_by_id).username @staticmethod def resolve_pk(obj): @@ -189,10 +247,14 @@ class SnapshotSchema(Schema): class SnapshotFilterSchema(FilterSchema): + id: Optional[str] = Field(None, q=['id__icontains', 'abid__icontains', 'old_id__icontains', 'timestamp__startswith']) + + old_id: Optional[str] = Field(None, q='old_id__icontains') abid: Optional[str] = Field(None, q='abid__icontains') - uuid: Optional[str] = Field(None, q='uuid__icontains') - pk: Optional[str] = Field(None, q='pk__icontains') - created_by_id: str = Field(None, q='created_by_id__icontains') + + created_by_id: str = Field(None, q='created_by_id') + created_by_username: str = Field(None, q='created_by__username__icontains') + created__gte: datetime = Field(None, q='created__gte') created__lt: datetime = Field(None, q='created__lt') created: datetime = Field(None, q='created') @@ -200,7 +262,7 @@ class SnapshotFilterSchema(FilterSchema): modified__gte: datetime = Field(None, 
q='modified__gte') modified__lt: datetime = Field(None, q='modified__lt') - search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'abid__icontains', 'uuid__icontains']) + search: Optional[str] = Field(None, q=['url__icontains', 'title__icontains', 'tags__name__icontains', 'id__icontains', 'abid__icontains', 'old_id__icontains', 'timestamp__startswith']) url: Optional[str] = Field(None, q='url') tag: Optional[str] = Field(None, q='tags__name') title: Optional[str] = Field(None, q='title__icontains') @@ -211,35 +273,33 @@ class SnapshotFilterSchema(FilterSchema): -@router.get("/snapshots", response=List[SnapshotSchema]) -@paginate -def list_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=True): +@router.get("/snapshots", response=List[SnapshotSchema], url_name="get_snapshots") +@paginate(CustomPagination) +def get_snapshots(request, filters: SnapshotFilterSchema = Query(...), with_archiveresults: bool=False): """List all Snapshot entries matching these filters.""" request.with_archiveresults = with_archiveresults qs = Snapshot.objects.all() - results = filters.filter(qs) + results = filters.filter(qs).distinct() return results -@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema) +@router.get("/snapshot/{snapshot_id}", response=SnapshotSchema, url_name="get_snapshot") def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True): """Get a specific Snapshot by abid, uuid, or pk.""" request.with_archiveresults = with_archiveresults snapshot = None try: - snapshot = Snapshot.objects.get(Q(uuid__startswith=snapshot_id) | Q(abid__startswith=snapshot_id)| Q(pk__startswith=snapshot_id)) + snapshot = Snapshot.objects.get(Q(abid__startswith=snapshot_id) | Q(id__startswith=snapshot_id) | Q(old_id__startswith=snapshot_id) | Q(timestamp__startswith=snapshot_id)) except Snapshot.DoesNotExist: pass try: - snapshot = snapshot or Snapshot.objects.get() + snapshot = 
snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id) | Q(old_id__icontains=snapshot_id)) except Snapshot.DoesNotExist: pass - try: - snapshot = snapshot or Snapshot.objects.get(Q(uuid__icontains=snapshot_id) | Q(abid__icontains=snapshot_id)) - except Snapshot.DoesNotExist: - pass + if not snapshot: + raise Snapshot.DoesNotExist return snapshot @@ -271,21 +331,94 @@ def get_snapshot(request, snapshot_id: str, with_archiveresults: bool=True): class TagSchema(Schema): - abid: Optional[UUID] = Field(None, q='abid') - uuid: Optional[UUID] = Field(None, q='uuid') - pk: Optional[UUID] = Field(None, q='pk') + TYPE: str = 'core.models.Tag' + + id: UUID + old_id: str + abid: str + modified: datetime created: datetime created_by_id: str + created_by_username: str name: str slug: str + num_snapshots: int + snapshots: List[SnapshotSchema] + @staticmethod + def resolve_old_id(obj): + return str(obj.old_id) @staticmethod def resolve_created_by_id(obj): return str(obj.created_by_id) + + @staticmethod + def resolve_created_by_username(obj): + User = get_user_model() + return User.objects.get(id=obj.created_by_id).username + + @staticmethod + def resolve_num_snapshots(obj, context): + return obj.snapshot_set.all().distinct().count() -@router.get("/tags", response=List[TagSchema]) -def list_tags(request): - return Tag.objects.all() + @staticmethod + def resolve_snapshots(obj, context): + if context['request'].with_snapshots: + return obj.snapshot_set.all().distinct() + return Snapshot.objects.none() + +@router.get("/tags", response=List[TagSchema], url_name="get_tags") +@paginate(CustomPagination) +def get_tags(request): + request.with_snapshots = False + request.with_archiveresults = False + return Tag.objects.all().distinct() + +@router.get("/tag/{tag_id}", response=TagSchema, url_name="get_tag") +def get_tag(request, tag_id: str, with_snapshots: bool=True): + request.with_snapshots = with_snapshots + request.with_archiveresults = False + 
tag = None + try: + tag = tag or Tag.objects.get(old_id__icontains=tag_id) + except (Tag.DoesNotExist, ValidationError, ValueError): + pass + + try: + tag = Tag.objects.get(abid__icontains=tag_id) + except (Tag.DoesNotExist, ValidationError): + pass + + try: + tag = tag or Tag.objects.get(id__icontains=tag_id) + except (Tag.DoesNotExist, ValidationError): + pass + return tag + + + +@router.get("/any/{abid}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema], url_name="get_any") +def get_any(request, abid: str): + request.with_snapshots = False + request.with_archiveresults = False + + response = None + try: + response = response or get_snapshot(request, abid) + except Exception: + pass + + try: + response = response or get_archiveresult(request, abid) + except Exception: + pass + + try: + response = response or get_tag(request, abid) + except Exception: + pass + + return response diff --git a/archivebox/config.py b/archivebox/config.py index 0151c3c2..afa334c6 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -1036,6 +1036,11 @@ def get_data_locations(config: ConfigDict) -> ConfigValue: 'enabled': True, 'is_valid': config['SOURCES_DIR'].exists(), }, + 'PERSONAS_DIR': { + 'path': config['PERSONAS_DIR'].resolve(), + 'enabled': True, + 'is_valid': config['PERSONAS_DIR'].exists(), + }, 'LOGS_DIR': { 'path': config['LOGS_DIR'].resolve(), 'enabled': True, @@ -1051,11 +1056,6 @@ def get_data_locations(config: ConfigDict) -> ConfigValue: 'enabled': bool(config['CUSTOM_TEMPLATES_DIR']), 'is_valid': config['CUSTOM_TEMPLATES_DIR'] and Path(config['CUSTOM_TEMPLATES_DIR']).exists(), }, - 'PERSONAS_DIR': { - 'path': config['PERSONAS_DIR'].resolve(), - 'enabled': True, - 'is_valid': config['PERSONAS_DIR'].exists(), - }, # managed by bin/docker_entrypoint.sh and python-crontab: # 'CRONTABS_DIR': { # 'path': config['CRONTABS_DIR'].resolve(), diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index 7e1aa7f9..78b6bdf8 100644 --- 
a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -1,17 +1,19 @@ __package__ = 'archivebox.core' +import json from io import StringIO from pathlib import Path from contextlib import redirect_stdout from datetime import datetime, timezone from django.contrib import admin -from django.db.models import Count -from django.urls import path +from django.db.models import Count, Q +from django.urls import path, reverse from django.utils.html import format_html from django.utils.safestring import mark_safe from django.shortcuts import render, redirect from django.contrib.auth import get_user_model +from django.core.exceptions import ValidationError from django import forms @@ -20,7 +22,7 @@ from signal_webhooks.admin import WebhookAdmin, get_webhook_model from ..util import htmldecode, urldecode, ansi_to_html -from core.models import Snapshot, ArchiveResult, Tag +from core.models import Snapshot, ArchiveResult, Tag, SnapshotTag from core.forms import AddLinkForm from core.mixins import SearchResultsAdminMixin @@ -124,31 +126,55 @@ archivebox_admin.get_urls = get_urls(archivebox_admin.get_urls).__get__(archiveb class ArchiveResultInline(admin.TabularInline): + name = 'Archive Results Log' model = ArchiveResult + # fk_name = 'snapshot' + extra = 1 + readonly_fields = ('result_id', 'start_ts', 'end_ts', 'extractor', 'command', 'cmd_version') + fields = ('id', *readonly_fields, 'status', 'output') + show_change_link = True + # # classes = ['collapse'] + # # list_display_links = ['abid'] + + def result_id(self, obj): + return format_html('[{}]', reverse('admin:core_archiveresult_change', args=(obj.id,)), obj.abid) + + def command(self, obj): + return format_html('{}', " ".join(obj.cmd or [])) + class TagInline(admin.TabularInline): - model = Snapshot.tags.through + model = Tag.snapshot_set.through + # fk_name = 'snapshot' + fields = ('id', 'tag') + extra = 1 + # min_num = 1 + max_num = 1000 + autocomplete_fields = ( + 'tag', + ) from django.contrib.admin.helpers 
import ActionForm -from django.contrib.admin.widgets import AutocompleteSelectMultiple +from django.contrib.admin.widgets import FilteredSelectMultiple -class AutocompleteTags: - model = Tag - search_fields = ['name'] - name = 'tags' - remote_field = TagInline +# class AutocompleteTags: +# model = Tag +# search_fields = ['name'] +# name = 'name' +# # source_field = 'name' +# remote_field = Tag._meta.get_field('name') -class AutocompleteTagsAdminStub: - name = 'admin' +# class AutocompleteTagsAdminStub: +# name = 'admin' class SnapshotActionForm(ActionForm): tags = forms.ModelMultipleChoiceField( queryset=Tag.objects.all(), required=False, - widget=AutocompleteSelectMultiple( - AutocompleteTags(), - AutocompleteTagsAdminStub(), + widget=FilteredSelectMultiple( + 'core_tag__name', + False, ), ) @@ -168,52 +194,92 @@ def get_abid_info(self, obj): return format_html( # URL Hash: {}
''' -     DB ID:      {}
-        .id:                      {}    
-        .uuid:                  {}    
-
+ {}     📖 API DOCS +

-     ABID:       {}
-         TS:                  {} ({})
-         URI:                 {} ({})
-         SUBTYPE:       {} ({})     -         RAND:              {} ({})

-         as ULID:               {}
-         as UUID:              {}

+     TS:                  {}        ({})
+     URI:                 {}           ({})
+     SUBTYPE:       {} ({})     +   RAND:   {} ({})     +   SALT:   {} +

+     .abid:                   {}
+     .abid.uuid:           {}
+     .id:                       {}
+     .old_id:                {}
''', - obj.pk, - obj.id, - obj.uuid, - obj.abid, + obj.api_url, obj.api_url, obj.api_docs_url, obj.ABID.ts, obj.abid_values['ts'].isoformat() if isinstance(obj.abid_values['ts'], datetime) else obj.abid_values['ts'], obj.ABID.uri, str(obj.abid_values['uri']), obj.ABID.subtype, str(obj.abid_values['subtype']), obj.ABID.rand, str(obj.abid_values['rand'])[-7:], - obj.ABID.ulid, - obj.ABID.uuid, + obj.ABID.uri_salt, + str(obj.abid), + str(obj.ABID.uuid), + obj.id, + getattr(obj, 'old_id', ''), ) @admin.register(Snapshot, site=archivebox_admin) class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin): + class Meta: + model = Snapshot + list_display = ('added', 'title_str', 'files', 'size', 'url_str') + # list_editable = ('title',) sort_fields = ('title_str', 'url_str', 'added', 'files') - readonly_fields = ('admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'identifiers') - search_fields = ('id', 'url', 'abid', 'uuid', 'timestamp', 'title', 'tags__name') - fields = ('url', 'timestamp', 'created_by', 'tags', 'title', *readonly_fields) - list_filter = ('added', 'updated', 'tags', 'archiveresult__status', 'created_by') + readonly_fields = ('tags', 'timestamp', 'admin_actions', 'status_info', 'bookmarked', 'added', 'updated', 'created', 'modified', 'API', 'link_dir') + search_fields = ('id', 'url', 'abid', 'old_id', 'timestamp', 'title', 'tags__name') + list_filter = ('added', 'updated', 'archiveresult__status', 'created_by', 'tags') + fields = ('url', 'created_by', 'title', *readonly_fields) ordering = ['-added'] actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots'] autocomplete_fields = ['tags'] - inlines = [ArchiveResultInline] + inlines = [TagInline, ArchiveResultInline] list_per_page = SNAPSHOTS_PER_PAGE action_form = SnapshotActionForm + save_on_top = True + def changelist_view(self, request, extra_context=None): extra_context = 
extra_context or {} - return super().changelist_view(request, extra_context | GLOBAL_CONTEXT) + try: + return super().changelist_view(request, extra_context | GLOBAL_CONTEXT) + except Exception as e: + self.message_user(request, f'Error occurred while loading the page: {str(e)} {request.GET} {request.POST}') + return super().changelist_view(request, GLOBAL_CONTEXT) + + def change_view(self, request, object_id, form_url="", extra_context=None): + snapshot = None + + try: + snapshot = snapshot or Snapshot.objects.get(id=object_id) + except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned, ValidationError): + pass + + try: + snapshot = snapshot or Snapshot.objects.get(abid=Snapshot.abid_prefix + object_id.split('_', 1)[-1]) + except (Snapshot.DoesNotExist, ValidationError): + pass + + + try: + snapshot = snapshot or Snapshot.objects.get(old_id=object_id) + except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned, ValidationError): + pass + + if snapshot: + object_id = str(snapshot.id) + + return super().change_view( + request, + object_id, + form_url, + extra_context=extra_context, + ) def get_urls(self): urls = super().get_urls() @@ -224,7 +290,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin): def get_queryset(self, request): self.request = request - return super().get_queryset(request).prefetch_related('tags').annotate(archiveresult_count=Count('archiveresult')) + return super().get_queryset(request).prefetch_related('tags', 'archiveresult_set').annotate(archiveresult_count=Count('archiveresult')) def tag_list(self, obj): return ', '.join(obj.tags.values_list('name', flat=True)) @@ -285,8 +351,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin): obj.extension or '-', ) - def identifiers(self, obj): - return get_abid_info(self, obj) + def API(self, obj): + try: + return get_abid_info(self, obj) + except Exception as e: + return str(e) @admin.display( description='Title', @@ -446,20 +515,34 @@ class 
SnapshotAdmin(SearchResultsAdminMixin, admin.ModelAdmin): +# @admin.register(SnapshotTag, site=archivebox_admin) +# class SnapshotTagAdmin(admin.ModelAdmin): +# list_display = ('id', 'snapshot', 'tag') +# sort_fields = ('id', 'snapshot', 'tag') +# search_fields = ('id', 'snapshot_id', 'tag_id') +# fields = ('snapshot', 'id') +# actions = ['delete_selected'] +# ordering = ['-id'] + +# def API(self, obj): +# return get_abid_info(self, obj) @admin.register(Tag, site=archivebox_admin) class TagAdmin(admin.ModelAdmin): - list_display = ('slug', 'name', 'num_snapshots', 'snapshots', 'abid') - sort_fields = ('id', 'name', 'slug', 'abid') - readonly_fields = ('created', 'modified', 'identifiers', 'num_snapshots', 'snapshots') - search_fields = ('id', 'abid', 'uuid', 'name', 'slug') - fields = ('name', 'slug', 'created_by', *readonly_fields, ) + list_display = ('abid', 'name', 'created', 'created_by', 'num_snapshots', 'snapshots') + sort_fields = ('name', 'slug', 'abid', 'created_by', 'created') + readonly_fields = ('slug', 'abid', 'created', 'modified', 'API', 'num_snapshots', 'snapshots') + search_fields = ('abid', 'name', 'slug') + fields = ('name', 'created_by', *readonly_fields) actions = ['delete_selected'] - ordering = ['-id'] + ordering = ['-created'] - def identifiers(self, obj): - return get_abid_info(self, obj) + def API(self, obj): + try: + return get_abid_info(self, obj) + except Exception as e: + return str(e) def num_snapshots(self, tag): return format_html( @@ -472,11 +555,10 @@ class TagAdmin(admin.ModelAdmin): total_count = tag.snapshot_set.count() return mark_safe('
'.join( format_html( - '{} [{}] {}', - snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...', + '[{}] {}', snap.pk, - snap.abid, - snap.url, + snap.updated.strftime('%Y-%m-%d %H:%M') if snap.updated else 'pending...', + snap.url[:64], ) for snap in tag.snapshot_set.order_by('-updated')[:10] ) + (f'
and {total_count-10} more...' if tag.snapshot_set.count() > 10 else '')) @@ -486,9 +568,9 @@ class TagAdmin(admin.ModelAdmin): class ArchiveResultAdmin(admin.ModelAdmin): list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str') sort_fields = ('start_ts', 'extractor', 'status') - readonly_fields = ('snapshot_info', 'tags_str', 'created_by', 'created', 'modified', 'identifiers') - search_fields = ('id', 'uuid', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp') - fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'cmd', 'start_ts', 'end_ts', 'cmd_version', *readonly_fields) + readonly_fields = ('snapshot_info', 'tags_str', 'created', 'modified', 'API') + search_fields = ('id', 'old_id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp') + fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'cmd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', *readonly_fields) autocomplete_fields = ['snapshot'] list_filter = ('status', 'extractor', 'start_ts', 'cmd_version') @@ -507,8 +589,11 @@ class ArchiveResultAdmin(admin.ModelAdmin): result.snapshot.url[:128], ) - def identifiers(self, obj): - return get_abid_info(self, obj) + def API(self, obj): + try: + return get_abid_info(self, obj) + except Exception as e: + return str(e) @admin.display( description='Snapshot Tags' diff --git a/archivebox/core/migrations/0024_auto_20240513_1143.py b/archivebox/core/migrations/0024_auto_20240513_1143.py index 95652a07..f8cf645c 100644 --- a/archivebox/core/migrations/0024_auto_20240513_1143.py +++ b/archivebox/core/migrations/0024_auto_20240513_1143.py @@ -2,7 +2,7 @@ from django.db import migrations from datetime import datetime -from abid_utils.abid import abid_from_values +from abid_utils.abid import abid_from_values, DEFAULT_ABID_URI_SALT def calculate_abid(self): @@ -41,6 +41,7 @@ def calculate_abid(self): uri=uri, subtype=subtype, 
rand=rand, + salt=DEFAULT_ABID_URI_SALT, ) assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}' return abid @@ -64,7 +65,8 @@ def generate_snapshot_abids(apps, schema_editor): snapshot.abid_rand_src = 'self.uuid' snapshot.abid = calculate_abid(snapshot) - snapshot.save(update_fields=["abid"]) + snapshot.uuid = snapshot.abid.uuid + snapshot.save(update_fields=["abid", "uuid"]) def generate_archiveresult_abids(apps, schema_editor): print(' Generating ArchiveResult.abid values... (may take an hour or longer for large collections...)') diff --git a/archivebox/core/migrations/0027_update_snapshot_ids.py b/archivebox/core/migrations/0027_update_snapshot_ids.py new file mode 100644 index 00000000..ad197c04 --- /dev/null +++ b/archivebox/core/migrations/0027_update_snapshot_ids.py @@ -0,0 +1,106 @@ +# Generated by Django 5.0.6 on 2024-08-18 02:48 + +from django.db import migrations + +from django.db import migrations +from datetime import datetime +from abid_utils.abid import ABID, abid_from_values, DEFAULT_ABID_URI_SALT + + +def calculate_abid(self): + """ + Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src). + """ + prefix = self.abid_prefix + ts = eval(self.abid_ts_src) + uri = eval(self.abid_uri_src) + subtype = eval(self.abid_subtype_src) + rand = eval(self.abid_rand_src) + + if (not prefix) or prefix == 'obj_': + suggested_abid = self.__class__.__name__[:3].lower() + raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})') + + if not ts: + ts = datetime.utcfromtimestamp(0) + print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat()) + + if not uri: + uri = str(self) + print(f'[!] 
WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri) + + if not subtype: + subtype = self.__class__.__name__ + print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype) + + if not rand: + rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk') + print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand) + + abid = abid_from_values( + prefix=prefix, + ts=ts, + uri=uri, + subtype=subtype, + rand=rand, + salt=DEFAULT_ABID_URI_SALT, + ) + assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}' + return abid + +def update_snapshot_ids(apps, schema_editor): + Snapshot = apps.get_model("core", "Snapshot") + num_total = Snapshot.objects.all().count() + print(f' Updating {num_total} Snapshot.id, Snapshot.uuid values in place...') + for idx, snapshot in enumerate(Snapshot.objects.all().only('abid').iterator()): + assert snapshot.abid + snapshot.abid_prefix = 'snp_' + snapshot.abid_ts_src = 'self.added' + snapshot.abid_uri_src = 'self.url' + snapshot.abid_subtype_src = '"01"' + snapshot.abid_rand_src = 'self.uuid' + + snapshot.abid = calculate_abid(snapshot) + snapshot.uuid = snapshot.abid.uuid + snapshot.save(update_fields=["abid", "uuid"]) + assert str(ABID.parse(snapshot.abid).uuid) == str(snapshot.uuid) + if idx % 1000 == 0: + print(f'Migrated {idx}/{num_total} Snapshot objects...') + +def update_archiveresult_ids(apps, schema_editor): + Snapshot = apps.get_model("core", "Snapshot") + ArchiveResult = apps.get_model("core", "ArchiveResult") + num_total = ArchiveResult.objects.all().count() + print(f' Updating {num_total} ArchiveResult.id, ArchiveResult.uuid values in place... 
(may take an hour or longer for large collections...)') + for idx, result in enumerate(ArchiveResult.objects.all().only('abid', 'snapshot_id').iterator()): + assert result.abid + result.abid_prefix = 'res_' + result.snapshot = Snapshot.objects.get(pk=result.snapshot_id) + result.snapshot_added = result.snapshot.added + result.snapshot_url = result.snapshot.url + result.abid_ts_src = 'self.snapshot_added' + result.abid_uri_src = 'self.snapshot_url' + result.abid_subtype_src = 'self.extractor' + result.abid_rand_src = 'self.id' + + result.abid = calculate_abid(result) + result.uuid = result.abid.uuid + result.uuid = ABID.parse(result.abid).uuid + result.save(update_fields=["abid", "uuid"]) + assert str(ABID.parse(result.abid).uuid) == str(result.uuid) + if idx % 5000 == 0: + print(f'Migrated {idx}/{num_total} ArchiveResult objects...') + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0026_archiveresult_created_archiveresult_created_by_and_more'), + ] + + operations = [ + migrations.RunPython(update_snapshot_ids, reverse_code=migrations.RunPython.noop), + migrations.RunPython(update_archiveresult_ids, reverse_code=migrations.RunPython.noop), + ] + + diff --git a/archivebox/core/migrations/0028_alter_archiveresult_uuid.py b/archivebox/core/migrations/0028_alter_archiveresult_uuid.py new file mode 100644 index 00000000..9b10f044 --- /dev/null +++ b/archivebox/core/migrations/0028_alter_archiveresult_uuid.py @@ -0,0 +1,19 @@ +# Generated by Django 5.0.6 on 2024-08-18 04:28 + +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0027_update_snapshot_ids'), + ] + + operations = [ + migrations.AlterField( + model_name='archiveresult', + name='uuid', + field=models.UUIDField(default=uuid.uuid4), + ), + ] diff --git a/archivebox/core/migrations/0029_alter_archiveresult_id.py b/archivebox/core/migrations/0029_alter_archiveresult_id.py new file mode 100644 index 
00000000..7464a670 --- /dev/null +++ b/archivebox/core/migrations/0029_alter_archiveresult_id.py @@ -0,0 +1,18 @@ +# Generated by Django 5.0.6 on 2024-08-18 04:28 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0028_alter_archiveresult_uuid'), + ] + + operations = [ + migrations.AlterField( + model_name='archiveresult', + name='id', + field=models.BigIntegerField(primary_key=True, serialize=False, verbose_name='ID'), + ), + ] diff --git a/archivebox/core/migrations/0030_alter_archiveresult_uuid.py b/archivebox/core/migrations/0030_alter_archiveresult_uuid.py new file mode 100644 index 00000000..3c1ad788 --- /dev/null +++ b/archivebox/core/migrations/0030_alter_archiveresult_uuid.py @@ -0,0 +1,18 @@ +# Generated by Django 5.0.6 on 2024-08-18 05:00 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0029_alter_archiveresult_id'), + ] + + operations = [ + migrations.AlterField( + model_name='archiveresult', + name='uuid', + field=models.UUIDField(unique=True), + ), + ] diff --git a/archivebox/core/migrations/0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more.py b/archivebox/core/migrations/0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more.py new file mode 100644 index 00000000..64fd6cbe --- /dev/null +++ b/archivebox/core/migrations/0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more.py @@ -0,0 +1,34 @@ +# Generated by Django 5.0.6 on 2024-08-18 05:09 + +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0030_alter_archiveresult_uuid'), + ] + + operations = [ + migrations.AlterField( + model_name='archiveresult', + name='id', + field=models.IntegerField(default=uuid.uuid4, primary_key=True, serialize=False, verbose_name='ID'), + ), + migrations.AlterField( + model_name='archiveresult', + name='uuid', + 
field=models.UUIDField(default=uuid.uuid4, unique=True), + ), + migrations.AlterField( + model_name='snapshot', + name='uuid', + field=models.UUIDField(default=uuid.uuid4, unique=True), + ), + migrations.AlterField( + model_name='tag', + name='uuid', + field=models.UUIDField(default=uuid.uuid4, null=True, unique=True), + ), + ] diff --git a/archivebox/core/migrations/0032_alter_archiveresult_id.py b/archivebox/core/migrations/0032_alter_archiveresult_id.py new file mode 100644 index 00000000..98299a31 --- /dev/null +++ b/archivebox/core/migrations/0032_alter_archiveresult_id.py @@ -0,0 +1,19 @@ +# Generated by Django 5.0.6 on 2024-08-18 05:20 + +import core.models +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0031_alter_archiveresult_id_alter_archiveresult_uuid_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='archiveresult', + name='id', + field=models.BigIntegerField(default=core.models.rand_int_id, primary_key=True, serialize=False, verbose_name='ID'), + ), + ] diff --git a/archivebox/core/migrations/0033_rename_id_archiveresult_old_id.py b/archivebox/core/migrations/0033_rename_id_archiveresult_old_id.py new file mode 100644 index 00000000..ebced58e --- /dev/null +++ b/archivebox/core/migrations/0033_rename_id_archiveresult_old_id.py @@ -0,0 +1,18 @@ +# Generated by Django 5.0.6 on 2024-08-18 05:34 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0032_alter_archiveresult_id'), + ] + + operations = [ + migrations.RenameField( + model_name='archiveresult', + old_name='id', + new_name='old_id', + ), + ] diff --git a/archivebox/core/migrations/0034_alter_archiveresult_old_id_alter_archiveresult_uuid.py b/archivebox/core/migrations/0034_alter_archiveresult_old_id_alter_archiveresult_uuid.py new file mode 100644 index 00000000..121a2154 --- /dev/null +++ 
def update_archiveresult_ids(apps, schema_editor):
    """
    Data migration: set ArchiveResult.uuid to the UUID parsed from each row's stored ABID.

    Args:
        apps: migration app registry, used to look up the historical ArchiveResult model.
        schema_editor: not used by this function; part of the RunPython callback signature.

    Raises:
        AssertionError: if a row has no abid, or the saved uuid differs from the one
            parsed out of the abid.
    """
    result_model = apps.get_model("core", "ArchiveResult")
    num_total = result_model.objects.all().count()
    print(f' Updating {num_total} ArchiveResult.id, ArchiveResult.uuid values in place... (may take an hour or longer for large collections...)')

    pending_rows = result_model.objects.all().only('abid').iterator()
    for idx, row in enumerate(pending_rows):
        assert row.abid
        parsed_uuid = ABID.parse(row.abid).uuid
        row.uuid = parsed_uuid
        row.save(update_fields=["uuid"])
        # Re-parse after saving to confirm the stored uuid still matches the abid.
        assert str(ABID.parse(row.abid).uuid) == str(row.uuid)
        if idx % 2500:
            continue
        print(f'Migrated {idx}/{num_total} ArchiveResult objects...')
('core', '0034_alter_archiveresult_old_id_alter_archiveresult_uuid'), + ] + + operations = [ + migrations.RenameField( + model_name='archiveresult', + old_name='uuid', + new_name='id', + ), + ] diff --git a/archivebox/core/migrations/0036_alter_archiveresult_id_alter_archiveresult_old_id.py b/archivebox/core/migrations/0036_alter_archiveresult_id_alter_archiveresult_old_id.py new file mode 100644 index 00000000..10b4f9c6 --- /dev/null +++ b/archivebox/core/migrations/0036_alter_archiveresult_id_alter_archiveresult_old_id.py @@ -0,0 +1,25 @@ +# Generated by Django 5.0.6 on 2024-08-18 05:59 + +import core.models +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0035_remove_archiveresult_uuid_archiveresult_id'), + ] + + operations = [ + migrations.AlterField( + model_name='archiveresult', + name='id', + field=models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False, unique=True, verbose_name='ID'), + ), + migrations.AlterField( + model_name='archiveresult', + name='old_id', + field=models.BigIntegerField(default=core.models.rand_int_id, serialize=False, verbose_name='Old ID'), + ), + ] diff --git a/archivebox/core/migrations/0037_rename_id_snapshot_old_id.py b/archivebox/core/migrations/0037_rename_id_snapshot_old_id.py new file mode 100644 index 00000000..7d901d96 --- /dev/null +++ b/archivebox/core/migrations/0037_rename_id_snapshot_old_id.py @@ -0,0 +1,18 @@ +# Generated by Django 5.0.6 on 2024-08-18 06:08 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0036_alter_archiveresult_id_alter_archiveresult_old_id'), + ] + + operations = [ + migrations.RenameField( + model_name='snapshot', + old_name='id', + new_name='old_id', + ), + ] diff --git a/archivebox/core/migrations/0038_rename_uuid_snapshot_id.py b/archivebox/core/migrations/0038_rename_uuid_snapshot_id.py new file mode 100644 index 00000000..d22a8fc4 
def update_archiveresult_snapshot_ids(apps, schema_editor):
    """
    Data migration: point each ArchiveResult.snapshot_id at its Snapshot's new id.

    For every ArchiveResult the Snapshot whose ``old_id`` equals the row's
    ``snapshot_old_id`` is looked up and its ``id`` is written to ``snapshot_id``.

    Args:
        apps: migration app registry, used to look up the historical models.
        schema_editor: not used by this function; part of the RunPython callback signature.

    Raises:
        AssertionError: if a row has no snapshot_old_id, or the assigned snapshot_id
            does not match the looked-up Snapshot's id.
    """
    result_model = apps.get_model("core", "ArchiveResult")
    snapshot_model = apps.get_model("core", "Snapshot")
    num_total = result_model.objects.all().count()
    print(f' Updating {num_total} ArchiveResult.snapshot_id values in place... (may take an hour or longer for large collections...)')

    rows = result_model.objects.all().only('snapshot_old_id').iterator(chunk_size=5000)
    for idx, row in enumerate(rows):
        assert row.snapshot_old_id
        parent = snapshot_model.objects.only('id').get(old_id=row.snapshot_old_id)
        row.snapshot_id = parent.id
        row.save(update_fields=["snapshot_id"])
        assert str(row.snapshot_id) == str(parent.id)
        if idx % 5000:
            continue
        print(f'Migrated {idx}/{num_total} ArchiveResult objects...')
a/archivebox/core/migrations/0042_remove_archiveresult_snapshot_old.py b/archivebox/core/migrations/0042_remove_archiveresult_snapshot_old.py new file mode 100644 index 00000000..3fe9f316 --- /dev/null +++ b/archivebox/core/migrations/0042_remove_archiveresult_snapshot_old.py @@ -0,0 +1,17 @@ +# Generated by Django 5.0.6 on 2024-08-18 06:51 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0041_alter_archiveresult_snapshot_and_more'), + ] + + operations = [ + migrations.RemoveField( + model_name='archiveresult', + name='snapshot_old', + ), + ] diff --git a/archivebox/core/migrations/0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py b/archivebox/core/migrations/0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py new file mode 100644 index 00000000..c0acddb0 --- /dev/null +++ b/archivebox/core/migrations/0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py @@ -0,0 +1,20 @@ +# Generated by Django 5.0.6 on 2024-08-18 06:52 + +import django.db.models.deletion +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0042_remove_archiveresult_snapshot_old'), + ] + + operations = [ + migrations.AlterField( + model_name='archiveresult', + name='snapshot', + field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='id'), + ), + ] diff --git a/archivebox/core/migrations/0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more.py b/archivebox/core/migrations/0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more.py new file mode 100644 index 00000000..b7531233 --- /dev/null +++ b/archivebox/core/migrations/0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more.py @@ -0,0 +1,40 @@ +# Generated by Django 5.0.6 on 2024-08-19 23:01 + +import django.db.models.deletion +import uuid +from django.db import migrations, models + + + +class 
Migration(migrations.Migration): + + dependencies = [ + ('core', '0043_alter_archiveresult_snapshot_alter_snapshot_id_and_more'), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + database_operations=[ + # No-op, SnapshotTag model already exists in DB + ], + state_operations=[ + migrations.CreateModel( + name='SnapshotTag', + fields=[ + ('id', models.AutoField(primary_key=True, serialize=False)), + ('snapshot', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.snapshot')), + ('tag', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.tag')), + ], + options={ + 'db_table': 'core_snapshot_tags', + 'unique_together': {('snapshot', 'tag')}, + }, + ), + migrations.AlterField( + model_name='snapshot', + name='tags', + field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', to='core.tag'), + ), + ], + ), + ] diff --git a/archivebox/core/migrations/0045_alter_snapshot_old_id.py b/archivebox/core/migrations/0045_alter_snapshot_old_id.py new file mode 100644 index 00000000..7dc1a26a --- /dev/null +++ b/archivebox/core/migrations/0045_alter_snapshot_old_id.py @@ -0,0 +1,19 @@ +# Generated by Django 5.0.6 on 2024-08-20 01:54 + +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0044_alter_archiveresult_snapshot_alter_tag_uuid_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='snapshot', + name='old_id', + field=models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False, unique=True), + ), + ] diff --git a/archivebox/core/migrations/0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py b/archivebox/core/migrations/0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py new file mode 100644 index 00000000..39216ec5 --- /dev/null +++ b/archivebox/core/migrations/0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more.py @@ -0,0 
+1,30 @@ +# Generated by Django 5.0.6 on 2024-08-20 01:55 + +import django.db.models.deletion +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0045_alter_snapshot_old_id'), + ] + + operations = [ + migrations.AlterField( + model_name='archiveresult', + name='snapshot', + field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='id'), + ), + migrations.AlterField( + model_name='snapshot', + name='id', + field=models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False, unique=True), + ), + migrations.AlterField( + model_name='snapshot', + name='old_id', + field=models.UUIDField(default=uuid.uuid4, editable=False, unique=True), + ), + ] diff --git a/archivebox/core/migrations/0047_alter_snapshottag_unique_together_and_more.py b/archivebox/core/migrations/0047_alter_snapshottag_unique_together_and_more.py new file mode 100644 index 00000000..b1c845f8 --- /dev/null +++ b/archivebox/core/migrations/0047_alter_snapshottag_unique_together_and_more.py @@ -0,0 +1,24 @@ +# Generated by Django 5.0.6 on 2024-08-20 02:16 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0046_alter_archiveresult_snapshot_alter_snapshot_id_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='archiveresult', + name='snapshot', + field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='id'), + ), + migrations.AlterField( + model_name='snapshottag', + name='tag', + field=models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag'), + ), + ] diff --git a/archivebox/core/migrations/0048_alter_archiveresult_snapshot_and_more.py b/archivebox/core/migrations/0048_alter_archiveresult_snapshot_and_more.py new file mode 
100644 index 00000000..81bc8a06 --- /dev/null +++ b/archivebox/core/migrations/0048_alter_archiveresult_snapshot_and_more.py @@ -0,0 +1,24 @@ +# Generated by Django 5.0.6 on 2024-08-20 02:17 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0047_alter_snapshottag_unique_together_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='archiveresult', + name='snapshot', + field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot'), + ), + migrations.AlterField( + model_name='snapshottag', + name='snapshot', + field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot', to_field='old_id'), + ), + ] diff --git a/archivebox/core/migrations/0049_rename_snapshot_snapshottag_snapshot_old_and_more.py b/archivebox/core/migrations/0049_rename_snapshot_snapshottag_snapshot_old_and_more.py new file mode 100644 index 00000000..aa0c5b39 --- /dev/null +++ b/archivebox/core/migrations/0049_rename_snapshot_snapshottag_snapshot_old_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 5.0.6 on 2024-08-20 02:26 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0048_alter_archiveresult_snapshot_and_more'), + ] + + operations = [ + migrations.RenameField( + model_name='snapshottag', + old_name='snapshot', + new_name='snapshot_old', + ), + migrations.AlterUniqueTogether( + name='snapshottag', + unique_together={('snapshot_old', 'tag')}, + ), + ] diff --git a/archivebox/core/migrations/0050_alter_snapshottag_snapshot_old.py b/archivebox/core/migrations/0050_alter_snapshottag_snapshot_old.py new file mode 100644 index 00000000..4bff827c --- /dev/null +++ b/archivebox/core/migrations/0050_alter_snapshottag_snapshot_old.py @@ -0,0 +1,19 @@ +# Generated by Django 5.0.6 on 2024-08-20 02:30 + +import 
def update_snapshottag_ids(apps, schema_editor):
    """
    Data migration: point each SnapshotTag.snapshot_id at its Snapshot's new id.

    For every SnapshotTag the Snapshot whose ``old_id`` equals the row's
    ``snapshot_old_id`` is looked up and its ``id`` is written to ``snapshot_id``.

    Args:
        apps: migration app registry, used to look up the historical models.
        schema_editor: not used by this function; part of the RunPython callback signature.

    Raises:
        AssertionError: if a row has no snapshot_old_id, or the assigned snapshot_id
            does not match the looked-up Snapshot's id.
    """
    snapshot_model = apps.get_model("core", "Snapshot")
    link_model = apps.get_model("core", "SnapshotTag")
    num_total = link_model.objects.all().count()
    print(f' Updating {num_total} SnapshotTag.snapshot_id values in place... (may take an hour or longer for large collections...)')

    links = link_model.objects.all().only('snapshot_old_id').iterator()
    for idx, link in enumerate(links):
        assert link.snapshot_old_id
        parent = snapshot_model.objects.get(old_id=link.snapshot_old_id)
        link.snapshot_id = parent.id
        link.save(update_fields=["snapshot_id"])
        assert str(link.snapshot_id) == str(parent.id)
        if idx % 100:
            continue
        print(f'Migrated {idx}/{num_total} SnapshotTag objects...')
field=models.ForeignKey(db_column='snapshot_id', on_delete=django.db.models.deletion.CASCADE, to='core.snapshot'), + ), + migrations.AlterUniqueTogether( + name='snapshottag', + unique_together={('snapshot', 'tag')}, + ), + ] diff --git a/archivebox/core/migrations/0053_remove_snapshottag_snapshot_old.py b/archivebox/core/migrations/0053_remove_snapshottag_snapshot_old.py new file mode 100644 index 00000000..cf50fc2c --- /dev/null +++ b/archivebox/core/migrations/0053_remove_snapshottag_snapshot_old.py @@ -0,0 +1,17 @@ +# Generated by Django 5.0.6 on 2024-08-20 02:38 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0052_alter_snapshottag_unique_together_and_more'), + ] + + operations = [ + migrations.RemoveField( + model_name='snapshottag', + name='snapshot_old', + ), + ] diff --git a/archivebox/core/migrations/0054_alter_snapshot_timestamp.py b/archivebox/core/migrations/0054_alter_snapshot_timestamp.py new file mode 100644 index 00000000..6febe7c3 --- /dev/null +++ b/archivebox/core/migrations/0054_alter_snapshot_timestamp.py @@ -0,0 +1,18 @@ +# Generated by Django 5.0.6 on 2024-08-20 02:40 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0053_remove_snapshottag_snapshot_old'), + ] + + operations = [ + migrations.AlterField( + model_name='snapshot', + name='timestamp', + field=models.CharField(db_index=True, editable=False, max_length=32, unique=True), + ), + ] diff --git a/archivebox/core/migrations/0055_alter_tag_slug.py b/archivebox/core/migrations/0055_alter_tag_slug.py new file mode 100644 index 00000000..741b1365 --- /dev/null +++ b/archivebox/core/migrations/0055_alter_tag_slug.py @@ -0,0 +1,18 @@ +# Generated by Django 5.0.6 on 2024-08-20 03:24 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0054_alter_snapshot_timestamp'), + ] + + operations = [ + 
migrations.AlterField( + model_name='tag', + name='slug', + field=models.SlugField(editable=False, max_length=100, unique=True), + ), + ] diff --git a/archivebox/core/migrations/0056_remove_tag_uuid.py b/archivebox/core/migrations/0056_remove_tag_uuid.py new file mode 100644 index 00000000..9c01507e --- /dev/null +++ b/archivebox/core/migrations/0056_remove_tag_uuid.py @@ -0,0 +1,17 @@ +# Generated by Django 5.0.6 on 2024-08-20 03:25 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0055_alter_tag_slug'), + ] + + operations = [ + migrations.RemoveField( + model_name='tag', + name='uuid', + ), + ] diff --git a/archivebox/core/migrations/0057_rename_id_tag_old_id.py b/archivebox/core/migrations/0057_rename_id_tag_old_id.py new file mode 100644 index 00000000..ebe20b01 --- /dev/null +++ b/archivebox/core/migrations/0057_rename_id_tag_old_id.py @@ -0,0 +1,18 @@ +# Generated by Django 5.0.6 on 2024-08-20 03:29 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0056_remove_tag_uuid'), + ] + + operations = [ + migrations.RenameField( + model_name='tag', + old_name='id', + new_name='old_id', + ), + ] diff --git a/archivebox/core/migrations/0058_alter_tag_old_id.py b/archivebox/core/migrations/0058_alter_tag_old_id.py new file mode 100644 index 00000000..4cc291c0 --- /dev/null +++ b/archivebox/core/migrations/0058_alter_tag_old_id.py @@ -0,0 +1,19 @@ +# Generated by Django 5.0.6 on 2024-08-20 03:30 + +import core.models +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0057_rename_id_tag_old_id'), + ] + + operations = [ + migrations.AlterField( + model_name='tag', + name='old_id', + field=models.BigIntegerField(default=core.models.rand_int_id, primary_key=True, serialize=False, verbose_name='Old ID'), + ), + ] diff --git a/archivebox/core/migrations/0059_tag_id.py 
def calculate_abid(self):
    """
    Return a freshly derived ABID (assembled from attrs defined in ABIDModel.abid_*_src).

    ``self`` is a model instance carrying ``abid_prefix`` plus four ``abid_*_src``
    strings; each string is evaluated as a Python expression with ``self`` in scope
    to obtain the ts / uri / subtype / rand inputs. Any input that evaluates falsy is
    replaced by a placeholder and a warning is printed.

    Raises:
        Exception: if ``abid_prefix`` is empty or still the generic 'obj_' default.
        AssertionError: if the resulting ABID has no ulid, uuid or typeid.
    """
    # Imported here because this migration module has no file-level datetime import;
    # without it the ts fallback below raised NameError instead of using a placeholder.
    from datetime import datetime

    prefix = self.abid_prefix
    # NOTE(review): eval() executes the abid_*_src strings as code. In this file they
    # are hard-coded by the calling migration function; never feed external data here.
    ts = eval(self.abid_ts_src)
    uri = eval(self.abid_uri_src)
    subtype = eval(self.abid_subtype_src)
    rand = eval(self.abid_rand_src)

    if (not prefix) or prefix == 'obj_':
        suggested_abid = self.__class__.__name__[:3].lower()
        raise Exception(f'{self.__class__.__name__}.abid_prefix must be defined to calculate ABIDs (suggested: {suggested_abid})')

    if not ts:
        # Naive Unix epoch: the same value utcfromtimestamp(0) produces, without
        # relying on that deprecated constructor.
        ts = datetime(1970, 1, 1)
        print(f'[!] WARNING: Generating ABID with ts=0000000000 placeholder because {self.__class__.__name__}.abid_ts_src={self.abid_ts_src} is unset!', ts.isoformat())

    if not uri:
        uri = str(self)
        print(f'[!] WARNING: Generating ABID with uri=str(self) placeholder because {self.__class__.__name__}.abid_uri_src={self.abid_uri_src} is unset!', uri)

    if not subtype:
        subtype = self.__class__.__name__
        print(f'[!] WARNING: Generating ABID with subtype={subtype} placeholder because {self.__class__.__name__}.abid_subtype_src={self.abid_subtype_src} is unset!', subtype)

    if not rand:
        rand = getattr(self, 'uuid', None) or getattr(self, 'id', None) or getattr(self, 'pk')
        print(f'[!] WARNING: Generating ABID with rand=self.id placeholder because {self.__class__.__name__}.abid_rand_src={self.abid_rand_src} is unset!', rand)

    abid = abid_from_values(
        prefix=prefix,
        ts=ts,
        uri=uri,
        subtype=subtype,
        rand=rand,
    )
    assert abid.ulid and abid.uuid and abid.typeid, f'Failed to calculate {prefix}_ABID for {self.__class__.__name__}'
    return abid


def update_archiveresult_ids(apps, schema_editor):
    """
    Data migration: compute an ABID for every Tag and store its UUID in Tag.id.

    The function name is kept as-is because the Migration operations reference it,
    even though it operates on Tag rows rather than ArchiveResult rows.

    Args:
        apps: migration app registry, used to look up the historical Tag model.
        schema_editor: not used by this function; part of the RunPython callback signature.

    Raises:
        AssertionError: if a tag has no name, or the saved id does not match the uuid
            parsed back out of the new abid.
    """
    Tag = apps.get_model("core", "Tag")
    num_total = Tag.objects.all().count()
    # The banner used to mention ArchiveResult.uuid, which this function never touches.
    print(f' Updating {num_total} Tag.id, Tag.abid values in place...')
    for idx, tag in enumerate(Tag.objects.all().iterator()):
        assert tag.name
        tag.abid_prefix = 'tag_'
        tag.abid_ts_src = 'self.created'
        tag.abid_uri_src = 'self.slug'
        tag.abid_subtype_src = '"03"'
        tag.abid_rand_src = 'self.old_id'
        tag.abid = calculate_abid(tag)
        tag.id = tag.abid.uuid
        tag.save(update_fields=["abid", "id"])
        assert str(ABID.parse(tag.abid).uuid) == str(tag.id)
        if idx % 10 == 0:
            print(f'Migrated {idx}/{num_total} Tag objects...')
def update_snapshottag_ids(apps, schema_editor):
    """
    Data migration: point each SnapshotTag.tag_id at its Tag's new id.

    For every SnapshotTag the Tag whose ``old_id`` equals the row's ``old_tag_id`` is
    looked up and its ``id`` is written to ``tag_id``.

    Args:
        apps: migration app registry, used to look up the historical models.
        schema_editor: not used by this function; part of the RunPython callback signature.

    Raises:
        AssertionError: if a row has no old_tag_id, or the assigned tag_id does not
            match the looked-up Tag's id.
    """
    tag_model = apps.get_model("core", "Tag")
    link_model = apps.get_model("core", "SnapshotTag")
    num_total = link_model.objects.all().count()
    print(f' Updating {num_total} SnapshotTag.tag_id values in place... (may take an hour or longer for large collections...)')

    links = link_model.objects.all().only('old_tag_id').iterator()
    for idx, link in enumerate(links):
        assert link.old_tag_id
        target = tag_model.objects.get(old_id=link.old_tag_id)
        link.tag_id = target.id
        link.save(update_fields=["tag_id"])
        assert str(link.tag_id) == str(target.id)
        if idx % 100:
            continue
        print(f'Migrated {idx}/{num_total} SnapshotTag objects...')
), + migrations.AlterField( + model_name='snapshottag', + name='tag', + field=models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag', to_field='id'), + ), + migrations.AlterUniqueTogether( + name='snapshottag', + unique_together={('snapshot', 'tag')}, + ), + ] diff --git a/archivebox/core/migrations/0065_remove_snapshottag_old_tag.py b/archivebox/core/migrations/0065_remove_snapshottag_old_tag.py new file mode 100644 index 00000000..16b2eea0 --- /dev/null +++ b/archivebox/core/migrations/0065_remove_snapshottag_old_tag.py @@ -0,0 +1,17 @@ +# Generated by Django 5.0.6 on 2024-08-20 03:51 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0064_alter_snapshottag_unique_together_and_more'), + ] + + operations = [ + migrations.RemoveField( + model_name='snapshottag', + name='old_tag', + ), + ] diff --git a/archivebox/core/migrations/0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id.py b/archivebox/core/migrations/0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id.py new file mode 100644 index 00000000..e6022eab --- /dev/null +++ b/archivebox/core/migrations/0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id.py @@ -0,0 +1,31 @@ +# Generated by Django 5.0.6 on 2024-08-20 03:52 + +import core.models +import django.db.models.deletion +import uuid +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0065_remove_snapshottag_old_tag'), + ] + + operations = [ + migrations.AlterField( + model_name='snapshottag', + name='tag', + field=models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag', to_field='id'), + ), + migrations.AlterField( + model_name='tag', + name='id', + field=models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False, unique=True), + ), + migrations.AlterField( + model_name='tag', + name='old_id', + 
field=models.BigIntegerField(default=core.models.rand_int_id, serialize=False, unique=True, verbose_name='Old ID'), + ), + ] diff --git a/archivebox/core/migrations/0067_alter_snapshottag_tag.py b/archivebox/core/migrations/0067_alter_snapshottag_tag.py new file mode 100644 index 00000000..b1c9f6a5 --- /dev/null +++ b/archivebox/core/migrations/0067_alter_snapshottag_tag.py @@ -0,0 +1,19 @@ +# Generated by Django 5.0.6 on 2024-08-20 03:53 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0066_alter_snapshottag_tag_alter_tag_id_alter_tag_old_id'), + ] + + operations = [ + migrations.AlterField( + model_name='snapshottag', + name='tag', + field=models.ForeignKey(db_column='tag_id', on_delete=django.db.models.deletion.CASCADE, to='core.tag'), + ), + ] diff --git a/archivebox/core/migrations/0068_alter_archiveresult_options.py b/archivebox/core/migrations/0068_alter_archiveresult_options.py new file mode 100644 index 00000000..d5606592 --- /dev/null +++ b/archivebox/core/migrations/0068_alter_archiveresult_options.py @@ -0,0 +1,17 @@ +# Generated by Django 5.0.6 on 2024-08-20 07:26 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0067_alter_snapshottag_tag'), + ] + + operations = [ + migrations.AlterModelOptions( + name='archiveresult', + options={'verbose_name': 'Archive Result', 'verbose_name_plural': 'Archive Results Log'}, + ), + ] diff --git a/archivebox/core/models.py b/archivebox/core/models.py index 2c9a9969..c2b6d4e6 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -5,6 +5,7 @@ from typing import Optional, List, Dict from django_stubs_ext.db.models import TypedModelMeta import json +import random import uuid from uuid import uuid4 @@ -14,9 +15,8 @@ from django.db import models from django.utils.functional import cached_property from django.utils.text import slugify from 
django.core.cache import cache -from django.urls import reverse +from django.urls import reverse, reverse_lazy from django.db.models import Case, When, Value, IntegerField -from django.contrib.auth.models import User # noqa from abid_utils.models import ABIDModel, ABIDField @@ -35,6 +35,8 @@ STATUS_CHOICES = [ ("skipped", "skipped") ] +def rand_int_id(): + return random.getrandbits(32) # class BaseModel(models.Model): @@ -48,24 +50,26 @@ STATUS_CHOICES = [ # abstract = True + + class Tag(ABIDModel): """ Based on django-taggit model + ABID base. """ abid_prefix = 'tag_' abid_ts_src = 'self.created' # TODO: add created/modified time - abid_uri_src = 'self.name' + abid_uri_src = 'self.slug' abid_subtype_src = '"03"' - abid_rand_src = 'self.id' + abid_rand_src = 'self.old_id' - # id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True) - id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID') - uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True) + old_id = models.BigIntegerField(unique=True, default=rand_int_id, serialize=False, verbose_name='Old ID') # legacy PK + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False, unique=True) abid = ABIDField(prefix=abid_prefix) name = models.CharField(unique=True, blank=False, max_length=100) - slug = models.SlugField(unique=True, blank=True, max_length=100) + slug = models.SlugField(unique=True, blank=False, max_length=100, editable=False) # slug is autoset on save from name, never set it manually @@ -76,6 +80,10 @@ class Tag(ABIDModel): def __str__(self): return self.name + # @property + # def old_id(self): + # return self.id + def slugify(self, tag, i=None): slug = slugify(tag) if i is not None: @@ -103,38 +111,67 @@ class Tag(ABIDModel): i = 1 if i is None else i+1 else: return super().save(*args, **kwargs) + + @property + def api_url(self) -> str: + # /api/v1/core/snapshot/{uulid} + return reverse_lazy('api-1:get_tag', args=[self.abid]) + 
@property + def api_docs_url(self) -> str: + return f'/api/v1/docs#/Core%20Models/api_v1_core_get_tag' + +class SnapshotTag(models.Model): + id = models.AutoField(primary_key=True) + + snapshot = models.ForeignKey('Snapshot', db_column='snapshot_id', on_delete=models.CASCADE, to_field='id') + tag = models.ForeignKey(Tag, db_column='tag_id', on_delete=models.CASCADE, to_field='id') + + class Meta: + db_table = 'core_snapshot_tags' + unique_together = [('snapshot', 'tag')] class Snapshot(ABIDModel): abid_prefix = 'snp_' abid_ts_src = 'self.added' abid_uri_src = 'self.url' abid_subtype_src = '"01"' - abid_rand_src = 'self.id' + abid_rand_src = 'self.old_id' - id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) # legacy pk - uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True) + old_id = models.UUIDField(default=uuid.uuid4, editable=False, unique=True) # legacy pk + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True, unique=True) abid = ABIDField(prefix=abid_prefix) url = models.URLField(unique=True, db_index=True) - timestamp = models.CharField(max_length=32, unique=True, db_index=True) + timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False) title = models.CharField(max_length=512, null=True, blank=True, db_index=True) + + tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag')) added = models.DateTimeField(auto_now_add=True, db_index=True) updated = models.DateTimeField(auto_now=True, blank=True, null=True, db_index=True) - tags = models.ManyToManyField(Tag, blank=True) keys = ('url', 'timestamp', 'title', 'tags', 'updated') + @property + def uuid(self): + return self.id def __repr__(self) -> str: - title = self.title or '-' - return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})' + title = (self.title_stripped or '-')[:64] + return f'[{self.timestamp}] {self.url[:64]} ({title})' def 
__str__(self) -> str: - title = self.title or '-' - return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})' + title = (self.title_stripped or '-')[:64] + return f'[{self.timestamp}] {self.url[:64]} ({title})' + + def save(self, *args, **kwargs): + super().save(*args, **kwargs) + try: + assert str(self.id) == str(self.ABID.uuid) == str(self.uuid), f'Snapshot.id ({self.id}) does not match .ABID.uuid ({self.ABID.uuid})' + except AssertionError as e: + print(e) @classmethod def from_json(cls, info: dict): @@ -167,6 +204,19 @@ class Snapshot(ABIDModel): def icons(self) -> str: return snapshot_icons(self) + + @property + def api_url(self) -> str: + # /api/v1/core/snapshot/{uulid} + return reverse_lazy('api-1:get_snapshot', args=[self.abid]) + + @property + def api_docs_url(self) -> str: + return f'/api/v1/docs#/Core%20Models/api_v1_core_get_snapshot' + + @cached_property + def title_stripped(self) -> str: + return (self.title or '').replace("\n", " ").replace("\r", "") @cached_property def extension(self) -> str: @@ -317,21 +367,21 @@ class ArchiveResultManager(models.Manager): qs = qs.annotate(indexing_precedence=Case(*precedence, default=Value(1000),output_field=IntegerField())).order_by('indexing_precedence') return qs - class ArchiveResult(ABIDModel): abid_prefix = 'res_' abid_ts_src = 'self.snapshot.added' abid_uri_src = 'self.snapshot.url' abid_subtype_src = 'self.extractor' - abid_rand_src = 'self.uuid' + abid_rand_src = 'self.id' EXTRACTOR_CHOICES = EXTRACTOR_CHOICES - id = models.AutoField(primary_key=True, serialize=False, verbose_name='ID') # legacy pk TODO: move to UUIDField - # id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) - uuid = models.UUIDField(blank=True, null=True, editable=True, unique=True) + old_id = models.BigIntegerField(default=rand_int_id, serialize=False, verbose_name='Old ID') + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=True, unique=True, verbose_name='ID') abid = 
ABIDField(prefix=abid_prefix) - snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE) + snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE, to_field='id', db_column='snapshot_id') + extractor = models.CharField(choices=EXTRACTOR_CHOICES, max_length=32) cmd = models.JSONField() pwd = models.CharField(max_length=256) @@ -344,15 +394,36 @@ class ArchiveResult(ABIDModel): objects = ArchiveResultManager() class Meta(TypedModelMeta): - verbose_name = 'Result' + verbose_name = 'Archive Result' + verbose_name_plural = 'Archive Results Log' + def __str__(self): return self.extractor + def save(self, *args, **kwargs): + super().save(*args, **kwargs) + try: + assert str(self.id) == str(self.ABID.uuid) == str(self.uuid), f'ArchiveResult.id ({self.id}) does not match .ABID.uuid ({self.ABID.uuid})' + except AssertionError as e: + print(e) + + @property + def uuid(self): + return self.id + @cached_property def snapshot_dir(self): return Path(self.snapshot.link_dir) + @property + def api_url(self) -> str: + # /api/v1/core/archiveresult/{uulid} + return reverse_lazy('api-1:get_archiveresult', args=[self.abid]) + + @property + def api_docs_url(self) -> str: + return f'/api/v1/docs#/Core%20Models/api_v1_core_get_archiveresult' @property def extractor_module(self): diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index 870c5681..cac65ee6 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -83,7 +83,7 @@ INSTALLED_APPS = [ 'django.contrib.staticfiles', 'django.contrib.admin', 'django_jsonform', - + 'signal_webhooks', 'abid_utils', 'plugantic', @@ -120,6 +120,8 @@ MIDDLEWARE = [ ### Authentication Settings ################################################################################ +# AUTH_USER_MODEL = 'auth.User' # cannot be easily changed unfortunately + AUTHENTICATION_BACKENDS = [ 'django.contrib.auth.backends.RemoteUserBackend', 'django.contrib.auth.backends.ModelBackend', @@ -463,6 +465,7 @@ SIGNAL_WEBHOOKS 
= { }, } +DATA_UPLOAD_MAX_NUMBER_FIELDS = None ADMIN_DATA_VIEWS = { "NAME": "Environment", diff --git a/archivebox/core/urls.py b/archivebox/core/urls.py index 14b3d774..04382c99 100644 --- a/archivebox/core/urls.py +++ b/archivebox/core/urls.py @@ -38,7 +38,7 @@ urlpatterns = [ path('accounts/', include('django.contrib.auth.urls')), path('admin/', archivebox_admin.urls), - path("api/", include('api.urls')), + path("api/", include('api.urls'), name='api'), path('health/', HealthCheckView.as_view(), name='healthcheck'), path('error/', lambda *_: 1/0), diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 7e14e8c1..1b322d39 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -181,6 +181,7 @@ class SnapshotView(View): except (IndexError, ValueError): slug, archivefile = path.split('/', 1)[0], 'index.html' + # slug is a timestamp if slug.replace('.','').isdigit(): @@ -227,7 +228,7 @@ class SnapshotView(View): snap.timestamp, snap.timestamp, snap.url, - snap.title or '', + snap.title_stripped[:64] or '', ) for snap in Snapshot.objects.filter(timestamp__startswith=slug).only('url', 'timestamp', 'title', 'added').order_by('-added') ) @@ -278,12 +279,35 @@ class SnapshotView(View): content_type="text/html", status=404, ) + + # # slud is an ID + # ulid = slug.split('_', 1)[-1] + # try: + # try: + # snapshot = snapshot or Snapshot.objects.get(Q(abid=ulid) | Q(id=ulid) | Q(old_id=ulid)) + # except Snapshot.DoesNotExist: + # pass + + # try: + # snapshot = Snapshot.objects.get(Q(abid__startswith=slug) | Q(abid__startswith=Snapshot.abid_prefix + slug) | Q(id__startswith=slug) | Q(old_id__startswith=slug)) + # except (Snapshot.DoesNotExist, Snapshot.MultipleObjectsReturned): + # pass + + # try: + # snapshot = snapshot or Snapshot.objects.get(Q(abid__icontains=snapshot_id) | Q(id__icontains=snapshot_id) | Q(old_id__icontains=snapshot_id)) + # except Snapshot.DoesNotExist: + # pass + # return redirect(f'/archive/{snapshot.timestamp}/index.html') 
+ # except Snapshot.DoesNotExist: + # pass + # slug is a URL try: try: - # try exact match on full url first + # try exact match on full url / ABID first snapshot = Snapshot.objects.get( Q(url='http://' + path) | Q(url='https://' + path) | Q(id__startswith=path) + | Q(abid__icontains=path) | Q(id__icontains=path) | Q(old_id__icontains=path) ) except Snapshot.DoesNotExist: # fall back to match on exact base_url @@ -317,15 +341,17 @@ class SnapshotView(View): except Snapshot.MultipleObjectsReturned: snapshot_hrefs = mark_safe('
').join( format_html( - '{}
{} {} {}', + '{} {} {} {} {}', snap.added.strftime('%Y-%m-%d %H:%M:%S'), + snap.abid, snap.timestamp, snap.timestamp, snap.url, - snap.title or '', + snap.title_stripped[:64] or '', ) for snap in Snapshot.objects.filter( Q(url__startswith='http://' + base_url(path)) | Q(url__startswith='https://' + base_url(path)) + | Q(abid__icontains=path) | Q(id__icontains=path) | Q(old_id__icontains=path) ).only('url', 'timestamp', 'title', 'added').order_by('-added') ) return HttpResponse( diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index c2644eb2..5dfe4630 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -266,7 +266,7 @@ class Link: @cached_property def snapshot(self): from core.models import Snapshot - return Snapshot.objects.only('uuid').get(url=self.url) + return Snapshot.objects.only('id').get(url=self.url) @cached_property def snapshot_id(self): @@ -274,7 +274,7 @@ class Link: @cached_property def snapshot_uuid(self): - return str(self.snapshot.uuid) + return str(self.snapshot.id) @cached_property def snapshot_abid(self): diff --git a/archivebox/manage.py b/archivebox/manage.py index 413a4cfb..6e8c578a 100755 --- a/archivebox/manage.py +++ b/archivebox/manage.py @@ -7,7 +7,7 @@ if __name__ == '__main__': # versions of ./manage.py commands whenever possible. When that's not possible # (e.g. 
makemigrations), you can comment out this check temporarily - if not ('makemigrations' in sys.argv or 'migrate' in sys.argv or 'startapp' in sys.argv): + if not ('makemigrations' in sys.argv or 'migrate' in sys.argv or 'startapp' in sys.argv or 'squashmigrations' in sys.argv): print("[X] Don't run ./manage.py directly (unless you are a developer running makemigrations):") print() print(' Hint: Use these archivebox CLI commands instead of the ./manage.py equivalents:') diff --git a/archivebox/templates/admin/base.html b/archivebox/templates/admin/base.html index 897a26d5..00e2f205 100644 --- a/archivebox/templates/admin/base.html +++ b/archivebox/templates/admin/base.html @@ -45,6 +45,13 @@ {% endif %} {% endblock %} + + + +