diff --git a/archivebox/core/migrations/0023_upgrade_to_0_9_0.py b/archivebox/core/migrations/0023_upgrade_to_0_9_0.py index 0f5ac5ac..3fa8a1a4 100644 --- a/archivebox/core/migrations/0023_upgrade_to_0_9_0.py +++ b/archivebox/core/migrations/0023_upgrade_to_0_9_0.py @@ -16,12 +16,18 @@ def upgrade_core_tables(apps, schema_editor): """Upgrade core tables from v0.7.2 or v0.8.6rc0 to v0.9.0.""" cursor = connection.cursor() - # Check if core_archiveresult table exists + # Check if core_archiveresult table exists AND has data cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='core_archiveresult'") if not cursor.fetchone(): # Fresh install - no migration needed, tables will be created by later migrations return + # Check if table has any rows (fresh install has empty tables from CREATE TABLE IF NOT EXISTS) + cursor.execute("SELECT COUNT(*) FROM core_archiveresult") + if cursor.fetchone()[0] == 0: + # Fresh install with empty tables - skip migration + return + # Detect which version we're migrating from archiveresult_cols = get_table_columns('core_archiveresult') has_uuid = 'uuid' in archiveresult_cols @@ -231,11 +237,58 @@ def upgrade_core_tables(apps, schema_editor): cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='core_tag'") if cursor.fetchone(): - cursor.execute(""" - INSERT OR IGNORE INTO core_tag_new (id, name, slug) - SELECT id, name, slug - FROM core_tag; - """) + tag_cols = get_table_columns('core_tag') + cursor.execute("PRAGMA table_info(core_tag)") + tag_id_type = None + for row in cursor.fetchall(): + if row[1] == 'id': # row[1] is column name + tag_id_type = row[2] # row[2] is type + break + + if tag_id_type and 'char' in tag_id_type.lower(): + # v0.8.6rc0: Tag IDs are UUIDs, need to convert to INTEGER + print('Converting Tag IDs from UUID to INTEGER...') + + # Get all tags with their UUIDs + cursor.execute("SELECT id, name, slug, created_at, modified_at, created_by_id FROM core_tag ORDER BY name") + tags = cursor.fetchall() + + # Create mapping from old UUID to new INTEGER ID + uuid_to_int_map = {} + for i, tag in enumerate(tags, start=1): + old_id, name, slug, created_at, modified_at, created_by_id = tag + uuid_to_int_map[old_id] = i + # Insert with new INTEGER ID + cursor.execute(""" + INSERT OR IGNORE INTO core_tag_new (id, name, slug, created_at, modified_at, created_by_id) + VALUES (?, ?, ?, ?, ?, ?) + """, (i, name, slug, created_at, modified_at, created_by_id)) + + # Update snapshot_tags to use new INTEGER IDs + cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='core_snapshot_tags'") + if cursor.fetchone(): + cursor.execute("SELECT id, snapshot_id, tag_id FROM core_snapshot_tags") + snapshot_tags = cursor.fetchall() + + # Delete old entries + cursor.execute("DELETE FROM core_snapshot_tags") + + # Re-insert with new integer tag IDs + for st_id, snapshot_id, old_tag_id in snapshot_tags: + new_tag_id = uuid_to_int_map.get(old_tag_id) + if new_tag_id: + cursor.execute(""" + INSERT OR IGNORE INTO core_snapshot_tags (id, snapshot_id, tag_id) + VALUES (?, ?, ?) + """, (st_id, snapshot_id, new_tag_id)) + else: + # v0.7.2: Tag IDs are already INTEGER + print('Migrating Tag from v0.7.2 schema...') + cursor.execute(""" + INSERT OR IGNORE INTO core_tag_new (id, name, slug) + SELECT id, name, slug + FROM core_tag; + """) cursor.execute("DROP TABLE IF EXISTS core_tag;") cursor.execute("ALTER TABLE core_tag_new RENAME TO core_tag;") diff --git a/archivebox/core/migrations/0024_assign_default_crawl.py b/archivebox/core/migrations/0024_assign_default_crawl.py index 8e985032..b6890b8c 100644 --- a/archivebox/core/migrations/0024_assign_default_crawl.py +++ b/archivebox/core/migrations/0024_assign_default_crawl.py @@ -33,6 +33,7 @@ def create_default_crawl_and_assign_snapshots(apps, schema_editor): """, [datetime.now().isoformat()]) # Create a default crawl for migrated snapshots + # At this point crawls_crawl is guaranteed to have v0.9.0 schema (crawls/0002 ran first) crawl_id = str(uuid_lib.uuid4()) now = datetime.now().isoformat() @@ -41,8 +42,8 @@ def create_default_crawl_and_assign_snapshots(apps, schema_editor): id, created_at, modified_at, num_uses_succeeded, num_uses_failed, urls, max_depth, tags_str, label, notes, output_dir, status, retry_at, created_by_id, schedule_id, config, persona_id - ) VALUES (?, ?, ?, 0, 0, '', 0, '', 'Migrated from v0.7.2', - 'Auto-created crawl for snapshots migrated from v0.7.2', '', + ) VALUES (?, ?, ?, 0, 0, '', 0, '', 'Migrated from v0.7.2/v0.8.6', + 'Auto-created crawl for migrated snapshots', '', 'sealed', ?, 1, NULL, '{}', NULL) """, [crawl_id, now, now, now]) @@ -56,7 +57,7 @@ class Migration(migrations.Migration): dependencies = [ ('core', '0023_upgrade_to_0_9_0'), - ('crawls', '0001_initial'), + ('crawls', '0002_upgrade_from_0_8_6'), ('auth', '0012_alter_user_first_name_max_length'), ] diff --git a/archivebox/hooks.py b/archivebox/hooks.py index a9bb671f..9078e02a 100644 --- a/archivebox/hooks.py +++ b/archivebox/hooks.py @@ -1198,10 +1198,10 @@ def process_hook_records(records: List[Dict[str, Any]], overrides: Dict[str, Any continue try: - # Dispatch to appropriate model's from_jsonl() method + # Dispatch to appropriate model's from_json() method if record_type == 'Snapshot': from archivebox.core.models import Snapshot - obj = Snapshot.from_jsonll(record.copy(), overrides) + obj = Snapshot.from_json(record.copy(), overrides) if obj: stats['Snapshot'] = stats.get('Snapshot', 0) + 1