mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-02-20 00:56:07 +08:00
54 lines
1.2 KiB
Python
Executable File
54 lines
1.2 KiB
Python
Executable File
#!/usr/bin/env python3
"""
Emit readability-extractor Binary dependency for the crawl.
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
|
|
|
|
def get_env(name: str, default: str = '') -> str:
    """Return the environment variable *name* with surrounding whitespace removed.

    Args:
        name: Name of the environment variable to look up.
        default: Value used when the variable is not set. It is stripped
            as well, because the strip is applied to whatever is looked up.

    Returns:
        The stripped value of the variable, or the stripped ``default``
        when the variable is absent from ``os.environ``.
    """
    return os.environ.get(name, default).strip()
|
|
|
|
def get_env_bool(name: str, default: bool = False) -> bool:
    """Interpret the environment variable *name* as a boolean flag.

    The value is read through ``get_env`` (so it is whitespace-stripped)
    and compared case-insensitively.

    Args:
        name: Name of the environment variable to look up.
        default: Result returned when the variable is unset, empty, or
            holds text that is not one of the recognised spellings.

    Returns:
        ``True`` for ``true``/``1``/``yes``/``on``, ``False`` for
        ``false``/``0``/``no``/``off``, otherwise ``default``.
    """
    val = get_env(name, '').lower()
    if val in ('true', '1', 'yes', 'on'):
        return True
    if val in ('false', '0', 'no', 'off'):
        return False
    # Unset, empty, or unrecognised text: fall back to the caller's default.
    return default
|
|
|
|
|
|
def output_binary(name: str, binproviders: str) -> None:
    """Output Binary JSONL record for a dependency.

    Prints a single JSON object on one line to stdout.

    Args:
        name: Value stored under the record's ``name`` key.
        binproviders: Value stored under the record's ``binproviders`` key
            (the caller in this file passes a comma-separated string).

    The record always carries the same ``npm`` override, pointing at the
    ArchiveBox readability-extractor repository, and the ``MACHINE_ID``
    environment variable (empty string when unset) as ``machine_id``.
    """
    machine_id = os.environ.get('MACHINE_ID', '')

    record = {
        'type': 'Binary',
        'name': name,
        'binproviders': binproviders,
        'overrides': {
            'npm': {
                'packages': ['https://github.com/ArchiveBox/readability-extractor'],
            },
        },
        'machine_id': machine_id,
    }
    print(json.dumps(record))
|
|
|
|
|
|
def main():
    """Emit the readability-extractor Binary record unless disabled.

    Reads the ``READABILITY_ENABLED`` environment flag (treated as enabled
    when unset or unrecognised). When the flag is false the script exits
    immediately without printing anything; otherwise it prints one Binary
    record for ``readability-extractor`` with providers ``npm,env``.

    Raises:
        SystemExit: Always, with exit status 0, on both paths.
    """
    readability_enabled = get_env_bool('READABILITY_ENABLED', True)

    if not readability_enabled:
        # Nothing to declare when the extractor is switched off.
        sys.exit(0)

    output_binary(name='readability-extractor', binproviders='npm,env')

    sys.exit(0)
|
|
|
|
|
|
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
|