mirror of
https://github.com/linuxserver/docker-diskover.git
synced 2026-02-05 03:13:23 +08:00
125 lines
4.8 KiB
INI
125 lines
4.8 KiB
INI
; diskover config file
|
|
; if you make any changes, restart worker bots so they get the new config
|
|
|
|
[excludes]
|
|
; directory names and absolute paths you want to exclude from crawl, case-sensitive, can include wildcards (.* or backup* or /dir/dirname* or *tmp or *tmp* etc)
|
|
dirs = .*,.snapshot,.Snapshot,.zfs
|
|
; files you want to exclude from crawl, case-sensitive, can include wildcards (.*, *.doc or NULLEXT for files with no extension)
|
|
files = .*,Thumbs.db,.DS_Store,._.DS_Store,.localized,desktop.ini
|
|
|
|
[includes]
|
|
; directory names and absolute paths you want to include (whitelist), case-sensitive, you don't need to whitelist rootdir (-d rootdir)
|
|
;dirs = .recycle
|
|
; files you want to include (whitelist), case-sensitive
|
|
;files =
|
|
|
|
[autotag]
|
|
; pattern dictionaries for diskover bots to use when auto-tagging, values are case-sensitive, can include wildcard for ext, name or path (tmp* or TMP* or *tmp or *TMP* etc)
|
|
;files = [{"name": [], "name_exclude": [], "ext": ["tmp*", "TMP*", "temp*", "TEMP*", "cache*", "CACHE*"], "path": ["*/Application Support/*", "*/Containers/*"], "path_exclude": [], "mtime": 90, "atime": 0, "ctime": 90, "tag": "delete", "tag_custom": "autotag"}]
|
|
;dirs = [{"name": ["*tmp*", "*TMP*", "*temp*", "*TEMP*", "*Temp*", "*cache*", "*CACHE*", "*Cache*"], "name_exclude": ["*templates*", "*Templates*"], "path": ["*/Application Support/*", "*/Containers/*"], "path_exclude": [], "mtime": 90, "atime": 0, "ctime": 90, "tag": "delete", "tag_custom": "autotag"}]
|
|
|
|
[elasticsearch]
|
|
; uncomment the below three lines if you are using AWS ES (NOTE: aws presumably needs to be set to True for AWS ES - confirm)
|
|
;aws = False
|
|
;host = search-diskover-es-cluster-eg3yztrvzb6qucroyyjk2vokza.ap-northeast-1.es.amazonaws.com
|
|
;port = 443
|
|
; below two lines are for local ES, comment out if you are using AWS ES
|
|
host = {{ES_HOST}}
|
|
port = {{ES_PORT}}
|
|
; the below two lines are for http-auth, used if you installed X-Pack
|
|
user = {{ES_USER}}
|
|
password = {{ES_PASS}}
|
|
; index name for ES, cli arg overwrites this
|
|
indexname = diskover-index
|
|
; timeout for connection to ES (default is 10)
|
|
timeout = 30
|
|
; number of connections kept open to ES when crawling (default is 10)
|
|
maxsize = 20
|
|
; max retries for ES operations (default is 0)
|
|
maxretries = 10
|
|
; wait for at least yellow status before bulk uploading (default is False), set to True if you want to wait
|
|
wait = False
|
|
; chunk size for ES bulk operations (default is 500)
|
|
chunksize = 1000
|
|
; number of shards for index (default is 5)
|
|
shards = 1
|
|
; number of replicas for index (default is 1)
|
|
replicas = 0
|
|
; the below settings are to optimize ES for crawling
|
|
; index refresh interval (default is 1s), set to -1 to disable refresh during crawl (fastest performance but no index searches), it is set back to 1s after the crawl
|
|
indexrefresh = 30s
|
|
; disable replicas during crawl - set to True to turn off replicas or False to keep them on (default False), replicas are set back to the replicas value above after the crawl
|
|
disablereplicas = True
|
|
; transaction log flush threshold size (default 512mb)
|
|
translogsize = 1gb
|
|
|
|
[redis]
|
|
host = {{REDIS_HOST}}
|
|
port = {{REDIS_PORT}}
|
|
;password =
|
|
; cache directory times in Redis
|
|
; used for -I index2 when comparing directory times to get metadata from index2 instead of off disk
|
|
; set to True to cache dir times or False to turn off (default False)
|
|
cachedirtimes = False
|
|
; how long in seconds directory keys live in Redis (default 1 day)
|
|
dirtimesttl = 604800
|
|
|
|
[treethreads]
|
|
; number of threads to use for tree walking down directories in rootdir (cores x 2 might be a good start)
|
|
threads = 8
|
|
|
|
[adaptivebatch]
|
|
; adaptive batch settings when using -a (intelligent crawling)
|
|
; batch size (number of dirs) to start at
|
|
startsize = 50
|
|
; maximum size of batch
|
|
maxsize = 500
|
|
; when adjusting batch size use this for +/- (increase when the queue is > 0, decrease when it is 0)
|
|
stepsize = 10
|
|
|
|
[workerbot]
|
|
; enable bot logs (True or False), bot logs will slow down crawl, use for debugging only
|
|
botlogs = False
|
|
; log file directory to store worker logs
|
|
; log files are named diskover_bot_worker_<workername>_<time>_log
|
|
logfiledir = /tmp
|
|
|
|
[paths]
|
|
; used by diskover socket server
|
|
; path to diskover.py (default is ./diskover.py)
|
|
diskoverpath = /app/diskover/diskover.py
|
|
; path to python executable (default is python)
|
|
pythonpath = python
|
|
|
|
[socketlistener]
|
|
; hostname and port (TCP) for diskover socket server for remote commands
|
|
host = 0.0.0.0
|
|
port = 9999
|
|
|
|
[dupescheck]
|
|
; read size (bytes) for md5 sum check
|
|
readsize = 65536
|
|
; max size (bytes) of files to check
|
|
maxsize = 1073741824
|
|
; bytes to check at start and end of file before doing md5 sum check
|
|
checkbytes = 8
|
|
|
|
[crawlbot]
|
|
; continuous scanner
|
|
; time to sleep (seconds) between checking for directory changes
|
|
sleeptime = 0.1
|
|
; number of threads for checking directories, setting this to num of cores x2 is a good starting point
|
|
threads = 8
|
|
|
|
[gource]
|
|
; should be set to the same value as in diskover-gource.sh
|
|
maxfilelag = 0.1
|
|
|
|
[qumulo]
|
|
; Qumulo host
|
|
;cluster = 172.16.129.10
|
|
; Qumulo api user
|
|
;api_user = admin
|
|
; Qumulo api password
|
|
;api_password = admin
|