feat: Make search job timeout configurable

Mohamed Bassem 2025-11-02 18:04:20 +00:00
parent 33f4077972
commit 965c603de3
4 changed files with 8 additions and 3 deletions

View File

@@ -44,7 +44,7 @@ export class SearchIndexingWorker {
{
concurrency: serverConfig.search.numWorkers,
pollIntervalMs: 1000,
- timeoutSecs: 30,
+ timeoutSecs: serverConfig.search.jobTimeoutSec,
},
);
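
For context, a per-job timeout like `timeoutSecs` is commonly enforced by racing the job against a deadline promise. The sketch below is purely illustrative and assumes nothing about the actual queue library used here; `runWithTimeout` is a hypothetical helper, not code from this repository:

```ts
// Hypothetical illustration of how a runner might enforce `timeoutSecs`;
// the real queue implementation may differ.
async function runWithTimeout<T>(
  job: () => Promise<T>,
  timeoutSecs: number,
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`Job timed out after ${timeoutSecs}s`)),
      timeoutSecs * 1000,
    );
  });
  try {
    // Whichever settles first wins: the job's result or the timeout rejection.
    return await Promise.race([job(), deadline]);
  } finally {
    clearTimeout(timer);
  }
}
```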

View File

@@ -20,8 +20,9 @@ The app is mainly configured by environment variables. All the used environment
| PROMETHEUS_AUTH_TOKEN | No | Random | Enable a prometheus metrics endpoint at `/api/metrics`. This endpoint requires the token to be passed in the Authorization header as a Bearer token. If not set, a new random token is generated on every startup. The token cannot contain any special characters, or you may encounter a 400 Bad Request response. |
| RATE_LIMITING_ENABLED | No | false | If set to true, API rate limiting will be enabled. |
| DB_WAL_MODE | No | false | Enables WAL mode for the sqlite database. This should improve the performance of the database. There's no reason why you shouldn't set this to true unless you're running the db on a network attached drive. This will become the default at some time in the future. |
| SEARCH_NUM_WORKERS | No | 1 | Number of concurrent workers for search indexing tasks. Increase this if you have a high volume of content being indexed for search. |
+ | SEARCH_JOB_TIMEOUT_SEC | No | 30 | How long, in seconds, to wait for a search indexing job to finish before timing out. Increase this if you have large bookmarks whose content takes longer to index. |
| WEBHOOK_NUM_WORKERS | No | 1 | Number of concurrent workers for webhook delivery. Increase this if you have multiple webhook endpoints or high webhook traffic. |
| ASSET_PREPROCESSING_NUM_WORKERS | No | 1 | Number of concurrent workers for asset preprocessing tasks (image processing, OCR, etc.). Increase this if you have many images or documents that need processing. |
| RULE_ENGINE_NUM_WORKERS | No | 1 | Number of concurrent workers for rule engine processing. Increase this if you have complex automation rules that need to be processed quickly. |
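
For example, a deployment that indexes large bookmarks could raise the timeout and add a worker via its environment (the values below are illustrative):

```
SEARCH_NUM_WORKERS=2
SEARCH_JOB_TIMEOUT_SEC=120
```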

View File

@@ -8,6 +8,7 @@ import type {
SearchOptions,
SearchResponse,
} from "@karakeep/shared/search";
+ import serverConfig from "@karakeep/shared/config";
import { PluginProvider } from "@karakeep/shared/plugins";
import { envConfig } from "./env";
@@ -68,6 +69,7 @@ class MeiliSearchIndexClient implements SearchIndexClient {
private async ensureTaskSuccess(taskUid: number): Promise<void> {
const task = await this.index.waitForTask(taskUid, {
intervalMs: 200,
+ timeOutMs: serverConfig.search.jobTimeoutSec * 1000 * 0.9,
});
if (task.error) {
throw new Error(`Search task failed: ${task.error.message}`);
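
The `0.9` factor is worth a note: it caps the Meilisearch `waitForTask` deadline at 90% of the configured job timeout, presumably so that a slow task surfaces as an explicit search-task error before the worker-level `timeoutSecs` kills the whole job. With the default of 30 seconds:

```ts
// Worked example with the default SEARCH_JOB_TIMEOUT_SEC=30:
const jobTimeoutSec = 30;                     // serverConfig.search.jobTimeoutSec
const timeOutMs = jobTimeoutSec * 1000 * 0.9; // 27000 ms for waitForTask
// ~3s of headroom remains before the worker's own 30s timeout fires.
```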

View File

@@ -88,6 +88,7 @@ const allEnv = z.object({
CRAWLER_NUM_WORKERS: z.coerce.number().default(1),
INFERENCE_NUM_WORKERS: z.coerce.number().default(1),
SEARCH_NUM_WORKERS: z.coerce.number().default(1),
+ SEARCH_JOB_TIMEOUT_SEC: z.coerce.number().default(30),
WEBHOOK_NUM_WORKERS: z.coerce.number().default(1),
ASSET_PREPROCESSING_NUM_WORKERS: z.coerce.number().default(1),
RULE_ENGINE_NUM_WORKERS: z.coerce.number().default(1),
@@ -306,6 +307,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
},
search: {
numWorkers: val.SEARCH_NUM_WORKERS,
+ jobTimeoutSec: val.SEARCH_JOB_TIMEOUT_SEC,
},
logLevel: val.LOG_LEVEL,
logNoColor: val.NO_COLOR,
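
One detail of the schema worth noting: environment variables always arrive as strings, so `z.coerce.number()` converts them before validation, and `.default(30)` applies only when the variable is unset. A quick sketch of that behavior:

```ts
import { z } from "zod";

const searchJobTimeoutSec = z.coerce.number().default(30);

searchJobTimeoutSec.parse("45");      // 45, the env string is coerced to a number
searchJobTimeoutSec.parse(undefined); // 30, the default when the variable is unset
```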