feat: Make search job timeout configurable

Mohamed Bassem 2025-11-02 18:04:20 +00:00
parent 33f4077972
commit 965c603de3
4 changed files with 8 additions and 3 deletions

View File

@@ -44,7 +44,7 @@ export class SearchIndexingWorker {
{
concurrency: serverConfig.search.numWorkers,
pollIntervalMs: 1000,
- timeoutSecs: 30,
+ timeoutSecs: serverConfig.search.jobTimeoutSec,
},
);
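
For context, a per-job timeout like `timeoutSecs` is commonly enforced by racing the job against a deadline promise. The sketch below is purely illustrative and assumes nothing about the actual queue library used here; `runWithTimeout` is a hypothetical helper, not code from this repository:

```ts
// Hypothetical illustration of how a runner might enforce `timeoutSecs`;
// the real queue implementation may differ.
async function runWithTimeout<T>(
  job: () => Promise<T>,
  timeoutSecs: number,
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`Job timed out after ${timeoutSecs}s`)),
      timeoutSecs * 1000,
    );
  });
  try {
    // Whichever settles first wins: the job's result or the timeout rejection.
    return await Promise.race([job(), deadline]);
  } finally {
    clearTimeout(timer);
  }
}
```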

View File

@@ -20,8 +20,9 @@ The app is mainly configured by environment variables. All the used environment
| PROMETHEUS_AUTH_TOKEN | No | Random | Enable a prometheus metrics endpoint at `/api/metrics`. This endpoint requires the token to be passed in the Authorization header as a Bearer token. If not set, a new random token is generated on every startup. The token cannot contain any special characters, or you may encounter a 400 Bad Request response. |
| RATE_LIMITING_ENABLED | No | false | If set to true, API rate limiting will be enabled. |
| DB_WAL_MODE | No | false | Enables WAL mode for the sqlite database. This should improve the performance of the database. There's no reason why you shouldn't set this to true unless you're running the db on a network attached drive. This will become the default at some time in the future. |
| SEARCH_NUM_WORKERS | No | 1 | Number of concurrent workers for search indexing tasks. Increase this if you have a high volume of content being indexed for search. |
+ | SEARCH_JOB_TIMEOUT_SEC | No | 30 | How long, in seconds, to wait for a search indexing job to finish before timing out. Increase this if you have large bookmarks whose content takes longer to index. |
| WEBHOOK_NUM_WORKERS | No | 1 | Number of concurrent workers for webhook delivery. Increase this if you have multiple webhook endpoints or high webhook traffic. |
| ASSET_PREPROCESSING_NUM_WORKERS | No | 1 | Number of concurrent workers for asset preprocessing tasks (image processing, OCR, etc.). Increase this if you have many images or documents that need processing. |
| RULE_ENGINE_NUM_WORKERS | No | 1 | Number of concurrent workers for rule engine processing. Increase this if you have complex automation rules that need to be processed quickly. |
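
For example, a deployment that indexes large bookmarks could raise the timeout and add a worker via its environment (the values below are illustrative):

```
SEARCH_NUM_WORKERS=2
SEARCH_JOB_TIMEOUT_SEC=120
```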

View File

@@ -8,6 +8,7 @@ import type {
SearchOptions,
SearchResponse,
} from "@karakeep/shared/search";
+ import serverConfig from "@karakeep/shared/config";
import { PluginProvider } from "@karakeep/shared/plugins";
import { envConfig } from "./env";
@@ -68,6 +69,7 @@ class MeiliSearchIndexClient implements SearchIndexClient {
private async ensureTaskSuccess(taskUid: number): Promise<void> {
const task = await this.index.waitForTask(taskUid, {
intervalMs: 200,
+ timeOutMs: serverConfig.search.jobTimeoutSec * 1000 * 0.9,
});
if (task.error) {
throw new Error(`Search task failed: ${task.error.message}`);
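
The `0.9` factor is worth a note: it caps the Meilisearch `waitForTask` deadline at 90% of the configured job timeout, presumably so that a slow task surfaces as an explicit search-task error before the worker-level `timeoutSecs` kills the whole job. With the default of 30 seconds:

```ts
// Worked example with the default SEARCH_JOB_TIMEOUT_SEC=30:
const jobTimeoutSec = 30;                     // serverConfig.search.jobTimeoutSec
const timeOutMs = jobTimeoutSec * 1000 * 0.9; // 27000 ms for waitForTask
// ~3s of headroom remains before the worker's own 30s timeout fires.
```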

View File

@@ -88,6 +88,7 @@ const allEnv = z.object({
CRAWLER_NUM_WORKERS: z.coerce.number().default(1),
INFERENCE_NUM_WORKERS: z.coerce.number().default(1),
SEARCH_NUM_WORKERS: z.coerce.number().default(1),
+ SEARCH_JOB_TIMEOUT_SEC: z.coerce.number().default(30),
WEBHOOK_NUM_WORKERS: z.coerce.number().default(1),
ASSET_PREPROCESSING_NUM_WORKERS: z.coerce.number().default(1),
RULE_ENGINE_NUM_WORKERS: z.coerce.number().default(1),
@@ -306,6 +307,7 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
},
search: {
numWorkers: val.SEARCH_NUM_WORKERS,
+ jobTimeoutSec: val.SEARCH_JOB_TIMEOUT_SEC,
},
logLevel: val.LOG_LEVEL,
logNoColor: val.NO_COLOR,
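
One detail of the schema worth noting: environment variables always arrive as strings, so `z.coerce.number()` converts them before validation, and `.default(30)` applies only when the variable is unset. A quick sketch of that behavior:

```ts
import { z } from "zod";

const searchJobTimeoutSec = z.coerce.number().default(30);

searchJobTimeoutSec.parse("45");      // 45, the env string is coerced to a number
searchJobTimeoutSec.parse(undefined); // 30, the default when the variable is unset
```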