Add JavaScript API (WebAssembly) for Google MedASR model (#2954)

Fangjun Kuang, 2025-12-29 12:50:51 +08:00, committed by GitHub
parent 14da0f6f76
commit a53d9eec12
6 changed files with 101 additions and 4 deletions


@@ -9,6 +9,14 @@ git status
ls -lh
ls -lh node_modules
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
node ./test-offline-medasr-ctc.js
rm -rf sherpa-onnx-medasr-ctc-en-int8-2025-12-25
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2


@@ -203,6 +203,22 @@ rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
node ./test-offline-zipformer-ctc.js
```
## ./test-offline-medasr-ctc.js
[./test-offline-medasr-ctc.js](./test-offline-medasr-ctc.js) demonstrates
how to decode a file with a Google MedASR CTC model. In the code we use
[sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2).
You can use the following command to run it:
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
node ./test-offline-medasr-ctc.js
```
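The JavaScript that this test runs is shown in full later in this commit; its essential piece is the new `medasr` entry in `modelConfig`. Condensed, and using the paths unpacked from the tarball above, it amounts to:

```js
// Condensed from test-offline-medasr-ctc.js (added in this commit).
// The script only sets the medasr model path and the tokens file;
// the remaining model-config fields fall back to their defaults.
const sherpa_onnx = require('sherpa-onnx');

const recognizer = sherpa_onnx.createOfflineRecognizer({
  modelConfig: {
    medasr: {model: './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx'},
    tokens: './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt',
  }
});
```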
## ./test-offline-omnilingual-asr-ctc.js
[./test-offline-omnilingual-asr-ctc.js](./test-offline-omnilingual-asr-ctc.js) demonstrates


@@ -0,0 +1,35 @@
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');

const sherpa_onnx = require('sherpa-onnx');

function createOfflineRecognizer() {
  let config = {
    modelConfig: {
      medasr: {
        model: './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx',
      },
      tokens: './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt',
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

stream.free();
recognizer.free();
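One small tweak that is not in the committed script but can be handy when trying other recordings: take the wave file from the command line and fall back to the bundled sample.

```js
// Hypothetical variation: `node ./test-offline-medasr-ctc.js my.wav`
const waveFilename = process.argv[2] ||
    './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav';
```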


@@ -6,7 +6,6 @@ function createOfflineTts() {
  let offlineTtsMatchaModelConfig = {
    acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx',
    vocoder: './vocos-22khz-univ.onnx',
    lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt',
    tokens: './matcha-icefall-en_US-ljspeech/tokens.txt',
    dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data',


@@ -59,6 +59,10 @@ function freeConfig(config, Module) {
    freeConfig(config.omnilingual, Module)
  }

  if ('medasr' in config) {
    freeConfig(config.medasr, Module)
  }

  if ('moonshine' in config) {
    freeConfig(config.moonshine, Module)
  }
@@ -776,6 +780,23 @@ function initSherpaOnnxOfflineOmnilingualAsrCtcModelConfig(config, Module) {
  }
}

function initSherpaOnnxOfflineMedAsrCtcModelConfig(config, Module) {
  const n = Module.lengthBytesUTF8(config.model || '') + 1;
  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model || '', buffer, n);

  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}

function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
  const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
  const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
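For readers less familiar with the Emscripten glue, the new `initSherpaOnnxOfflineMedAsrCtcModelConfig` follows the same pattern as the other single-file model configs: the model path is written into the WASM heap as a NUL-terminated UTF-8 string, and one 4-byte slot (a wasm32 pointer) records its address. Below is a minimal standalone sketch of that pattern; `Module` stands for the Emscripten module instance, the helper names are made up for illustration, and the matching cleanup presumably happens in `freeConfig`, which the earlier hunk extends to recurse into `config.medasr`.

```js
// Sketch only: marshal one JS string into the WASM heap and record its
// address in a freshly allocated 32-bit pointer slot.
function marshalStringConfig(Module, str) {
  const n = Module.lengthBytesUTF8(str) + 1;  // +1 for the trailing NUL
  const buffer = Module._malloc(n);           // bytes of the string itself
  Module.stringToUTF8(str, buffer, n);

  const ptr = Module._malloc(4);              // one pointer on wasm32
  Module.setValue(ptr, buffer, 'i8*');        // *ptr = buffer

  return {buffer: buffer, ptr: ptr, len: 4};
}

// Both blocks must be released once the C side no longer needs them.
function releaseStringConfig(Module, cfg) {
  Module._free(cfg.buffer);
  Module._free(cfg.ptr);
}
```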
@@ -1052,6 +1073,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
    };
  }

  if (!('medasr' in config)) {
    config.medasr = {
      model: '',
    };
  }

  if (!('whisper' in config)) {
    config.whisper = {
      encoder: '',
@@ -1139,10 +1166,13 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
  const omnilingual = initSherpaOnnxOfflineOmnilingualAsrCtcModelConfig(
      config.omnilingual, Module);

  const medasr =
      initSherpaOnnxOfflineMedAsrCtcModelConfig(config.medasr, Module);

  const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
      tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len +
      dolphin.len + zipformerCtc.len + canary.len + wenetCtc.len +
      omnilingual.len;
      omnilingual.len + medasr.len;

  const ptr = Module._malloc(len);
@@ -1256,12 +1286,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
  Module._CopyHeap(omnilingual.ptr, omnilingual.len, ptr + offset);
  offset += omnilingual.len;

  Module._CopyHeap(medasr.ptr, medasr.len, ptr + offset);
  offset += medasr.len;

  return {
    buffer: buffer, ptr: ptr, len: len, transducer: transducer,
    paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
    senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr,
    dolphin: dolphin, zipformerCtc: zipformerCtc, canary: canary,
    wenetCtc: wenetCtc, omnilingual: omnilingual
    wenetCtc: wenetCtc, omnilingual: omnilingual, medasr: medasr
  }
}
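Taken together, the lifecycle of the new sub-config in this file looks roughly as follows. This is a sketch rather than code from the commit: `Module` is the Emscripten module instance and the paths are placeholders.

```js
// Callers may omit `medasr`; the default block above fills in {model: ''}
// so the flattened C struct always contains the field.
const modelConfig = initSherpaOnnxOfflineModelConfig({
  medasr: {model: './model.int8.onnx'},  // placeholder path
  tokens: './tokens.txt',                // placeholder path
}, Module);

// modelConfig.ptr now points at a contiguous block whose layout matches
// SherpaOnnxOfflineModelConfig on the C side, with the medasr pointer
// copied in right after the omnilingual one (see _CopyHeap above).

// The returned object keeps a `medasr` member so that freeConfig() can
// recurse into it and release its allocations when the recognizer is done.
freeConfig(modelConfig, Module);
```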


@@ -16,6 +16,7 @@ static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineWenetCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineOmnilingualAsrCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineMedAsrCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineDolphinModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
@@ -39,7 +40,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
                  sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) +
                  sizeof(SherpaOnnxOfflineCanaryModelConfig) +
                  sizeof(SherpaOnnxOfflineWenetCtcModelConfig) +
                  sizeof(SherpaOnnxOfflineOmnilingualAsrCtcModelConfig),
                  sizeof(SherpaOnnxOfflineOmnilingualAsrCtcModelConfig) +
                  sizeof(SherpaOnnxOfflineMedAsrCtcModelConfig),
              "");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
@@ -89,6 +91,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
  auto canary = &model_config->canary;
  auto wenet_ctc = &model_config->wenet_ctc;
  auto omnilingual = &model_config->omnilingual;
  auto medasr = &model_config->medasr;

  fprintf(stdout, "----------offline transducer model config----------\n");
  fprintf(stdout, "encoder: %s\n", transducer->encoder);
@@ -145,6 +148,9 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
  fprintf(stdout, "----------offline Omnilingual ASR model config----------\n");
  fprintf(stdout, "model: %s\n", omnilingual->model);

  fprintf(stdout, "----------offline MedASR model config----------\n");
  fprintf(stdout, "model: %s\n", medasr->model);

  fprintf(stdout, "tokens: %s\n", model_config->tokens);
  fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
  fprintf(stdout, "provider: %s\n", model_config->provider);