mirror of
https://github.com/k2-fsa/sherpa-onnx.git
synced 2026-01-09 07:41:06 +08:00
Add JavaScript API (WebAssembly) for Google MedAsr model (#2954)
This commit is contained in:
parent
14da0f6f76
commit
a53d9eec12
8
.github/scripts/test-nodejs-npm.sh
vendored
8
.github/scripts/test-nodejs-npm.sh
vendored
@ -9,6 +9,14 @@ git status
|
||||
ls -lh
|
||||
ls -lh node_modules
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
|
||||
tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
|
||||
rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
|
||||
|
||||
node ./test-offline-medasr-ctc.js
|
||||
|
||||
rm -rf sherpa-onnx-medasr-ctc-en-int8-2025-12-25
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
|
||||
tar xvf sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
|
||||
rm sherpa-onnx-omnilingual-asr-1600-languages-300M-ctc-int8-2025-11-12.tar.bz2
|
||||
|
||||
@ -203,6 +203,22 @@ rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
|
||||
node ./test-offline-zipformer-ctc.js
|
||||
```
|
||||
|
||||
## ./test-offline-medasr-ctc.js
|
||||
|
||||
[./test-offline-medasr-ctc.js](./test-offline-medasr-ctc.js) demonstrates
|
||||
how to decode a file with a Google MedASR CTC model. In the code we use
|
||||
[sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2).
|
||||
|
||||
You can use the following command to run it:
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
|
||||
tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
|
||||
rm sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
|
||||
|
||||
node ./test-offline-medasr-ctc.js
|
||||
```
|
||||
|
||||
## ./test-offline-omnilingual-asr-ctc.js
|
||||
|
||||
[./test-offline-omnilingual-asr-ctc.js](./test-offline-omnilingual-asr-ctc.js) demonstrates
|
||||
|
||||
35
nodejs-examples/test-offline-medasr-ctc.js
Normal file
35
nodejs-examples/test-offline-medasr-ctc.js
Normal file
@ -0,0 +1,35 @@
|
||||
// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
//
// This script shows how to decode a wave file with a non-streaming
// Google MedASR CTC model using the sherpa-onnx JavaScript (WASM) API.
//
// Before running, download and unpack the model:
//   wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2
//   tar xvf sherpa-onnx-medasr-ctc-en-int8-2025-12-25.tar.bz2

const sherpa_onnx = require('sherpa-onnx');

// Creates an offline recognizer configured for the MedASR CTC model.
// Only the model path and the tokens file are required; all other
// options keep their library defaults.
function createOfflineRecognizer() {
  const config = {
    modelConfig: {
      medasr: {
        model: './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/model.int8.onnx',
      },
      tokens: './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/tokens.txt',
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-medasr-ctc-en-int8-2025-12-25/test_wavs/0.wav';

// readWave yields an object exposing sampleRate and samples,
// which is exactly what acceptWaveform expects.
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

// Release the native resources backing the stream and the recognizer.
stream.free();
recognizer.free();
|
||||
@ -6,7 +6,6 @@ function createOfflineTts() {
|
||||
let offlineTtsMatchaModelConfig = {
|
||||
acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx',
|
||||
vocoder: './vocos-22khz-univ.onnx',
|
||||
lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt',
|
||||
tokens: './matcha-icefall-en_US-ljspeech/tokens.txt',
|
||||
dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data',
|
||||
|
||||
|
||||
@ -59,6 +59,10 @@ function freeConfig(config, Module) {
|
||||
freeConfig(config.omnilingual, Module)
|
||||
}
|
||||
|
||||
if ('medasr' in config) {
|
||||
freeConfig(config.medasr, Module)
|
||||
}
|
||||
|
||||
if ('moonshine' in config) {
|
||||
freeConfig(config.moonshine, Module)
|
||||
}
|
||||
@ -776,6 +780,23 @@ function initSherpaOnnxOfflineOmnilingualAsrCtcModelConfig(config, Module) {
|
||||
}
|
||||
}
|
||||
|
||||
// Serializes an offline MedASR CTC model config into the WASM heap.
//
// The C-side struct holds a single 4-byte pointer slot: the UTF-8,
// NUL-terminated path of the model file.
//
// Returns {buffer, ptr, len} so the caller can later release both
// allocations (see freeConfig): `buffer` is the string copy, `ptr` is
// the struct, `len` is the struct size in bytes.
function initSherpaOnnxOfflineMedAsrCtcModelConfig(config, Module) {
  const model = config.model || '';

  // Copy the model path onto the heap, including the trailing NUL.
  const bufferSize = Module.lengthBytesUTF8(model) + 1;
  const buffer = Module._malloc(bufferSize);
  Module.stringToUTF8(model, buffer, bufferSize);

  // Allocate the struct: one pointer field.
  const len = 1 * 4;
  const ptr = Module._malloc(len);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
|
||||
|
||||
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
|
||||
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
|
||||
const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
|
||||
@ -1052,6 +1073,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||
};
|
||||
}
|
||||
|
||||
if (!('medasr' in config)) {
|
||||
config.medasr = {
|
||||
model: '',
|
||||
};
|
||||
}
|
||||
|
||||
if (!('whisper' in config)) {
|
||||
config.whisper = {
|
||||
encoder: '',
|
||||
@ -1139,10 +1166,13 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||
const omnilingual = initSherpaOnnxOfflineOmnilingualAsrCtcModelConfig(
|
||||
config.omnilingual, Module);
|
||||
|
||||
const medasr =
|
||||
initSherpaOnnxOfflineMedAsrCtcModelConfig(config.medasr, Module);
|
||||
|
||||
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
|
||||
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len +
|
||||
dolphin.len + zipformerCtc.len + canary.len + wenetCtc.len +
|
||||
omnilingual.len;
|
||||
omnilingual.len + medasr.len;
|
||||
|
||||
const ptr = Module._malloc(len);
|
||||
|
||||
@ -1256,12 +1286,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||
Module._CopyHeap(omnilingual.ptr, omnilingual.len, ptr + offset);
|
||||
offset += omnilingual.len;
|
||||
|
||||
Module._CopyHeap(medasr.ptr, medasr.len, ptr + offset);
|
||||
offset += medasr.len;
|
||||
|
||||
return {
|
||||
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
||||
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
|
||||
senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr,
|
||||
dolphin: dolphin, zipformerCtc: zipformerCtc, canary: canary,
|
||||
wenetCtc: wenetCtc, omnilingual: omnilingual
|
||||
wenetCtc: wenetCtc, omnilingual: omnilingual, medasr: medasr
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -16,6 +16,7 @@ static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineWenetCtcModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineOmnilingualAsrCtcModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineMedAsrCtcModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineDolphinModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
|
||||
@ -39,7 +40,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
|
||||
sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineCanaryModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineWenetCtcModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineOmnilingualAsrCtcModelConfig),
|
||||
sizeof(SherpaOnnxOfflineOmnilingualAsrCtcModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineMedAsrCtcModelConfig),
|
||||
|
||||
"");
|
||||
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
|
||||
@ -89,6 +91,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
|
||||
auto canary = &model_config->canary;
|
||||
auto wenet_ctc = &model_config->wenet_ctc;
|
||||
auto omnilingual = &model_config->omnilingual;
|
||||
auto medasr = &model_config->medasr;
|
||||
|
||||
fprintf(stdout, "----------offline transducer model config----------\n");
|
||||
fprintf(stdout, "encoder: %s\n", transducer->encoder);
|
||||
@ -145,6 +148,9 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
|
||||
fprintf(stdout, "----------offline Omnilingual ASR model config----------\n");
|
||||
fprintf(stdout, "model: %s\n", omnilingual->model);
|
||||
|
||||
fprintf(stdout, "----------offline MedASR model config----------\n");
|
||||
fprintf(stdout, "model: %s\n", medasr->model);
|
||||
|
||||
fprintf(stdout, "tokens: %s\n", model_config->tokens);
|
||||
fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
|
||||
fprintf(stdout, "provider: %s\n", model_config->provider);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user