mirror of
https://github.com/k2-fsa/sherpa-onnx.git
synced 2026-01-09 07:41:06 +08:00
Add various language bindings for Wenet non-streaming CTC models (#2584)
This PR adds support for Wenet non-streaming CTC models to sherpa-onnx by introducing the SherpaOnnxOfflineWenetCtcModelConfig struct and integrating it across all language bindings and APIs. The implementation follows the same pattern as other CTC model types like Zipformer CTC. - Introduces SherpaOnnxOfflineWenetCtcModelConfig struct with a single model field for the ONNX model path - Adds the new config to SherpaOnnxOfflineModelConfig and updates all language bindings (C++, Pascal, Kotlin, Java, Go, C#, Swift, JavaScript, etc.) - Provides comprehensive examples and tests across all supported platforms and languages
This commit is contained in:
parent
71f87e1808
commit
7e42ba2c0c
4
.github/scripts/test-dart.sh
vendored
4
.github/scripts/test-dart.sh
vendored
@ -70,6 +70,10 @@ popd
|
||||
|
||||
pushd non-streaming-asr
|
||||
|
||||
echo '----------Wenet CTC----------'
|
||||
./run-wenet-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
echo '----------Zipformer CTC----------'
|
||||
./run-zipformer-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
6
.github/scripts/test-dot-net.sh
vendored
6
.github/scripts/test-dot-net.sh
vendored
@ -27,6 +27,9 @@ rm -rf sherpa-onnx-nemo-*
|
||||
|
||||
cd ../offline-decode-files
|
||||
|
||||
./run-wenet-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
./run-zipformer-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
@ -108,6 +111,9 @@ cd ../keyword-spotting-from-files
|
||||
./run.sh
|
||||
|
||||
cd ../online-decode-files
|
||||
./run-t-one-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
./run-transducer-itn.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
|
||||
11
.github/scripts/test-nodejs-addon-npm.sh
vendored
11
.github/scripts/test-nodejs-addon-npm.sh
vendored
@ -10,7 +10,16 @@ arch=$(node -p "require('os').arch()")
|
||||
platform=$(node -p "require('os').platform()")
|
||||
node_version=$(node -p "process.versions.node.split('.')[0]")
|
||||
|
||||
echo "----------streaming ASR T-one----------"
|
||||
echo "----------non-streaming ASR Wenet CTC----------"
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
|
||||
node ./test_asr_non_streaming_wenet_ctc.js
|
||||
rm -rf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10
|
||||
|
||||
echo "----------streaming ASR T-one CTC----------"
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
|
||||
tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
|
||||
rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
|
||||
|
||||
7
.github/scripts/test-nodejs-npm.sh
vendored
7
.github/scripts/test-nodejs-npm.sh
vendored
@ -9,6 +9,13 @@ git status
|
||||
ls -lh
|
||||
ls -lh node_modules
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
|
||||
node ./test-offline-wenet-ctc.js
|
||||
rm -rf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
|
||||
tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
|
||||
rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
|
||||
|
||||
3
.github/scripts/test-swift.sh
vendored
3
.github/scripts/test-swift.sh
vendored
@ -19,6 +19,9 @@ rm -fv *.wav *.onnx
|
||||
ls -lh
|
||||
rm -rf kitten-*
|
||||
|
||||
./run-wenet-ctc-asr.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
./run-zipformer-ctc-asr.sh
|
||||
rm -rf sherpa-onnx-zipformer-*
|
||||
|
||||
|
||||
30
.github/workflows/c-api.yaml
vendored
30
.github/workflows/c-api.yaml
vendored
@ -75,6 +75,36 @@ jobs:
|
||||
otool -L ./install/lib/libsherpa-onnx-c-api.dylib
|
||||
fi
|
||||
|
||||
- name: Test Wenet CTC
|
||||
shell: bash
|
||||
run: |
|
||||
name=wenet-ctc-c-api
|
||||
gcc -o $name ./c-api-examples/$name.c \
|
||||
-I ./build/install/include \
|
||||
-L ./build/install/lib/ \
|
||||
-l sherpa-onnx-c-api \
|
||||
-l onnxruntime
|
||||
|
||||
ls -lh $name
|
||||
|
||||
if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
|
||||
ldd ./$name
|
||||
echo "----"
|
||||
readelf -d ./$name
|
||||
fi
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
|
||||
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
|
||||
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
|
||||
|
||||
./$name
|
||||
|
||||
rm $name
|
||||
rm -rf sherpa-onnx-wenetspeech-*
|
||||
|
||||
- name: Test T-one
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
34
.github/workflows/cxx-api.yaml
vendored
34
.github/workflows/cxx-api.yaml
vendored
@ -78,6 +78,40 @@ jobs:
|
||||
otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
|
||||
fi
|
||||
|
||||
- name: Test Wenet CTC
|
||||
shell: bash
|
||||
run: |
|
||||
name=wenet-ctc-cxx-api
|
||||
g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
|
||||
-I ./build/install/include \
|
||||
-L ./build/install/lib/ \
|
||||
-l sherpa-onnx-cxx-api \
|
||||
-l sherpa-onnx-c-api \
|
||||
-l onnxruntime
|
||||
|
||||
ls -lh $name
|
||||
|
||||
if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
|
||||
ls -lh ./$name
|
||||
ldd ./$name
|
||||
echo "----"
|
||||
readelf -d ./$name
|
||||
fi
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
|
||||
echo "---"
|
||||
|
||||
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
|
||||
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
|
||||
|
||||
./$name
|
||||
|
||||
rm -rf sherpa-onnx-wenetspeech-*
|
||||
rm -v ./$name
|
||||
|
||||
- name: Test T-one
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
4
.github/workflows/test-go.yaml
vendored
4
.github/workflows/test-go.yaml
vendored
@ -194,6 +194,10 @@ jobs:
|
||||
go build
|
||||
ls -lh
|
||||
|
||||
echo "Test Wenet CTC"
|
||||
./run-wenet-ctc.sh
|
||||
rm -rf sherpa-onnx-wenet*
|
||||
|
||||
echo "Test Zipformer CTC"
|
||||
./run-zipformer-ctc.sh
|
||||
rm -rf sherpa-onnx-zipformer-*
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@ -151,3 +151,4 @@ kitten-nano-en-v0_1-fp16
|
||||
vocab.json
|
||||
*.so
|
||||
sherpa-onnx-streaming-t-one-russian-2025-09-08
|
||||
sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10
|
||||
|
||||
@ -80,6 +80,9 @@ target_link_libraries(moonshine-c-api sherpa-onnx-c-api)
|
||||
add_executable(zipformer-c-api zipformer-c-api.c)
|
||||
target_link_libraries(zipformer-c-api sherpa-onnx-c-api)
|
||||
|
||||
add_executable(wenet-ctc-c-api wenet-ctc-c-api.c)
|
||||
target_link_libraries(wenet-ctc-c-api sherpa-onnx-c-api)
|
||||
|
||||
add_executable(streaming-zipformer-c-api streaming-zipformer-c-api.c)
|
||||
target_link_libraries(streaming-zipformer-c-api sherpa-onnx-c-api)
|
||||
|
||||
|
||||
82
c-api-examples/wenet-ctc-c-api.c
Normal file
82
c-api-examples/wenet-ctc-c-api.c
Normal file
@ -0,0 +1,82 @@
|
||||
// c-api-examples/wenet-ctc-c-api.c
|
||||
//
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
|
||||
//
|
||||
// This file demonstrates how to use non-streaming Wenet CTC model with
|
||||
// sherpa-onnx's C API.
|
||||
// clang-format off
|
||||
//
|
||||
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
// tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
// rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
//
|
||||
// clang-format on
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sherpa-onnx/c-api/c-api.h"
|
||||
|
||||
// Decodes one bundled test wave file with a non-streaming Wenet CTC model
// and prints the recognized text to stderr.
// Returns 0 on success, -1 if the wave file or the recognizer config is bad.
int32_t main() {
  // Paths inside the model directory downloaded by the commands above.
  // clang-format off
  const char *wav_filename = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav";
  const char *model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx";
  const char *tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt";
  // clang-format on
  const char *provider = "cpu";

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }

  // Wenet CTC model config. Only the model path is needed; the struct is
  // zeroed first so any fields added later stay at their defaults.
  SherpaOnnxOfflineWenetCtcModelConfig wenet_ctc_config;
  memset(&wenet_ctc_config, 0, sizeof(wenet_ctc_config));
  wenet_ctc_config.model = model;

  // Offline model config
  SherpaOnnxOfflineModelConfig offline_model_config;
  memset(&offline_model_config, 0, sizeof(offline_model_config));
  offline_model_config.debug = 1;
  offline_model_config.num_threads = 1;
  offline_model_config.provider = provider;
  offline_model_config.tokens = tokens;
  offline_model_config.wenet_ctc = wenet_ctc_config;

  // Recognizer config
  SherpaOnnxOfflineRecognizerConfig recognizer_config;
  memset(&recognizer_config, 0, sizeof(recognizer_config));
  recognizer_config.decoding_method = "greedy_search";
  recognizer_config.model_config = offline_model_config;

  const SherpaOnnxOfflineRecognizer *recognizer =
      SherpaOnnxCreateOfflineRecognizer(&recognizer_config);

  if (recognizer == NULL) {
    fprintf(stderr, "Please check your config!\n");
    SherpaOnnxFreeWave(wave);
    return -1;
  }

  const SherpaOnnxOfflineStream *stream =
      SherpaOnnxCreateOfflineStream(recognizer);

  // Feed the whole file at once; this is a non-streaming (offline) API.
  SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
                                  wave->num_samples);
  SherpaOnnxDecodeOfflineStream(recognizer, stream);
  const SherpaOnnxOfflineRecognizerResult *result =
      SherpaOnnxGetOfflineStreamResult(stream);

  fprintf(stderr, "Decoded text: %s\n", result->text);

  // Free everything in reverse order of creation.
  SherpaOnnxDestroyOfflineRecognizerResult(result);
  SherpaOnnxDestroyOfflineStream(stream);
  SherpaOnnxDestroyOfflineRecognizer(recognizer);
  SherpaOnnxFreeWave(wave);

  return 0;
}
|
||||
@ -30,6 +30,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api)
|
||||
add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc)
|
||||
target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)
|
||||
|
||||
add_executable(wenet-ctc-cxx-api ./wenet-ctc-cxx-api.cc)
|
||||
target_link_libraries(wenet-ctc-cxx-api sherpa-onnx-cxx-api)
|
||||
|
||||
add_executable(nemo-canary-cxx-api ./nemo-canary-cxx-api.cc)
|
||||
target_link_libraries(nemo-canary-cxx-api sherpa-onnx-cxx-api)
|
||||
|
||||
@ -46,6 +49,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
|
||||
portaudio_static
|
||||
)
|
||||
|
||||
add_executable(wenet-ctc-simulate-streaming-microphone-cxx-api
|
||||
./wenet-ctc-simulate-streaming-microphone-cxx-api.cc
|
||||
${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
|
||||
)
|
||||
target_link_libraries(wenet-ctc-simulate-streaming-microphone-cxx-api
|
||||
sherpa-onnx-cxx-api
|
||||
portaudio_static
|
||||
)
|
||||
|
||||
add_executable(parakeet-tdt-simulate-streaming-microphone-cxx-api
|
||||
./parakeet-tdt-simulate-streaming-microphone-cxx-api.cc
|
||||
${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
|
||||
|
||||
75
cxx-api-examples/wenet-ctc-cxx-api.cc
Normal file
75
cxx-api-examples/wenet-ctc-cxx-api.cc
Normal file
@ -0,0 +1,75 @@
|
||||
// cxx-api-examples/wenet-ctc-cxx-api.cc
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
|
||||
//
|
||||
// This file demonstrates how to use Wenet CTC with sherpa-onnx's C++ API.
|
||||
//
|
||||
// clang-format off
|
||||
//
|
||||
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
// tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
// rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
//
|
||||
// clang-format on
|
||||
|
||||
#include <chrono> // NOLINT
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "sherpa-onnx/c-api/cxx-api.h"
|
||||
|
||||
// Decodes one bundled test wave file with a non-streaming Wenet CTC model
// using the C++ API, then reports the text and the real-time factor (RTF).
int32_t main() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  // clang-format off
  config.model_config.wenet_ctc.model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx";
  config.model_config.tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt";

  config.model_config.num_threads = 1;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    return -1;
  }
  std::cout << "Loading model done\n";

  std::string wave_filename = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav";
  // clang-format on

  Wave wave = ReadWave(wave_filename);
  if (wave.samples.empty()) {
    std::cerr << "Failed to read: '" << wave_filename << "'\n";
    return -1;
  }

  // Time only the recognition itself, not the model load above.
  std::cout << "Start recognition\n";
  const auto begin = std::chrono::steady_clock::now();

  OfflineStream stream = recognizer.CreateStream();
  stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
                        wave.samples.size());

  recognizer.Decode(&stream);

  OfflineRecognizerResult result = recognizer.GetResult(&stream);

  const auto end = std::chrono::steady_clock::now();
  const float elapsed_seconds =
      std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
          .count() /
      1000.;
  // RTF = processing time / audio duration; < 1 means faster than real time.
  float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  float rtf = elapsed_seconds / duration;

  std::cout << "text: " << result.text << "\n";
  printf("Number of threads: %d\n", config.model_config.num_threads);
  printf("Duration: %.3fs\n", duration);
  printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
         duration, rtf);

  return 0;
}
|
||||
@ -0,0 +1,240 @@
|
||||
// cxx-api-examples/wenet-ctc-simulate-streaming-microphone-cxx-api.cc
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
|
||||
//
|
||||
// This file demonstrates how to use Wenet CTC with sherpa-onnx's C++ API
|
||||
// for streaming speech recognition from a microphone.
|
||||
//
|
||||
// clang-format off
|
||||
//
|
||||
//
|
||||
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
// tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
// rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
//
|
||||
// clang-format on
|
||||
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <chrono> // NOLINT
|
||||
#include <condition_variable> // NOLINT
|
||||
#include <iostream>
|
||||
#include <mutex> // NOLINT
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
|
||||
#include "portaudio.h" // NOLINT
|
||||
#include "sherpa-display.h" // NOLINT
|
||||
#include "sherpa-onnx/c-api/cxx-api.h"
|
||||
#include "sherpa-onnx/csrc/microphone.h"
|
||||
|
||||
std::queue<std::vector<float>> samples_queue;
|
||||
std::condition_variable condition_variable;
|
||||
std::mutex mutex;
|
||||
bool stop = false;
|
||||
|
||||
// SIGINT (Ctrl + C) handler: requests shutdown of the main loop and wakes
// the consumer that may be blocked on condition_variable.
//
// NOTE(review): `stop` is a plain bool and
// std::condition_variable::notify_one() is not async-signal-safe; a
// std::atomic / volatile sig_atomic_t flag plus a self-wakeup mechanism
// would be strictly correct — confirm this is acceptable for an example.
static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}
|
||||
|
||||
// PortAudio input callback: copies the captured frames into samples_queue
// and wakes the consumer loop in main().
//
// NOTE(review): presumably runs on PortAudio's audio thread; the queue is
// protected by `mutex`, shared with main() — confirm no other access paths.
static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void * /*user_data*/) {
  std::lock_guard<std::mutex> lock(mutex);
  // The device is opened with a single float32 channel (see main()), so
  // input_buffer holds frames_per_buffer consecutive floats.
  samples_queue.emplace(
      reinterpret_cast<const float *>(input_buffer),
      reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
  condition_variable.notify_one();

  // Ask PortAudio to stop invoking this callback once shutdown is requested.
  return stop ? paComplete : paContinue;
}
|
||||
|
||||
// Creates a silero-vad based voice activity detector from ./silero_vad.onnx.
// Exits the whole program if the VAD cannot be created.
static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  VadModelConfig config;
  config.silero_vad.model = "./silero_vad.onnx";
  config.silero_vad.threshold = 0.5;
  config.silero_vad.min_silence_duration = 0.1;   // seconds
  config.silero_vad.min_speech_duration = 0.25;   // seconds
  config.silero_vad.max_speech_duration = 8;      // seconds
  config.sample_rate = 16000;
  config.debug = false;

  // NOTE(review): the second argument (20) is presumably the VAD's internal
  // buffer size in seconds — confirm against the VoiceActivityDetector API.
  VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20);
  if (!vad.Get()) {
    std::cerr << "Failed to create VAD. Please check your config\n";
    exit(-1);
  }

  return vad;
}
|
||||
|
||||
// Creates a non-streaming recognizer configured for the downloaded Wenet CTC
// model (paths are relative to the working directory). Exits the whole
// program if the recognizer cannot be created.
static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  using namespace sherpa_onnx::cxx;  // NOLINT
  OfflineRecognizerConfig config;

  // clang-format off
  config.model_config.wenet_ctc.model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx";
  config.model_config.tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt";
  // clang-format on

  config.model_config.num_threads = 2;
  config.model_config.debug = false;

  std::cout << "Loading model\n";
  OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  if (!recognizer.Get()) {
    std::cerr << "Please check your config\n";
    exit(-1);
  }
  std::cout << "Loading model done\n";
  return recognizer;
}
|
||||
|
||||
int32_t main() {
|
||||
signal(SIGINT, Handler);
|
||||
|
||||
using namespace sherpa_onnx::cxx; // NOLINT
|
||||
|
||||
auto vad = CreateVad();
|
||||
auto recognizer = CreateOfflineRecognizer();
|
||||
|
||||
sherpa_onnx::Microphone mic;
|
||||
|
||||
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
||||
if (num_devices == 0) {
|
||||
std::cerr << " If you are using Linux, please try "
|
||||
"./build/bin/sense-voice-simulate-streaming-alsa-cxx-api\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t device_index = Pa_GetDefaultInputDevice();
|
||||
const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
|
||||
if (pDeviceIndex) {
|
||||
fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
|
||||
device_index = atoi(pDeviceIndex);
|
||||
}
|
||||
mic.PrintDevices(device_index);
|
||||
|
||||
float mic_sample_rate = 16000;
|
||||
const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
|
||||
if (sample_rate_str) {
|
||||
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
|
||||
mic_sample_rate = atof(sample_rate_str);
|
||||
}
|
||||
float sample_rate = 16000;
|
||||
LinearResampler resampler;
|
||||
if (mic_sample_rate != sample_rate) {
|
||||
float min_freq = std::min(mic_sample_rate, sample_rate);
|
||||
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
|
||||
|
||||
int32_t lowpass_filter_width = 6;
|
||||
resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
|
||||
lowpass_cutoff, lowpass_filter_width);
|
||||
}
|
||||
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
|
||||
nullptr)) {
|
||||
std::cerr << "Failed to open microphone device\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t window_size = 512; // samples, please don't change
|
||||
|
||||
int32_t offset = 0;
|
||||
std::vector<float> buffer;
|
||||
bool speech_started = false;
|
||||
|
||||
auto started_time = std::chrono::steady_clock::now();
|
||||
|
||||
SherpaDisplay display;
|
||||
|
||||
std::cout << "Started! Please speak\n";
|
||||
|
||||
while (!stop) {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
while (samples_queue.empty() && !stop) {
|
||||
condition_variable.wait(lock);
|
||||
}
|
||||
|
||||
if (stop) {
|
||||
break;
|
||||
}
|
||||
|
||||
const auto &s = samples_queue.front();
|
||||
if (!resampler.Get()) {
|
||||
buffer.insert(buffer.end(), s.begin(), s.end());
|
||||
} else {
|
||||
auto resampled = resampler.Resample(s.data(), s.size(), false);
|
||||
buffer.insert(buffer.end(), resampled.begin(), resampled.end());
|
||||
}
|
||||
|
||||
samples_queue.pop();
|
||||
}
|
||||
|
||||
for (; offset + window_size < buffer.size(); offset += window_size) {
|
||||
vad.AcceptWaveform(buffer.data() + offset, window_size);
|
||||
if (!speech_started && vad.IsDetected()) {
|
||||
speech_started = true;
|
||||
started_time = std::chrono::steady_clock::now();
|
||||
}
|
||||
}
|
||||
if (!speech_started) {
|
||||
if (buffer.size() > 10 * window_size) {
|
||||
offset -= buffer.size() - 10 * window_size;
|
||||
buffer = {buffer.end() - 10 * window_size, buffer.end()};
|
||||
}
|
||||
}
|
||||
|
||||
auto current_time = std::chrono::steady_clock::now();
|
||||
const float elapsed_seconds =
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
|
||||
started_time)
|
||||
.count() /
|
||||
1000.;
|
||||
|
||||
if (speech_started && elapsed_seconds > 0.2) {
|
||||
OfflineStream stream = recognizer.CreateStream();
|
||||
stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());
|
||||
|
||||
recognizer.Decode(&stream);
|
||||
|
||||
OfflineRecognizerResult result = recognizer.GetResult(&stream);
|
||||
display.UpdateText(result.text);
|
||||
display.Display();
|
||||
|
||||
started_time = std::chrono::steady_clock::now();
|
||||
}
|
||||
|
||||
while (!vad.IsEmpty()) {
|
||||
auto segment = vad.Front();
|
||||
|
||||
vad.Pop();
|
||||
|
||||
OfflineStream stream = recognizer.CreateStream();
|
||||
stream.AcceptWaveform(sample_rate, segment.samples.data(),
|
||||
segment.samples.size());
|
||||
|
||||
recognizer.Decode(&stream);
|
||||
|
||||
OfflineRecognizerResult result = recognizer.GetResult(&stream);
|
||||
|
||||
display.UpdateText(result.text);
|
||||
display.FinalizeCurrentSentence();
|
||||
display.Display();
|
||||
|
||||
buffer.clear();
|
||||
offset = 0;
|
||||
speech_started = false;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
52
dart-api-examples/non-streaming-asr/bin/wenet-ctc.dart
Normal file
52
dart-api-examples/non-streaming-asr/bin/wenet-ctc.dart
Normal file
@ -0,0 +1,52 @@
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
import 'dart:io';
|
||||
|
||||
import 'package:args/args.dart';
|
||||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||
|
||||
import './init.dart';
|
||||
|
||||
/// Non-streaming speech recognition with a Wenet CTC model.
///
/// Requires the command-line options `--model`, `--tokens`, and
/// `--input-wav`; prints the usage string and exits with code 1 if any is
/// missing. On success, prints the recognized text of the whole wave file.
void main(List<String> arguments) async {
  // Load the native sherpa-onnx library before any binding is used.
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the Wenet CTC model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['input-wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  // Only the model path is needed for a Wenet CTC model.
  final wenetCtc = sherpa_onnx.OfflineWenetCtcModelConfig(model: model);

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    wenetCtc: wenetCtc,
    tokens: tokens,
    debug: true,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  // Non-streaming API: feed the whole file at once, then decode.
  final waveData = sherpa_onnx.readWave(inputWav);
  final stream = recognizer.createStream();

  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);

  final result = recognizer.getResult(stream);
  print(result.text);

  // Native resources are not garbage-collected; free them explicitly.
  stream.free();
  recognizer.free();
}
|
||||
18
dart-api-examples/non-streaming-asr/run-wenet-ctc.sh
Executable file
18
dart-api-examples/non-streaming-asr/run-wenet-ctc.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
# Runs the non-streaming Wenet CTC Dart example.
# Downloads the pre-trained Cantonese/Chinese/English Wenet CTC model on
# first use, then decodes one of its bundled test wave files.

set -ex

dart pub get

# Download and unpack the model only if it is not already present.
if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2

  rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
fi

dart run \
  ./bin/wenet-ctc.dart \
  --model ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx \
  --tokens ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt \
  --input-wav ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav
||||
@ -84,6 +84,9 @@ class OfflineDecodeFiles
|
||||
[Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")]
|
||||
public string TeleSpeechCtc { get; set; } = string.Empty;
|
||||
|
||||
[Option("wenet-ctc", Required = false, HelpText = "Path to model.onnx. Used only for Wenet CTC models")]
|
||||
public string WenetCtc { get; set; } = string.Empty;
|
||||
|
||||
[Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")]
|
||||
public string SenseVoiceModel { get; set; } = string.Empty;
|
||||
|
||||
@ -251,6 +254,10 @@ to download pre-trained Tdnn models.
|
||||
{
|
||||
config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc;
|
||||
}
|
||||
else if (!string.IsNullOrEmpty(options.WenetCtc))
|
||||
{
|
||||
config.ModelConfig.WenetCtc.Model = options.WenetCtc;
|
||||
}
|
||||
else if (!string.IsNullOrEmpty(options.WhisperEncoder))
|
||||
{
|
||||
config.ModelConfig.Whisper.Encoder = options.WhisperEncoder;
|
||||
|
||||
14
dotnet-examples/offline-decode-files/run-wenet-ctc.sh
Executable file
14
dotnet-examples/offline-decode-files/run-wenet-ctc.sh
Executable file
@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash
# Runs the .NET offline-decode-files example with a Wenet CTC model.
# Downloads the pre-trained Cantonese/Chinese/English Wenet CTC model on
# first use, then decodes one of its bundled test wave files.

set -ex

# Download and unpack the model only if it is not already present.
if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
fi

dotnet run \
  --wenet-ctc=./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx \
  --tokens=./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt \
  --files ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav
||||
@ -125,6 +125,27 @@ class OfflineZipformerCtcModelConfig {
|
||||
final String model;
|
||||
}
|
||||
|
||||
/// Configuration for a non-streaming Wenet CTC model.
///
/// Holds a single field: the path to the ONNX model file.
class OfflineWenetCtcModelConfig {
  const OfflineWenetCtcModelConfig({this.model = ''});

  /// Builds a config from a JSON map; a missing or null `model` key
  /// defaults to the empty string.
  factory OfflineWenetCtcModelConfig.fromJson(Map<String, dynamic> json) {
    return OfflineWenetCtcModelConfig(
      model: json['model'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'OfflineWenetCtcModelConfig(model: $model)';
  }

  /// Serializes this config to a JSON map (inverse of [fromJson]).
  Map<String, dynamic> toJson() => {
        'model': model,
      };

  // Path to the Wenet CTC ONNX model file; empty means "not configured".
  final String model;
}
|
||||
|
||||
class OfflineWhisperModelConfig {
|
||||
const OfflineWhisperModelConfig(
|
||||
{this.encoder = '',
|
||||
@ -349,6 +370,7 @@ class OfflineModelConfig {
|
||||
this.dolphin = const OfflineDolphinModelConfig(),
|
||||
this.zipformerCtc = const OfflineZipformerCtcModelConfig(),
|
||||
this.canary = const OfflineCanaryModelConfig(),
|
||||
this.wenetCtc = const OfflineWenetCtcModelConfig(),
|
||||
required this.tokens,
|
||||
this.numThreads = 1,
|
||||
this.debug = true,
|
||||
@ -405,6 +427,10 @@ class OfflineModelConfig {
|
||||
? OfflineCanaryModelConfig.fromJson(
|
||||
json['canary'] as Map<String, dynamic>)
|
||||
: const OfflineCanaryModelConfig(),
|
||||
wenetCtc: json['wenetCtc'] != null
|
||||
? OfflineWenetCtcModelConfig.fromJson(
|
||||
json['wenetCtc'] as Map<String, dynamic>)
|
||||
: const OfflineWenetCtcModelConfig(),
|
||||
tokens: json['tokens'] as String,
|
||||
numThreads: json['numThreads'] as int? ?? 1,
|
||||
debug: json['debug'] as bool? ?? true,
|
||||
@ -418,7 +444,7 @@ class OfflineModelConfig {
|
||||
|
||||
@override
|
||||
String toString() {
|
||||
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, canary: $canary, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
|
||||
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, canary: $canary, wenetCtc: $wenetCtc, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
|
||||
}
|
||||
|
||||
Map<String, dynamic> toJson() => {
|
||||
@ -433,6 +459,7 @@ class OfflineModelConfig {
|
||||
'dolphin': dolphin.toJson(),
|
||||
'zipformerCtc': zipformerCtc.toJson(),
|
||||
'canary': canary.toJson(),
|
||||
'wenetCtc': wenetCtc.toJson(),
|
||||
'tokens': tokens,
|
||||
'numThreads': numThreads,
|
||||
'debug': debug,
|
||||
@ -454,6 +481,7 @@ class OfflineModelConfig {
|
||||
final OfflineDolphinModelConfig dolphin;
|
||||
final OfflineZipformerCtcModelConfig zipformerCtc;
|
||||
final OfflineCanaryModelConfig canary;
|
||||
final OfflineWenetCtcModelConfig wenetCtc;
|
||||
|
||||
final String tokens;
|
||||
final int numThreads;
|
||||
@ -690,6 +718,8 @@ class OfflineRecognizer {
|
||||
c.ref.model.canary.tgtLang = config.model.canary.tgtLang.toNativeUtf8();
|
||||
c.ref.model.canary.usePnc = config.model.canary.usePnc ? 1 : 0;
|
||||
|
||||
c.ref.model.wenetCtc.model = config.model.wenetCtc.model.toNativeUtf8();
|
||||
|
||||
c.ref.model.tokens = config.model.tokens.toNativeUtf8();
|
||||
|
||||
c.ref.model.numThreads = config.model.numThreads;
|
||||
@ -736,6 +766,7 @@ class OfflineRecognizer {
|
||||
calloc.free(c.ref.model.modelType);
|
||||
calloc.free(c.ref.model.provider);
|
||||
calloc.free(c.ref.model.tokens);
|
||||
calloc.free(c.ref.model.wenetCtc.model);
|
||||
calloc.free(c.ref.model.canary.tgtLang);
|
||||
calloc.free(c.ref.model.canary.srcLang);
|
||||
calloc.free(c.ref.model.canary.decoder);
|
||||
|
||||
@ -281,6 +281,10 @@ final class SherpaOnnxOfflineZipformerCtcModelConfig extends Struct {
|
||||
external Pointer<Utf8> model;
|
||||
}
|
||||
|
||||
final class SherpaOnnxOfflineWenetCtcModelConfig extends Struct {
|
||||
external Pointer<Utf8> model;
|
||||
}
|
||||
|
||||
final class SherpaOnnxOfflineWhisperModelConfig extends Struct {
|
||||
external Pointer<Utf8> encoder;
|
||||
external Pointer<Utf8> decoder;
|
||||
@ -360,6 +364,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
|
||||
external SherpaOnnxOfflineDolphinModelConfig dolphin;
|
||||
external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc;
|
||||
external SherpaOnnxOfflineCanaryModelConfig canary;
|
||||
external SherpaOnnxOfflineWenetCtcModelConfig wenetCtc;
|
||||
}
|
||||
|
||||
final class SherpaOnnxOfflineRecognizerConfig extends Struct {
|
||||
|
||||
@ -29,6 +29,7 @@ func main() {
|
||||
flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model")
|
||||
|
||||
flag.StringVar(&config.ModelConfig.ZipformerCtc.Model, "zipformer-ctc", "", "Path to the Zipformer CTC model")
|
||||
flag.StringVar(&config.ModelConfig.WenetCtc.Model, "wenet-ctc", "", "Path to the Wenet CTC model")
|
||||
|
||||
flag.StringVar(&config.ModelConfig.Dolphin.Model, "dolphin-model", "", "Path to the Dolphin CTC model")
|
||||
|
||||
|
||||
18
go-api-examples/non-streaming-decode-files/run-wenet-ctc.sh
Executable file
18
go-api-examples/non-streaming-decode-files/run-wenet-ctc.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
|
||||
if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
fi
|
||||
|
||||
go mod tidy
|
||||
go build
|
||||
|
||||
./non-streaming-decode-files \
|
||||
--wenet-ctc ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx \
|
||||
--tokens ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt \
|
||||
--debug 0 \
|
||||
./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav
|
||||
@ -14,8 +14,9 @@ export { Samples,
|
||||
OfflineNemoEncDecCtcModelConfig,
|
||||
OfflineWhisperModelConfig,
|
||||
OfflineTdnnModelConfig,
|
||||
OfflineSenseVoiceModelConfig,
|
||||
OfflineMoonshineModelConfig,
|
||||
OfflineSenseVoiceModelConfig,
|
||||
OfflineWenetCtcModelConfig,
|
||||
OfflineZipformerCtcModelConfig,
|
||||
OfflineModelConfig,
|
||||
OfflineLMConfig,
|
||||
|
||||
@ -61,6 +61,22 @@ GetOfflineZipformerCtcModelConfig(Napi::Object obj) {
|
||||
return c;
|
||||
}
|
||||
|
||||
static SherpaOnnxOfflineWenetCtcModelConfig GetOfflineWenetCtcModelConfig(
|
||||
Napi::Object obj) {
|
||||
SherpaOnnxOfflineWenetCtcModelConfig c;
|
||||
memset(&c, 0, sizeof(c));
|
||||
|
||||
if (!obj.Has("wenetCtc") || !obj.Get("wenetCtc").IsObject()) {
|
||||
return c;
|
||||
}
|
||||
|
||||
Napi::Object o = obj.Get("wenetCtc").As<Napi::Object>();
|
||||
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
static SherpaOnnxOfflineDolphinModelConfig GetOfflineDolphinModelConfig(
|
||||
Napi::Object obj) {
|
||||
SherpaOnnxOfflineDolphinModelConfig c;
|
||||
@ -225,6 +241,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) {
|
||||
c.dolphin = GetOfflineDolphinModelConfig(o);
|
||||
c.zipformer_ctc = GetOfflineZipformerCtcModelConfig(o);
|
||||
c.canary = GetOfflineCanaryModelConfig(o);
|
||||
c.wenet_ctc = GetOfflineWenetCtcModelConfig(o);
|
||||
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
|
||||
@ -317,6 +334,8 @@ static void FreeConfig(const SherpaOnnxOfflineRecognizerConfig &c) {
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.src_lang);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.tgt_lang);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.wenet_ctc.model);
|
||||
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
|
||||
SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type);
|
||||
|
||||
@ -60,6 +60,10 @@ export class OfflineZipformerCtcModelConfig {
|
||||
public model: string = '';
|
||||
}
|
||||
|
||||
export class OfflineWenetCtcModelConfig {
|
||||
public model: string = '';
|
||||
}
|
||||
|
||||
export class OfflineWhisperModelConfig {
|
||||
public encoder: string = '';
|
||||
public decoder: string = '';
|
||||
@ -112,6 +116,7 @@ export class OfflineModelConfig {
|
||||
public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig();
|
||||
public zipformerCtc: OfflineZipformerCtcModelConfig = new OfflineZipformerCtcModelConfig();
|
||||
public canary: OfflineCanaryModelConfig = new OfflineCanaryModelConfig();
|
||||
public wenetCtc: OfflineWenetCtcModelConfig = new OfflineWenetCtcModelConfig();
|
||||
}
|
||||
|
||||
export class OfflineLMConfig {
|
||||
|
||||
54
java-api-examples/NonStreamingDecodeFileWenetCtc.java
Normal file
54
java-api-examples/NonStreamingDecodeFileWenetCtc.java
Normal file
@ -0,0 +1,54 @@
|
||||
// Copyright 2025 Xiaomi Corporation
|
||||
|
||||
// This file shows how to use an offline Wenet CTC model,
|
||||
// i.e., non-streaming Wenet CTC model,
|
||||
// to decode files.
|
||||
import com.k2fsa.sherpa.onnx.*;
|
||||
|
||||
public class NonStreamingDecodeFileWenetCtc {
|
||||
public static void main(String[] args) {
|
||||
// please refer to
|
||||
// https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html
|
||||
// to download model files
|
||||
String model =
|
||||
"sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx";
|
||||
|
||||
String tokens =
|
||||
"sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt";
|
||||
|
||||
String waveFilename =
|
||||
"sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav";
|
||||
|
||||
WaveReader reader = new WaveReader(waveFilename);
|
||||
|
||||
OfflineWenetCtcModelConfig wenetCtc =
|
||||
OfflineWenetCtcModelConfig.builder().setModel(model).build();
|
||||
|
||||
OfflineModelConfig modelConfig =
|
||||
OfflineModelConfig.builder()
|
||||
.setWenetCtc(wenetCtc)
|
||||
.setTokens(tokens)
|
||||
.setNumThreads(1)
|
||||
.setDebug(true)
|
||||
.build();
|
||||
|
||||
OfflineRecognizerConfig config =
|
||||
OfflineRecognizerConfig.builder()
|
||||
.setOfflineModelConfig(modelConfig)
|
||||
.setDecodingMethod("greedy_search")
|
||||
.build();
|
||||
|
||||
OfflineRecognizer recognizer = new OfflineRecognizer(config);
|
||||
OfflineStream stream = recognizer.createStream();
|
||||
stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
|
||||
|
||||
recognizer.decode(stream);
|
||||
|
||||
String text = recognizer.getResult(stream).getText();
|
||||
|
||||
System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
|
||||
|
||||
stream.release();
|
||||
recognizer.release();
|
||||
}
|
||||
}
|
||||
37
java-api-examples/run-non-streaming-decode-file-wenet-ctc.sh
Executable file
37
java-api-examples/run-non-streaming-decode-file-wenet-ctc.sh
Executable file
@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
|
||||
if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
|
||||
mkdir -p ../build
|
||||
pushd ../build
|
||||
cmake \
|
||||
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_JNI=ON \
|
||||
..
|
||||
|
||||
make -j4
|
||||
ls -lh lib
|
||||
popd
|
||||
fi
|
||||
|
||||
if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
|
||||
pushd ../sherpa-onnx/java-api
|
||||
make
|
||||
popd
|
||||
fi
|
||||
|
||||
if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
fi
|
||||
|
||||
java \
|
||||
-Djava.library.path=$PWD/../build/lib \
|
||||
-cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
|
||||
NonStreamingDecodeFileWenetCtc.java
|
||||
@ -489,8 +489,30 @@ function testOfflineNeMoCanary() {
|
||||
java -Djava.library.path=../build/lib -jar $out_filename
|
||||
}
|
||||
|
||||
function testOfflineWenetCtc() {
|
||||
if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
fi
|
||||
|
||||
out_filename=test_offline_wenet_ctc.jar
|
||||
kotlinc-jvm -include-runtime -d $out_filename \
|
||||
test_offline_wenet_ctc.kt \
|
||||
FeatureConfig.kt \
|
||||
HomophoneReplacerConfig.kt \
|
||||
OfflineRecognizer.kt \
|
||||
OfflineStream.kt \
|
||||
WaveReader.kt \
|
||||
faked-asset-manager.kt
|
||||
|
||||
ls -lh $out_filename
|
||||
java -Djava.library.path=../build/lib -jar $out_filename
|
||||
}
|
||||
|
||||
testVersion
|
||||
|
||||
testOfflineWenetCtc
|
||||
testOfflineNeMoCanary
|
||||
testOfflineSenseVoiceWithHr
|
||||
testOfflineSpeechDenoiser
|
||||
|
||||
31
kotlin-api-examples/test_offline_wenet_ctc.kt
Normal file
31
kotlin-api-examples/test_offline_wenet_ctc.kt
Normal file
@ -0,0 +1,31 @@
|
||||
package com.k2fsa.sherpa.onnx
|
||||
|
||||
fun main() {
|
||||
val recognizer = createOfflineRecognizer()
|
||||
val waveFilename = "./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav"
|
||||
|
||||
val objArray = WaveReader.readWaveFromFile(
|
||||
filename = waveFilename,
|
||||
)
|
||||
val samples: FloatArray = objArray[0] as FloatArray
|
||||
val sampleRate: Int = objArray[1] as Int
|
||||
|
||||
var stream = recognizer.createStream()
|
||||
stream.acceptWaveform(samples, sampleRate=sampleRate)
|
||||
recognizer.decode(stream)
|
||||
|
||||
var result = recognizer.getResult(stream)
|
||||
println(result)
|
||||
|
||||
stream.release()
|
||||
recognizer.release()
|
||||
}
|
||||
|
||||
|
||||
fun createOfflineRecognizer(): OfflineRecognizer {
|
||||
val config = OfflineRecognizerConfig(
|
||||
modelConfig = getOfflineModelConfig(type = 42)!!,
|
||||
)
|
||||
|
||||
return OfflineRecognizer(config = config)
|
||||
}
|
||||
@ -124,6 +124,7 @@ The following tables list the examples in this folder.
|
||||
|[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)|
|
||||
|[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)|
|
||||
|[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
|
||||
|[./test_asr_non_streaming_wenet_ctc.js](./test_asr_non_streaming_wenet_ctc.js)|Non-streaming speech recognition from a file using a [u2pp_conformer_yue](https://huggingface.co/ASLP-lab/WSYue-ASR/tree/main/u2pp_conformer_yue) CTC model with greedy search|
|
||||
|[./test_asr_non_streaming_nemo_canary.js](./test_asr_non_streaming_nemo_canary.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [Canary](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french) model|
|
||||
|[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search|
|
||||
|[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search|
|
||||
@ -426,6 +427,16 @@ npm install naudiodon2
|
||||
node ./test_vad_asr_non_streaming_nemo_ctc_microphone.js
|
||||
```
|
||||
|
||||
### Non-streaming speech recognition with Wenet CTC models
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
|
||||
node ./test_asr_non_streaming_wenet_ctc.js
|
||||
```
|
||||
|
||||
### Non-streaming speech recognition with Paraformer
|
||||
|
||||
```bash
|
||||
|
||||
48
nodejs-addon-examples/test_asr_non_streaming_wenet_ctc.js
Normal file
48
nodejs-addon-examples/test_asr_non_streaming_wenet_ctc.js
Normal file
@ -0,0 +1,48 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
const sherpa_onnx = require('sherpa-onnx-node');
|
||||
|
||||
// Please download test files from
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||
const config = {
|
||||
'featConfig': {
|
||||
'sampleRate': 16000,
|
||||
'featureDim': 80,
|
||||
},
|
||||
'modelConfig': {
|
||||
'wenetCtc': {
|
||||
'model':
|
||||
'./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx',
|
||||
},
|
||||
'tokens':
|
||||
'./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt',
|
||||
'numThreads': 2,
|
||||
'provider': 'cpu',
|
||||
'debug': 1,
|
||||
}
|
||||
};
|
||||
|
||||
const waveFilename =
|
||||
'./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav';
|
||||
|
||||
const recognizer = new sherpa_onnx.OfflineRecognizer(config);
|
||||
console.log('Started')
|
||||
let start = Date.now();
|
||||
const stream = recognizer.createStream();
|
||||
const wave = sherpa_onnx.readWave(waveFilename);
|
||||
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
|
||||
|
||||
recognizer.decode(stream);
|
||||
result = recognizer.getResult(stream)
|
||||
let stop = Date.now();
|
||||
console.log('Done')
|
||||
|
||||
const elapsed_seconds = (stop - start) / 1000;
|
||||
const duration = wave.samples.length / wave.sampleRate;
|
||||
const real_time_factor = elapsed_seconds / duration;
|
||||
console.log('Wave duration', duration.toFixed(3), 'seconds')
|
||||
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
|
||||
console.log(
|
||||
`RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
|
||||
real_time_factor.toFixed(3))
|
||||
console.log(waveFilename)
|
||||
console.log('result\n', result)
|
||||
@ -203,6 +203,22 @@ rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
|
||||
node ./test-offline-zipformer-ctc.js
|
||||
```
|
||||
|
||||
## ./test-offline-wenet-ctc.js
|
||||
|
||||
[./test-offline-wenet-ctc.js](./test-offline-wenet-ctc.js) demonstrates
|
||||
how to decode a file with a Wenet CTC model. In the code we use
|
||||
[sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2).
|
||||
|
||||
You can use the following command to run it:
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
|
||||
node ./test-offline-wenet-ctc.js
|
||||
```
|
||||
|
||||
## ./test-offline-nemo-ctc.js
|
||||
|
||||
[./test-offline-nemo-ctc.js](./test-offline-nemo-ctc.js) demonstrates
|
||||
|
||||
37
nodejs-examples/test-offline-wenet-ctc.js
Normal file
37
nodejs-examples/test-offline-wenet-ctc.js
Normal file
@ -0,0 +1,37 @@
|
||||
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
//
|
||||
const fs = require('fs');
|
||||
const {Readable} = require('stream');
|
||||
const wav = require('wav');
|
||||
|
||||
const sherpa_onnx = require('sherpa-onnx');
|
||||
|
||||
function createOfflineRecognizer() {
|
||||
let config = {
|
||||
modelConfig: {
|
||||
wenetCtc: {
|
||||
model:
|
||||
'./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx',
|
||||
},
|
||||
tokens:
|
||||
'./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt',
|
||||
}
|
||||
};
|
||||
|
||||
return sherpa_onnx.createOfflineRecognizer(config);
|
||||
}
|
||||
|
||||
const recognizer = createOfflineRecognizer();
|
||||
const stream = recognizer.createStream();
|
||||
|
||||
const waveFilename =
|
||||
'./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav';
|
||||
const wave = sherpa_onnx.readWave(waveFilename);
|
||||
stream.acceptWaveform(wave.sampleRate, wave.samples);
|
||||
|
||||
recognizer.decode(stream);
|
||||
const text = recognizer.getResult(stream).text;
|
||||
console.log(text);
|
||||
|
||||
stream.free();
|
||||
recognizer.free();
|
||||
@ -10,4 +10,5 @@ telespeech_ctc
|
||||
moonshine
|
||||
dolphin_ctc
|
||||
zipformer_ctc
|
||||
wenet_ctc
|
||||
nemo_canary
|
||||
|
||||
42
pascal-api-examples/non-streaming-asr/run-wenet-ctc.sh
Executable file
42
pascal-api-examples/non-streaming-asr/run-wenet-ctc.sh
Executable file
@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
|
||||
|
||||
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
|
||||
|
||||
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
|
||||
mkdir -p ../../build
|
||||
pushd ../../build
|
||||
cmake \
|
||||
-DCMAKE_INSTALL_PREFIX=./install \
|
||||
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_TESTS=OFF \
|
||||
-DSHERPA_ONNX_ENABLE_CHECK=OFF \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
|
||||
..
|
||||
|
||||
cmake --build . --target install --config Release
|
||||
ls -lh lib
|
||||
popd
|
||||
fi
|
||||
|
||||
if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
fi
|
||||
|
||||
fpc \
|
||||
-dSHERPA_ONNX_USE_SHARED_LIBS \
|
||||
-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
|
||||
-Fl$SHERPA_ONNX_DIR/build/install/lib \
|
||||
./wenet_ctc.pas
|
||||
|
||||
export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
|
||||
export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
|
||||
|
||||
./wenet_ctc
|
||||
76
pascal-api-examples/non-streaming-asr/wenet_ctc.pas
Normal file
76
pascal-api-examples/non-streaming-asr/wenet_ctc.pas
Normal file
@ -0,0 +1,76 @@
|
||||
{ Copyright (c) 2025 Xiaomi Corporation }
|
||||
|
||||
{
|
||||
This file shows how to use a non-streaming Wenet CTC model
|
||||
to decode files.
|
||||
|
||||
You can download the model files from
|
||||
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||
}
|
||||
|
||||
program wenet_ctc;
|
||||
|
||||
{$mode objfpc}
|
||||
|
||||
uses
|
||||
sherpa_onnx,
|
||||
DateUtils,
|
||||
SysUtils;
|
||||
|
||||
var
|
||||
Wave: TSherpaOnnxWave;
|
||||
WaveFilename: AnsiString;
|
||||
|
||||
Config: TSherpaOnnxOfflineRecognizerConfig;
|
||||
Recognizer: TSherpaOnnxOfflineRecognizer;
|
||||
Stream: TSherpaOnnxOfflineStream;
|
||||
RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
|
||||
|
||||
Start: TDateTime;
|
||||
Stop: TDateTime;
|
||||
|
||||
Elapsed: Single;
|
||||
Duration: Single;
|
||||
RealTimeFactor: Single;
|
||||
begin
|
||||
Initialize(Config);
|
||||
|
||||
Config.ModelConfig.WenetCtc.Model := './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx';
|
||||
Config.ModelConfig.Tokens := './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt';
|
||||
Config.ModelConfig.Provider := 'cpu';
|
||||
Config.ModelConfig.NumThreads := 1;
|
||||
Config.ModelConfig.Debug := False;
|
||||
|
||||
WaveFilename := './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav';
|
||||
|
||||
Wave := SherpaOnnxReadWave(WaveFilename);
|
||||
|
||||
Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
|
||||
Stream := Recognizer.CreateStream();
|
||||
Start := Now;
|
||||
|
||||
Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
|
||||
Recognizer.Decode(Stream);
|
||||
|
||||
RecognitionResult := Recognizer.GetResult(Stream);
|
||||
|
||||
Stop := Now;
|
||||
|
||||
Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
|
||||
Duration := Length(Wave.Samples) / Wave.SampleRate;
|
||||
RealTimeFactor := Elapsed / Duration;
|
||||
|
||||
WriteLn(RecognitionResult.ToString);
|
||||
WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
|
||||
WriteLn(Format('Elapsed %.3f s', [Elapsed]));
|
||||
WriteLn(Format('Wave duration %.3f s', [Duration]));
|
||||
WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
|
||||
|
||||
{Free resources to avoid memory leak.
|
||||
|
||||
Note: You don't need to invoke them for this simple script.
|
||||
However, you have to invoke them in your own large/complex project.
|
||||
}
|
||||
FreeAndNil(Stream);
|
||||
FreeAndNil(Recognizer);
|
||||
end.
|
||||
@ -184,11 +184,25 @@ def get_2nd_models():
|
||||
pushd $model_name
|
||||
|
||||
rm -rfv test_wavs
|
||||
rm -fv model.onnx
|
||||
rm -fv *.py
|
||||
|
||||
ls -lh
|
||||
|
||||
popd
|
||||
""",
|
||||
),
|
||||
Model(
|
||||
model_name="sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10",
|
||||
idx=42,
|
||||
lang="zh_en_yue",
|
||||
short_name="wenetspeech_yue_u2pconformer_ctc_2025_09_10_int8",
|
||||
cmd="""
|
||||
pushd $model_name
|
||||
|
||||
rm -rfv test_wavs
|
||||
|
||||
ls -lh
|
||||
|
||||
popd
|
||||
""",
|
||||
),
|
||||
@ -399,6 +413,7 @@ def get_models():
|
||||
"sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
|
||||
"sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09",
|
||||
"sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02",
|
||||
"sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10",
|
||||
]
|
||||
for first_m in first_zh:
|
||||
for second_m in second_zh:
|
||||
@ -425,6 +440,10 @@ def get_models():
|
||||
"sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
|
||||
"sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09",
|
||||
),
|
||||
(
|
||||
"sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
|
||||
"sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10",
|
||||
),
|
||||
]
|
||||
models = []
|
||||
for f, s in combinations:
|
||||
|
||||
@ -711,6 +711,22 @@ def get_models():
|
||||
|
||||
ls -lh
|
||||
|
||||
popd
|
||||
""",
|
||||
),
|
||||
Model(
|
||||
model_name="sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10",
|
||||
idx=42,
|
||||
lang="zh_en_yue",
|
||||
lang2="中英粤",
|
||||
short_name="wenetspeech_yue_u2pconformer_ctc_2025_09_10_int8",
|
||||
cmd="""
|
||||
pushd $model_name
|
||||
|
||||
rm -rfv test_wavs
|
||||
|
||||
ls -lh
|
||||
|
||||
popd
|
||||
""",
|
||||
),
|
||||
|
||||
@ -29,6 +29,7 @@ namespace SherpaOnnx
|
||||
Dolphin = new OfflineDolphinModelConfig();
|
||||
ZipformerCtc = new OfflineZipformerCtcModelConfig();
|
||||
Canary = new OfflineCanaryModelConfig();
|
||||
WenetCtc = new OfflineWenetCtcModelConfig();
|
||||
}
|
||||
public OfflineTransducerModelConfig Transducer;
|
||||
public OfflineParaformerModelConfig Paraformer;
|
||||
@ -64,5 +65,6 @@ namespace SherpaOnnx
|
||||
public OfflineDolphinModelConfig Dolphin;
|
||||
public OfflineZipformerCtcModelConfig ZipformerCtc;
|
||||
public OfflineCanaryModelConfig Canary;
|
||||
public OfflineWenetCtcModelConfig WenetCtc;
|
||||
}
|
||||
}
|
||||
|
||||
18
scripts/dotnet/OfflineWenetCtcModelConfig.cs
Normal file
18
scripts/dotnet/OfflineWenetCtcModelConfig.cs
Normal file
@ -0,0 +1,18 @@
|
||||
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
|
||||
[StructLayout(LayoutKind.Sequential)]
|
||||
public struct OfflineWenetCtcModelConfig
|
||||
{
|
||||
public OfflineWenetCtcModelConfig()
|
||||
{
|
||||
Model = "";
|
||||
}
|
||||
[MarshalAs(UnmanagedType.LPStr)]
|
||||
public string Model;
|
||||
}
|
||||
}
|
||||
1
scripts/go/_internal/non-streaming-decode-files/run-wenet-ctc.sh
Symbolic link
1
scripts/go/_internal/non-streaming-decode-files/run-wenet-ctc.sh
Symbolic link
@ -0,0 +1 @@
|
||||
../../../../go-api-examples/non-streaming-decode-files/run-wenet-ctc.sh
|
||||
@ -418,6 +418,10 @@ type OfflineZipformerCtcModelConfig struct {
|
||||
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
|
||||
}
|
||||
|
||||
type OfflineWenetCtcModelConfig struct {
|
||||
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
|
||||
}
|
||||
|
||||
type OfflineDolphinModelConfig struct {
|
||||
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
|
||||
}
|
||||
@ -478,6 +482,7 @@ type OfflineModelConfig struct {
|
||||
Dolphin OfflineDolphinModelConfig
|
||||
ZipformerCtc OfflineZipformerCtcModelConfig
|
||||
Canary OfflineCanaryModelConfig
|
||||
WenetCtc OfflineWenetCtcModelConfig
|
||||
Tokens string // Path to tokens.txt
|
||||
|
||||
// Number of threads to use for neural network computation
|
||||
@ -579,6 +584,8 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher
|
||||
c.model_config.canary.tgt_lang = C.CString(config.ModelConfig.Canary.TgtLang)
|
||||
c.model_config.canary.use_pnc = C.int(config.ModelConfig.Canary.UsePnc)
|
||||
|
||||
c.model_config.wenet_ctc.model = C.CString(config.ModelConfig.WenetCtc.Model)
|
||||
|
||||
c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
|
||||
|
||||
c.model_config.num_threads = C.int(config.ModelConfig.NumThreads)
|
||||
@ -727,6 +734,11 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig)
|
||||
c.model_config.canary.tgt_lang = nil
|
||||
}
|
||||
|
||||
if c.model_config.wenet_ctc.model != nil {
|
||||
C.free(unsafe.Pointer(c.model_config.wenet_ctc.model))
|
||||
c.model_config.wenet_ctc.model = nil
|
||||
}
|
||||
|
||||
if c.model_config.tokens != nil {
|
||||
C.free(unsafe.Pointer(c.model_config.tokens))
|
||||
c.model_config.tokens = nil
|
||||
|
||||
@ -506,6 +506,9 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig(
|
||||
recognizer_config.model_config.canary.use_pnc =
|
||||
config->model_config.canary.use_pnc;
|
||||
|
||||
recognizer_config.model_config.wenet_ctc.model =
|
||||
SHERPA_ONNX_OR(config->model_config.wenet_ctc.model, "");
|
||||
|
||||
recognizer_config.lm_config.model =
|
||||
SHERPA_ONNX_OR(config->lm_config.model, "");
|
||||
recognizer_config.lm_config.scale =
|
||||
|
||||
@ -476,6 +476,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineZipformerCtcModelConfig {
|
||||
const char *model;
|
||||
} SherpaOnnxOfflineZipformerCtcModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWenetCtcModelConfig {
|
||||
const char *model;
|
||||
} SherpaOnnxOfflineWenetCtcModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
|
||||
SherpaOnnxOfflineTransducerModelConfig transducer;
|
||||
SherpaOnnxOfflineParaformerModelConfig paraformer;
|
||||
@ -501,6 +505,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
|
||||
SherpaOnnxOfflineDolphinModelConfig dolphin;
|
||||
SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc;
|
||||
SherpaOnnxOfflineCanaryModelConfig canary;
|
||||
SherpaOnnxOfflineWenetCtcModelConfig wenet_ctc;
|
||||
} SherpaOnnxOfflineModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {
|
||||
|
||||
@ -265,6 +265,8 @@ static SherpaOnnxOfflineRecognizerConfig Convert(
|
||||
c.model_config.canary.tgt_lang = config.model_config.canary.tgt_lang.c_str();
|
||||
c.model_config.canary.use_pnc = config.model_config.canary.use_pnc;
|
||||
|
||||
c.model_config.wenet_ctc.model = config.model_config.wenet_ctc.model.c_str();
|
||||
|
||||
c.lm_config.model = config.lm_config.model.c_str();
|
||||
c.lm_config.scale = config.lm_config.scale;
|
||||
|
||||
|
||||
@ -264,6 +264,10 @@ struct SHERPA_ONNX_API OfflineZipformerCtcModelConfig {
|
||||
std::string model;
|
||||
};
|
||||
|
||||
struct SHERPA_ONNX_API OfflineWenetCtcModelConfig {
|
||||
std::string model;
|
||||
};
|
||||
|
||||
struct SHERPA_ONNX_API OfflineMoonshineModelConfig {
|
||||
std::string preprocessor;
|
||||
std::string encoder;
|
||||
@ -292,6 +296,7 @@ struct SHERPA_ONNX_API OfflineModelConfig {
|
||||
OfflineDolphinModelConfig dolphin;
|
||||
OfflineZipformerCtcModelConfig zipformer_ctc;
|
||||
OfflineCanaryModelConfig canary;
|
||||
OfflineWenetCtcModelConfig wenet_ctc;
|
||||
};
|
||||
|
||||
struct SHERPA_ONNX_API OfflineLMConfig {
|
||||
|
||||
@ -36,6 +36,7 @@ java_files += OfflineFireRedAsrModelConfig.java
|
||||
java_files += OfflineMoonshineModelConfig.java
|
||||
java_files += OfflineNemoEncDecCtcModelConfig.java
|
||||
java_files += OfflineZipformerCtcModelConfig.java
|
||||
java_files += OfflineWenetCtcModelConfig.java
|
||||
java_files += OfflineCanaryModelConfig.java
|
||||
java_files += OfflineSenseVoiceModelConfig.java
|
||||
java_files += OfflineDolphinModelConfig.java
|
||||
|
||||
@ -12,6 +12,7 @@ public class OfflineModelConfig {
|
||||
private final OfflineSenseVoiceModelConfig senseVoice;
|
||||
private final OfflineDolphinModelConfig dolphin;
|
||||
private final OfflineZipformerCtcModelConfig zipformerCtc;
|
||||
private final OfflineWenetCtcModelConfig wenetCtc;
|
||||
private final OfflineCanaryModelConfig canary;
|
||||
private final String teleSpeech;
|
||||
private final String tokens;
|
||||
@ -32,6 +33,7 @@ public class OfflineModelConfig {
|
||||
this.nemo = builder.nemo;
|
||||
this.zipformerCtc = builder.zipformerCtc;
|
||||
this.canary = builder.canary;
|
||||
this.wenetCtc = builder.wenetCtc;
|
||||
this.senseVoice = builder.senseVoice;
|
||||
this.dolphin = builder.dolphin;
|
||||
this.teleSpeech = builder.teleSpeech;
|
||||
@ -80,6 +82,10 @@ public class OfflineModelConfig {
|
||||
return zipformerCtc;
|
||||
}
|
||||
|
||||
public OfflineWenetCtcModelConfig getWenetCtc() {
|
||||
return wenetCtc;
|
||||
}
|
||||
|
||||
public OfflineCanaryModelConfig getCanary() {
|
||||
return canary;
|
||||
}
|
||||
@ -126,6 +132,7 @@ public class OfflineModelConfig {
|
||||
private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build();
|
||||
private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build();
|
||||
private OfflineZipformerCtcModelConfig zipformerCtc = OfflineZipformerCtcModelConfig.builder().build();
|
||||
private OfflineWenetCtcModelConfig wenetCtc = OfflineWenetCtcModelConfig.builder().build();
|
||||
private OfflineCanaryModelConfig canary = OfflineCanaryModelConfig.builder().build();
|
||||
private String teleSpeech = "";
|
||||
private String tokens = "";
|
||||
@ -165,6 +172,11 @@ public class OfflineModelConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setWenetCtc(OfflineWenetCtcModelConfig wenetCtc) {
|
||||
this.wenetCtc = wenetCtc;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setCanary(OfflineCanaryModelConfig canary) {
|
||||
this.canary = canary;
|
||||
return this;
|
||||
|
||||
@ -0,0 +1,30 @@
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OfflineWenetCtcModelConfig {
|
||||
private final String model;
|
||||
|
||||
private OfflineWenetCtcModelConfig(Builder builder) {
|
||||
this.model = builder.model;
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
public String getModel() {
|
||||
return model;
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
private String model = "";
|
||||
|
||||
public OfflineWenetCtcModelConfig build() {
|
||||
return new OfflineWenetCtcModelConfig(this);
|
||||
}
|
||||
|
||||
public Builder setModel(String model) {
|
||||
this.model = model;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -284,6 +284,19 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) {
|
||||
ans.model_config.zipformer_ctc.model = p;
|
||||
env->ReleaseStringUTFChars(s, p);
|
||||
|
||||
// wenet ctc
|
||||
fid = env->GetFieldID(model_config_cls, "wenetCtc",
|
||||
"Lcom/k2fsa/sherpa/onnx/OfflineWenetCtcModelConfig;");
|
||||
jobject wenet_ctc_config = env->GetObjectField(model_config, fid);
|
||||
jclass wenet_ctc_config_cls = env->GetObjectClass(wenet_ctc_config);
|
||||
|
||||
fid = env->GetFieldID(wenet_ctc_config_cls, "model", "Ljava/lang/String;");
|
||||
|
||||
s = (jstring)env->GetObjectField(wenet_ctc_config, fid);
|
||||
p = env->GetStringUTFChars(s, nullptr);
|
||||
ans.model_config.wenet_ctc.model = p;
|
||||
env->ReleaseStringUTFChars(s, p);
|
||||
|
||||
// canary
|
||||
fid = env->GetFieldID(model_config_cls, "canary",
|
||||
"Lcom/k2fsa/sherpa/onnx/OfflineCanaryModelConfig;");
|
||||
|
||||
@ -36,6 +36,10 @@ data class OfflineZipformerCtcModelConfig(
|
||||
var model: String = "",
|
||||
)
|
||||
|
||||
data class OfflineWenetCtcModelConfig(
|
||||
var model: String = "",
|
||||
)
|
||||
|
||||
data class OfflineWhisperModelConfig(
|
||||
var encoder: String = "",
|
||||
var decoder: String = "",
|
||||
@ -80,6 +84,7 @@ data class OfflineModelConfig(
|
||||
var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(),
|
||||
var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(),
|
||||
var zipformerCtc: OfflineZipformerCtcModelConfig = OfflineZipformerCtcModelConfig(),
|
||||
var wenetCtc: OfflineWenetCtcModelConfig = OfflineWenetCtcModelConfig(),
|
||||
var canary: OfflineCanaryModelConfig = OfflineCanaryModelConfig(),
|
||||
var teleSpeech: String = "",
|
||||
var numThreads: Int = 1,
|
||||
@ -705,6 +710,16 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
|
||||
tokens = "$modelDir/tokens.txt",
|
||||
)
|
||||
}
|
||||
|
||||
42 -> {
|
||||
val modelDir = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10"
|
||||
return OfflineModelConfig(
|
||||
wenetCtc = OfflineWenetCtcModelConfig(
|
||||
model = "$modelDir/model.int8.onnx",
|
||||
),
|
||||
tokens = "$modelDir/tokens.txt",
|
||||
)
|
||||
}
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
@ -313,6 +313,11 @@ type
|
||||
function ToString: AnsiString;
|
||||
end;
|
||||
|
||||
TSherpaOnnxOfflineWenetCtcModelConfig = record
|
||||
Model: AnsiString;
|
||||
function ToString: AnsiString;
|
||||
end;
|
||||
|
||||
TSherpaOnnxOfflineWhisperModelConfig = record
|
||||
Encoder: AnsiString;
|
||||
Decoder: AnsiString;
|
||||
@ -387,6 +392,7 @@ type
|
||||
Dolphin: TSherpaOnnxOfflineDolphinModelConfig;
|
||||
ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig;
|
||||
Canary: TSherpaOnnxOfflineCanaryModelConfig;
|
||||
WenetCtc: TSherpaOnnxOfflineWenetCtcModelConfig;
|
||||
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
|
||||
function ToString: AnsiString;
|
||||
end;
|
||||
@ -794,6 +800,9 @@ type
|
||||
SherpaOnnxOfflineZipformerCtcModelConfig = record
|
||||
Model: PAnsiChar;
|
||||
end;
|
||||
SherpaOnnxOfflineWenetCtcModelConfig = record
|
||||
Model: PAnsiChar;
|
||||
end;
|
||||
SherpaOnnxOfflineWhisperModelConfig = record
|
||||
Encoder: PAnsiChar;
|
||||
Decoder: PAnsiChar;
|
||||
@ -850,6 +859,7 @@ type
|
||||
Dolphin: SherpaOnnxOfflineDolphinModelConfig;
|
||||
ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig;
|
||||
Canary: SherpaOnnxOfflineCanaryModelConfig;
|
||||
WenetCtc: SherpaOnnxOfflineWenetCtcModelConfig;
|
||||
end;
|
||||
|
||||
SherpaOnnxOfflineRecognizerConfig = record
|
||||
@ -1658,6 +1668,12 @@ begin
|
||||
[Self.Model]);
|
||||
end;
|
||||
|
||||
function TSherpaOnnxOfflineWenetCtcModelConfig.ToString: AnsiString;
|
||||
begin
|
||||
Result := Format('TSherpaOnnxOfflineWenetCtcModelConfig(Model := %s)',
|
||||
[Self.Model]);
|
||||
end;
|
||||
|
||||
function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString;
|
||||
begin
|
||||
Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' +
|
||||
@ -1747,7 +1763,8 @@ begin
|
||||
'FireRedAsr := %s, ' +
|
||||
'Dolphin := %s, ' +
|
||||
'ZipformerCtc := %s, ' +
|
||||
'Canary := %s' +
|
||||
'Canary := %s, ' +
|
||||
'WenetCtc := %s' +
|
||||
')',
|
||||
[Self.Transducer.ToString, Self.Paraformer.ToString,
|
||||
Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
|
||||
@ -1755,7 +1772,7 @@ begin
|
||||
Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
|
||||
Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
|
||||
Self.FireRedAsr.ToString, Self.Dolphin.ToString,
|
||||
Self.ZipformerCtc.ToString, Self.Canary.ToString
|
||||
Self.ZipformerCtc.ToString, Self.Canary.ToString, Self.WenetCtc.ToString
|
||||
]);
|
||||
end;
|
||||
|
||||
@ -1834,6 +1851,8 @@ begin
|
||||
C.ModelConfig.Canary.TgtLang := PAnsiChar(Config.ModelConfig.Canary.TgtLang);
|
||||
C.ModelConfig.Canary.UsePnc := Ord(Config.ModelConfig.Canary.UsePnc);
|
||||
|
||||
C.ModelConfig.WenetCtc.Model := PAnsiChar(Config.ModelConfig.WenetCtc.Model);
|
||||
|
||||
C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
|
||||
C.LMConfig.Scale := Config.LMConfig.Scale;
|
||||
|
||||
|
||||
1
swift-api-examples/.gitignore
vendored
1
swift-api-examples/.gitignore
vendored
@ -19,6 +19,7 @@ speech-enhancement-gtcrn
|
||||
decode-file-sense-voice-with-hr
|
||||
test-version
|
||||
zipformer-ctc-asr
|
||||
wenet-ctc-asr
|
||||
dolphin-ctc-asr
|
||||
tts-kitten-en
|
||||
compute-speaker-embeddings
|
||||
|
||||
@ -360,6 +360,14 @@ func sherpaOnnxOfflineZipformerCtcModelConfig(
|
||||
)
|
||||
}
|
||||
|
||||
func sherpaOnnxOfflineWenetCtcModelConfig(
|
||||
model: String = ""
|
||||
) -> SherpaOnnxOfflineWenetCtcModelConfig {
|
||||
return SherpaOnnxOfflineWenetCtcModelConfig(
|
||||
model: toCPointer(model)
|
||||
)
|
||||
}
|
||||
|
||||
func sherpaOnnxOfflineNemoEncDecCtcModelConfig(
|
||||
model: String = ""
|
||||
) -> SherpaOnnxOfflineNemoEncDecCtcModelConfig {
|
||||
@ -482,7 +490,9 @@ func sherpaOnnxOfflineModelConfig(
|
||||
dolphin: SherpaOnnxOfflineDolphinModelConfig = sherpaOnnxOfflineDolphinModelConfig(),
|
||||
zipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig =
|
||||
sherpaOnnxOfflineZipformerCtcModelConfig(),
|
||||
canary: SherpaOnnxOfflineCanaryModelConfig = sherpaOnnxOfflineCanaryModelConfig()
|
||||
canary: SherpaOnnxOfflineCanaryModelConfig = sherpaOnnxOfflineCanaryModelConfig(),
|
||||
wenetCtc: SherpaOnnxOfflineWenetCtcModelConfig =
|
||||
sherpaOnnxOfflineWenetCtcModelConfig()
|
||||
) -> SherpaOnnxOfflineModelConfig {
|
||||
return SherpaOnnxOfflineModelConfig(
|
||||
transducer: transducer,
|
||||
@ -503,7 +513,8 @@ func sherpaOnnxOfflineModelConfig(
|
||||
fire_red_asr: fireRedAsr,
|
||||
dolphin: dolphin,
|
||||
zipformer_ctc: zipformerCtc,
|
||||
canary: canary
|
||||
canary: canary,
|
||||
wenet_ctc: wenetCtc
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
34
swift-api-examples/run-wenet-ctc-asr.sh
Executable file
34
swift-api-examples/run-wenet-ctc-asr.sh
Executable file
@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
|
||||
if [ ! -d ../build-swift-macos ]; then
|
||||
echo "Please run ../build-swift-macos.sh first!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
|
||||
fi
|
||||
|
||||
if [ ! -e ./wenet-ctc-asr ]; then
|
||||
# Note: We use -lc++ to link against libc++ instead of libstdc++
|
||||
swiftc \
|
||||
-lc++ \
|
||||
-I ../build-swift-macos/install/include \
|
||||
-import-objc-header ./SherpaOnnx-Bridging-Header.h \
|
||||
./wenet-ctc-asr.swift ./SherpaOnnx.swift \
|
||||
-L ../build-swift-macos/install/lib/ \
|
||||
-l sherpa-onnx \
|
||||
-l onnxruntime \
|
||||
-o wenet-ctc-asr
|
||||
|
||||
strip wenet-ctc-asr
|
||||
else
|
||||
echo "./wenet-ctc-asr exists - skip building"
|
||||
fi
|
||||
|
||||
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
|
||||
./wenet-ctc-asr
|
||||
68
swift-api-examples/wenet-ctc-asr.swift
Normal file
68
swift-api-examples/wenet-ctc-asr.swift
Normal file
@ -0,0 +1,68 @@
|
||||
import AVFoundation
|
||||
|
||||
extension AudioBuffer {
|
||||
func array() -> [Float] {
|
||||
return Array(UnsafeBufferPointer(self))
|
||||
}
|
||||
}
|
||||
|
||||
extension AVAudioPCMBuffer {
|
||||
func array() -> [Float] {
|
||||
return self.audioBufferList.pointee.mBuffers.array()
|
||||
}
|
||||
}
|
||||
|
||||
func run() {
|
||||
let model =
|
||||
"./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx"
|
||||
let tokens =
|
||||
"./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt"
|
||||
|
||||
let wenetCtc = sherpaOnnxOfflineWenetCtcModelConfig(
|
||||
model: model
|
||||
)
|
||||
|
||||
let modelConfig = sherpaOnnxOfflineModelConfig(
|
||||
tokens: tokens,
|
||||
debug: 0,
|
||||
wenetCtc: wenetCtc
|
||||
)
|
||||
|
||||
let featConfig = sherpaOnnxFeatureConfig(
|
||||
sampleRate: 16000,
|
||||
featureDim: 80
|
||||
)
|
||||
var config = sherpaOnnxOfflineRecognizerConfig(
|
||||
featConfig: featConfig,
|
||||
modelConfig: modelConfig
|
||||
)
|
||||
|
||||
let recognizer = SherpaOnnxOfflineRecognizer(config: &config)
|
||||
|
||||
let filePath =
|
||||
"./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav"
|
||||
let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
|
||||
let audioFile = try! AVAudioFile(forReading: fileURL as URL)
|
||||
|
||||
let audioFormat = audioFile.processingFormat
|
||||
assert(audioFormat.channelCount == 1)
|
||||
assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)
|
||||
|
||||
let audioFrameCount = UInt32(audioFile.length)
|
||||
let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)
|
||||
|
||||
try! audioFile.read(into: audioFileBuffer!)
|
||||
let array: [Float]! = audioFileBuffer?.array()
|
||||
let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate))
|
||||
print("\nresult is:\n\(result.text)")
|
||||
if result.timestamps.count != 0 {
|
||||
print("\ntimestamps is:\n\(result.timestamps)")
|
||||
}
|
||||
}
|
||||
|
||||
@main
|
||||
struct App {
|
||||
static func main() {
|
||||
run()
|
||||
}
|
||||
}
|
||||
@ -51,6 +51,10 @@ function freeConfig(config, Module) {
|
||||
freeConfig(config.zipformerCtc, Module)
|
||||
}
|
||||
|
||||
if ('wenetCtc' in config) {
|
||||
freeConfig(config.wenetCtc, Module)
|
||||
}
|
||||
|
||||
if ('moonshine' in config) {
|
||||
freeConfig(config.moonshine, Module)
|
||||
}
|
||||
@ -733,6 +737,23 @@ function initSherpaOnnxOfflineZipformerCtcModelConfig(config, Module) {
|
||||
}
|
||||
}
|
||||
|
||||
function initSherpaOnnxOfflineWenetCtcModelConfig(config, Module) {
|
||||
const n = Module.lengthBytesUTF8(config.model || '') + 1;
|
||||
|
||||
const buffer = Module._malloc(n);
|
||||
|
||||
const len = 1 * 4; // 1 pointer
|
||||
const ptr = Module._malloc(len);
|
||||
|
||||
Module.stringToUTF8(config.model || '', buffer, n);
|
||||
|
||||
Module.setValue(ptr, buffer, 'i8*');
|
||||
|
||||
return {
|
||||
buffer: buffer, ptr: ptr, len: len,
|
||||
}
|
||||
}
|
||||
|
||||
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
|
||||
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
|
||||
const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
|
||||
@ -997,6 +1018,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||
};
|
||||
}
|
||||
|
||||
if (!('wenetCtc' in config)) {
|
||||
config.wenetCtc = {
|
||||
model: '',
|
||||
};
|
||||
}
|
||||
|
||||
if (!('whisper' in config)) {
|
||||
config.whisper = {
|
||||
encoder: '',
|
||||
@ -1078,9 +1105,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||
|
||||
const canary = initSherpaOnnxOfflineCanaryModelConfig(config.canary, Module);
|
||||
|
||||
const wenetCtc =
|
||||
initSherpaOnnxOfflineWenetCtcModelConfig(config.wenetCtc, Module);
|
||||
|
||||
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
|
||||
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len +
|
||||
dolphin.len + zipformerCtc.len + canary.len;
|
||||
dolphin.len + zipformerCtc.len + canary.len + wenetCtc.len;
|
||||
|
||||
const ptr = Module._malloc(len);
|
||||
|
||||
@ -1188,11 +1218,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||
Module._CopyHeap(canary.ptr, canary.len, ptr + offset);
|
||||
offset += canary.len;
|
||||
|
||||
Module._CopyHeap(wenetCtc.ptr, wenetCtc.len, ptr + offset);
|
||||
offset += wenetCtc.len;
|
||||
|
||||
return {
|
||||
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
||||
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
|
||||
senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr,
|
||||
dolphin: dolphin, zipformerCtc: zipformerCtc, canary: canary,
|
||||
wenetCtc: wenetCtc,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -14,6 +14,7 @@ static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
|
||||
|
||||
static_assert(sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineWenetCtcModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineDolphinModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
|
||||
@ -35,7 +36,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
|
||||
sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineDolphinModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineCanaryModelConfig),
|
||||
sizeof(SherpaOnnxOfflineCanaryModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineWenetCtcModelConfig),
|
||||
|
||||
"");
|
||||
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
|
||||
@ -83,6 +85,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
|
||||
auto dolphin = &model_config->dolphin;
|
||||
auto zipformer_ctc = &model_config->zipformer_ctc;
|
||||
auto canary = &model_config->canary;
|
||||
auto wenet_ctc = &model_config->wenet_ctc;
|
||||
|
||||
fprintf(stdout, "----------offline transducer model config----------\n");
|
||||
fprintf(stdout, "encoder: %s\n", transducer->encoder);
|
||||
@ -133,6 +136,9 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
|
||||
fprintf(stdout, "tgt_lang: %s\n", canary->tgt_lang);
|
||||
fprintf(stdout, "use_pnc: %d\n", canary->use_pnc);
|
||||
|
||||
fprintf(stdout, "----------offline wenet ctc model config----------\n");
|
||||
fprintf(stdout, "model: %s\n", wenet_ctc->model);
|
||||
|
||||
fprintf(stdout, "tokens: %s\n", model_config->tokens);
|
||||
fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
|
||||
fprintf(stdout, "provider: %s\n", model_config->provider);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user