mirror of
https://github.com/k2-fsa/sherpa-onnx.git
synced 2026-01-09 07:41:06 +08:00
Add a C++ example for simulated streaming ASR (#2607)
This commit is contained in:
parent
9102f34179
commit
86af28157b
12
.github/workflows/aarch64-linux-gnu-shared.yaml
vendored
12
.github/workflows/aarch64-linux-gnu-shared.yaml
vendored
@ -66,12 +66,17 @@ jobs:
|
||||
./gitcompile
|
||||
popd
|
||||
|
||||
p=$PWD
|
||||
|
||||
export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
|
||||
export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake \
|
||||
-DALSA_INCLUDE_DIR=$p/alsa-lib/include \
|
||||
-DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DCMAKE_INSTALL_PREFIX=./install \
|
||||
-DSHERPA_ONNX_ENABLE_GPU=ON \
|
||||
@ -113,13 +118,18 @@ jobs:
|
||||
./gitcompile
|
||||
popd
|
||||
|
||||
p=$PWD
|
||||
|
||||
export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
|
||||
export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
|
||||
cmake \
|
||||
-DALSA_INCLUDE_DIR=$p/alsa-lib/include \
|
||||
-DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DCMAKE_INSTALL_PREFIX=./install \
|
||||
..
|
||||
@ -245,7 +255,7 @@ jobs:
|
||||
file: sherpa-onnx-*linux-aarch64*.tar.bz2
|
||||
# repo_name: k2-fsa/sherpa-onnx
|
||||
# repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||
# tag: v1.11.5
|
||||
# tag: v1.12.13
|
||||
|
||||
- name: Test offline Moonshine
|
||||
if: matrix.build_type != 'Debug'
|
||||
|
||||
@ -67,11 +67,16 @@ jobs:
|
||||
popd
|
||||
|
||||
export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
|
||||
export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
|
||||
|
||||
p=$PWD
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake \
|
||||
-DALSA_INCLUDE_DIR=$p/alsa-lib/include \
|
||||
-DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
|
||||
-DBUILD_SHARED_LIBS=OFF \
|
||||
-DCMAKE_INSTALL_PREFIX=./install \
|
||||
..
|
||||
|
||||
10
.github/workflows/build-wheels-aarch64-cuda.yaml
vendored
10
.github/workflows/build-wheels-aarch64-cuda.yaml
vendored
@ -46,7 +46,15 @@ jobs:
|
||||
echo "PWD"
|
||||
ls -lh /project/alsa-lib/src/.libs
|
||||
|
||||
CIBW_ENVIRONMENT: CPLUS_INCLUDE_PATH=/project/alsa-lib/include:$CPLUS_INCLUDE_PATH SHERPA_ONNX_ALSA_LIB_DIR=/project/alsa-lib/src/.libs LD_LIBRARY_PATH=/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib:$SHERPA_ONNX_ALSA_LIB_DIR SHERPA_ONNX_MAKE_ARGS="VERBOSE=1" SHERPA_ONNX_ENABLE_ALSA=1 SHERPA_ONNX_ENABLE_GPU=ON
|
||||
CIBW_ENVIRONMENT: >
|
||||
CPLUS_INCLUDE_PATH=/project/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
C_INCLUDE_PATH=/project/alsa-lib/include:$C_INCLUDE_PATH
|
||||
SHERPA_ONNX_ALSA_LIB_DIR=/project/alsa-lib/src/.libs
|
||||
LD_LIBRARY_PATH=/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib:$SHERPA_ONNX_ALSA_LIB_DIR
|
||||
SHERPA_ONNX_MAKE_ARGS="VERBOSE=1"
|
||||
SHERPA_ONNX_ENABLE_ALSA=1
|
||||
SHERPA_ONNX_ENABLE_GPU=ON
|
||||
SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_GPU=ON -DALSA_INCLUDE_DIR=/project/alsa-lib/include -DALSA_LIBRARY=/project/alsa-lib/src/.libs/libasound.so"
|
||||
CIBW_BUILD: "${{ matrix.python-version}}-* "
|
||||
CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686"
|
||||
CIBW_BUILD_VERBOSITY: 3
|
||||
|
||||
@ -109,6 +109,7 @@ jobs:
|
||||
strings $PWD/alsa-lib/src/.libs/libasound.so.2.0.0 | grep "^GLIBC"
|
||||
|
||||
export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
|
||||
export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
|
||||
|
||||
export SHERPA_ONNX_RKNN_TOOLKIT2_PATH=$PWD/rknn-toolkit2
|
||||
@ -117,7 +118,7 @@ jobs:
|
||||
|
||||
export SHERPA_ONNX_ENABLE_ALSA=1
|
||||
|
||||
export SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_RKNN=ON"
|
||||
export SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_RKNN=ON -DALSA_INCLUDE_DIR=$PWD/alsa-lib/include -DALSA_LIBRARY=$PWD/alsa-lib/src/.libs/libasound.so"
|
||||
python3 setup.py bdist_wheel
|
||||
|
||||
mv dist wheelhouse
|
||||
|
||||
3
.github/workflows/build-wheels-aarch64.yaml
vendored
3
.github/workflows/build-wheels-aarch64.yaml
vendored
@ -326,12 +326,13 @@ jobs:
|
||||
|
||||
CIBW_ENVIRONMENT: >
|
||||
SHERPA_ONNX_SPLIT_PYTHON_PACKAGE=ON
|
||||
C_INCLUDE_PATH=/project/alsa-lib/include:$C_INCLUDE_PATH
|
||||
CPLUS_INCLUDE_PATH=/project/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
SHERPA_ONNX_ALSA_LIB_DIR=/project/alsa-lib/src/.libs
|
||||
LD_LIBRARY_PATH=/project/build/bdist.linux-aarch64/wheel/sherpa_onnx/lib:$SHERPA_ONNX_ALSA_LIB_DIR
|
||||
SHERPA_ONNX_MAKE_ARGS="VERBOSE=1"
|
||||
SHERPA_ONNX_ENABLE_ALSA=1
|
||||
SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_BINARY=OFF -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF -DSHERPA_ONNX_ENABLE_C_API=OFF -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF"
|
||||
SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_BINARY=OFF -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF -DSHERPA_ONNX_ENABLE_C_API=OFF -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF -DALSA_INCLUDE_DIR=/project/alsa-lib/include -DALSA_LIBRARY=/project/alsa-lib/src/.libs/libasound.so"
|
||||
|
||||
CIBW_BUILD: "${{ matrix.python-version}}-* "
|
||||
CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686"
|
||||
|
||||
@ -58,6 +58,7 @@ jobs:
|
||||
shell: bash
|
||||
run: |
|
||||
export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
|
||||
export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
|
||||
export LD_LIBRARY_PATH=$SHERPA_ONNX_ALSA_LIB_DIR:$LD_LIBRARY_PATH
|
||||
|
||||
@ -66,9 +67,11 @@ jobs:
|
||||
echo "---"
|
||||
ls -lh $PWD/alsa-lib/src/.libs
|
||||
|
||||
p=$PWD
|
||||
|
||||
export SHERPA_ONNX_MAKE_ARGS="VERBOSE=1"
|
||||
export SHERPA_ONNX_ENABLE_ALSA=1
|
||||
export SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_GPU=ON"
|
||||
export SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_GPU=ON -DALSA_INCLUDE_DIR=$p/alsa-lib/include -DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so"
|
||||
|
||||
onnxruntime_version=${{ matrix.onnxruntime_version }}
|
||||
if [[ $onnxruntime_version == "1.22.0" ]]; then
|
||||
|
||||
4
.github/workflows/build-wheels-linux.yaml
vendored
4
.github/workflows/build-wheels-linux.yaml
vendored
@ -82,6 +82,7 @@ jobs:
|
||||
popd
|
||||
|
||||
export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
|
||||
export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
|
||||
|
||||
mkdir build
|
||||
@ -326,11 +327,12 @@ jobs:
|
||||
CIBW_ENVIRONMENT: >
|
||||
SHERPA_ONNX_SPLIT_PYTHON_PACKAGE=ON
|
||||
CPLUS_INCLUDE_PATH=/project/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
C_INCLUDE_PATH=/project/alsa-lib/include:$C_INCLUDE_PATH
|
||||
SHERPA_ONNX_ALSA_LIB_DIR=/project/alsa-lib/src/.libs
|
||||
LD_LIBRARY_PATH=/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib:$SHERPA_ONNX_ALSA_LIB_DIR
|
||||
SHERPA_ONNX_MAKE_ARGS="VERBOSE=1"
|
||||
SHERPA_ONNX_ENABLE_ALSA=1
|
||||
SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_BINARY=OFF -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF -DSHERPA_ONNX_ENABLE_C_API=OFF -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF"
|
||||
SHERPA_ONNX_CMAKE_ARGS="-DSHERPA_ONNX_ENABLE_BINARY=OFF -DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF -DSHERPA_ONNX_ENABLE_C_API=OFF -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF -DALSA_INCLUDE_DIR=/project/alsa-lib/include -DALSA_LIBRARY=/project/alsa-lib/src/.libs/libasound.so"
|
||||
|
||||
CIBW_BUILD: "${{ matrix.python-version}}-* "
|
||||
CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686"
|
||||
|
||||
5
.github/workflows/linux-gpu.yaml
vendored
5
.github/workflows/linux-gpu.yaml
vendored
@ -82,12 +82,17 @@ jobs:
|
||||
popd
|
||||
|
||||
export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
|
||||
export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
|
||||
|
||||
p=$PWD
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
|
||||
cmake \
|
||||
-DALSA_INCLUDE_DIR=$p/alsa-lib/include \
|
||||
-DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
|
||||
-D CMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
||||
-D CMAKE_INSTALL_PREFIX=./install \
|
||||
-D BUILD_SHARED_LIBS=ON \
|
||||
|
||||
4
.github/workflows/linux-jni-aarch64.yaml
vendored
4
.github/workflows/linux-jni-aarch64.yaml
vendored
@ -80,12 +80,16 @@ jobs:
|
||||
popd
|
||||
|
||||
export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
|
||||
export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
|
||||
p=$PWD
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
|
||||
cmake \
|
||||
-DALSA_INCLUDE_DIR=$p/alsa-lib/include \
|
||||
-DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
|
||||
-D SHERPA_ONNX_ENABLE_TTS=ON \
|
||||
-D CMAKE_BUILD_TYPE=Release \
|
||||
-D BUILD_SHARED_LIBS=ON \
|
||||
|
||||
5
.github/workflows/linux-jni.yaml
vendored
5
.github/workflows/linux-jni.yaml
vendored
@ -113,12 +113,17 @@ jobs:
|
||||
popd
|
||||
|
||||
export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
|
||||
export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
|
||||
|
||||
p=$PWD
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
|
||||
cmake \
|
||||
-DALSA_INCLUDE_DIR=$p/alsa-lib/include \
|
||||
-DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
|
||||
-D SHERPA_ONNX_ENABLE_TTS=ON \
|
||||
-D CMAKE_BUILD_TYPE=Release \
|
||||
-D BUILD_SHARED_LIBS=ON \
|
||||
|
||||
5
.github/workflows/linux.yaml
vendored
5
.github/workflows/linux.yaml
vendored
@ -110,12 +110,17 @@ jobs:
|
||||
popd
|
||||
|
||||
export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
|
||||
export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
|
||||
|
||||
p=$PWD
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
|
||||
cmake \
|
||||
-DALSA_INCLUDE_DIR=$p/alsa-lib/include \
|
||||
-DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
|
||||
-D SHERPA_ONNX_ENABLE_TTS=${{ matrix.with_tts }} \
|
||||
-D CMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
||||
-D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} \
|
||||
|
||||
4
.github/workflows/rknn-linux-aarch64.yaml
vendored
4
.github/workflows/rknn-linux-aarch64.yaml
vendored
@ -100,7 +100,9 @@ jobs:
|
||||
strings $PWD/alsa-lib/src/.libs/libasound.so.2.0.0 | grep "^GLIBC"
|
||||
|
||||
export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
|
||||
export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
|
||||
p=$PWD
|
||||
|
||||
export SHERPA_ONNX_RKNN_TOOLKIT2_PATH=$PWD/rknn-toolkit2
|
||||
export SHERPA_ONNX_RKNN_TOOLKIT2_LIB_DIR=$SHERPA_ONNX_RKNN_TOOLKIT2_PATH/rknpu2/runtime/Linux/librknn_api/aarch64
|
||||
@ -114,6 +116,8 @@ jobs:
|
||||
BUILD_SHARED_LIBS=${{ matrix.shared }}
|
||||
|
||||
cmake \
|
||||
-DALSA_INCLUDE_DIR=$p/alsa-lib/include \
|
||||
-DALSA_LIBRARY=$p/alsa-lib/src/.libs/libasound.so \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DCMAKE_INSTALL_PREFIX=./install \
|
||||
-DSHERPA_ONNX_ENABLE_RKNN=ON \
|
||||
|
||||
@ -310,7 +310,7 @@ if(UNIX AND NOT APPLE AND NOT SHERPA_ONNX_ENABLE_WASM AND NOT CMAKE_SYSTEM_NAME
|
||||
Could not find alsa/asoundlib.h !
|
||||
We won't build sherpa-onnx-alsa
|
||||
To fix that, please do:
|
||||
(1) sudo apt-get install alsa-utils libasound2-dev
|
||||
(1) sudo apt-get install alsa-utils libasound2-dev pkg-config
|
||||
(2) rm -rf build
|
||||
(3) re-try
|
||||
")
|
||||
|
||||
@ -72,6 +72,7 @@ if [ ! -f alsa-lib/src/.libs/libasound.so ]; then
|
||||
fi
|
||||
|
||||
export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
|
||||
export C_INCLUDE_PATH=$PWD/alsa-lib/include:$C_INCLUDE_PATH
|
||||
export SHERPA_ONNX_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
|
||||
|
||||
if [[ x"$BUILD_SHARED_LIBS" == x"" ]]; then
|
||||
|
||||
@ -73,9 +73,11 @@ def get_binaries():
|
||||
"sherpa-onnx-vad",
|
||||
"sherpa-onnx-vad-microphone",
|
||||
"sherpa-onnx-vad-microphone-offline-asr",
|
||||
"sherpa-onnx-vad-microphone-simulated-streaming-asr",
|
||||
"sherpa-onnx-vad-with-offline-asr",
|
||||
"sherpa-onnx-vad-with-online-asr",
|
||||
"sherpa-onnx-version",
|
||||
"sherpa-onnx-pa-devs",
|
||||
]
|
||||
|
||||
if enable_alsa():
|
||||
|
||||
@ -29,6 +29,7 @@ function(download_portaudio)
|
||||
# Always use static build
|
||||
set(PA_BUILD_SHARED OFF CACHE BOOL "" FORCE)
|
||||
set(PA_BUILD_STATIC ON CACHE BOOL "" FORCE)
|
||||
set(PA_BUILD_EXAMPLES ON CACHE BOOL "" FORCE)
|
||||
|
||||
FetchContent_Declare(portaudio
|
||||
URL
|
||||
@ -50,6 +51,15 @@ function(download_portaudio)
|
||||
endif()
|
||||
|
||||
add_subdirectory(${portaudio_SOURCE_DIR} ${portaudio_BINARY_DIR} EXCLUDE_FROM_ALL)
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL Linux)
|
||||
if(PA_USE_ALSA)
|
||||
message(STATUS "portaudio with ALSA")
|
||||
else()
|
||||
message(STATUS "portaudio without ALSA")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set_target_properties(pa_devs PROPERTIES OUTPUT_NAME "sherpa-onnx-pa-devs")
|
||||
|
||||
set_target_properties(portaudio_static PROPERTIES OUTPUT_NAME "sherpa-onnx-portaudio_static")
|
||||
if(NOT WIN32)
|
||||
@ -62,6 +72,11 @@ function(download_portaudio)
|
||||
DESTINATION lib)
|
||||
endif()
|
||||
|
||||
install(TARGETS
|
||||
pa_devs
|
||||
DESTINATION bin)
|
||||
add_custom_target(build_pa_devs ALL DEPENDS pa_devs)
|
||||
|
||||
endfunction()
|
||||
|
||||
download_portaudio()
|
||||
|
||||
@ -518,6 +518,11 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY)
|
||||
microphone.cc
|
||||
)
|
||||
|
||||
add_executable(sherpa-onnx-vad-microphone-simulated-streaming-asr
|
||||
sherpa-onnx-vad-microphone-simulated-streaming-asr.cc
|
||||
microphone.cc
|
||||
)
|
||||
|
||||
add_executable(sherpa-onnx-vad-with-offline-asr
|
||||
sherpa-onnx-vad-with-offline-asr.cc
|
||||
)
|
||||
@ -548,6 +553,7 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY)
|
||||
sherpa-onnx-microphone-offline-audio-tagging
|
||||
sherpa-onnx-microphone-offline-speaker-identification
|
||||
sherpa-onnx-vad-microphone
|
||||
sherpa-onnx-vad-microphone-simulated-streaming-asr
|
||||
sherpa-onnx-vad-microphone-offline-asr
|
||||
sherpa-onnx-vad-with-offline-asr
|
||||
sherpa-onnx-vad-with-online-asr
|
||||
|
||||
73
sherpa-onnx/csrc/sherpa-display.h
Normal file
73
sherpa-onnx/csrc/sherpa-display.h
Normal file
@ -0,0 +1,73 @@
|
||||
// sherpa-onnx/csrc/sherpa-display.h
|
||||
//
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
#pragma once

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <ctime>
#include <iomanip>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

namespace sherpa_onnx {

// A tiny console view for speech recognition results.
//
// It keeps a list of finalized sentences, each with the wall-clock time at
// which it was finalized, plus the text that is still being recognized.
// Display() redraws the whole view on stdout.
class SherpaDisplay {
 public:
  // Replace the in-progress text with `text`.
  void UpdateText(const std::string &text) { current_text_ = text; }

  // Move the in-progress text to the list of finalized sentences.
  //
  // An empty text, or a text that consists of exactly one space, is dropped
  // instead of being recorded. In every case the in-progress text is empty
  // afterwards, so Display() never shows a stale "Recognizing: " line for a
  // sentence that has already ended.
  void FinalizeCurrentSentence() {
    const bool is_blank = current_text_.empty() || current_text_ == " ";
    if (!is_blank) {
      sentences_.emplace_back(GetCurrentDateTime(), std::move(current_text_));
    }

    // A moved-from std::string is only guaranteed to be valid, not empty,
    // so reset it explicitly.
    current_text_.clear();
  }

  // Redraw the view: a header, every finalized sentence, and the
  // in-progress text (if any).
  void Display() const {
    // Only wipe the terminal once there is something to show.
    if (!sentences_.empty() || !current_text_.empty()) {
      ClearScreen();
    }

    printf("=== Speech Recognition with Next-gen Kaldi ===\n");
    printf("------------------------------\n");

    if (!sentences_.empty()) {
      int32_t index = 1;
      for (const auto &sentence : sentences_) {
        printf("[%s] %d. %s\n", sentence.first.c_str(), index,
               sentence.second.c_str());
        index += 1;
      }

      printf("------------------------------\n");
    }

    if (!current_text_.empty()) {
      printf("Recognizing: %s\n", current_text_.c_str());
    }
  }

 private:
  // Clear the terminal by running the platform's shell command.
  static void ClearScreen() {
    // _WIN32 is defined by every Windows toolchain (MSVC included), where
    // the shell command is "cls" rather than "clear".
#ifdef _WIN32
    auto ret = system("cls");
#else
    auto ret = system("clear");
#endif
    (void)ret;  // the exit status of the command is deliberately ignored
  }

  // Return the local time formatted as "YYYY-MM-DD HH:MM:SS", or an empty
  // string if the current time cannot be converted to local time.
  static std::string GetCurrentDateTime() {
    const std::time_t now = std::time(nullptr);

    // std::localtime() returns a pointer to storage shared with other
    // time-conversion calls and may return nullptr on failure.
    const std::tm *local = std::localtime(&now);
    if (local == nullptr) {
      return {};
    }

    std::ostringstream os;
    os << std::put_time(local, "%Y-%m-%d %H:%M:%S");
    return os.str();
  }

 private:
  // Finalized sentences as (timestamp, text) pairs, oldest first.
  std::vector<std::pair<std::string, std::string>> sentences_;

  // Text of the sentence that is still being recognized.
  std::string current_text_;
};

}  // namespace sherpa_onnx
|
||||
@ -0,0 +1,268 @@
|
||||
// sherpa-onnx/csrc/sherpa-onnx-vad-microphone-simulated-streaming-asr.cc
|
||||
//
|
||||
// Copyright (c) 2025 Xiaomi Corporation
|
||||
|
||||
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <atomic>
#include <chrono>              // NOLINT
#include <condition_variable>  // NOLINT
#include <memory>
#include <mutex>  // NOLINT
#include <queue>
#include <string>
#include <utility>
#include <vector>

#include "portaudio.h"  // NOLINT
#include "sherpa-onnx/csrc/circular-buffer.h"
#include "sherpa-onnx/csrc/microphone.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/resample.h"
#include "sherpa-onnx/csrc/sherpa-display.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-writer.h"
|
||||
|
||||
// Audio chunks handed from the PortAudio callback to the main loop.
// Guarded by `mutex`.
std::queue<std::vector<float>> samples_queue;

// Signalled whenever a chunk is queued or a stop is requested.
std::condition_variable condition_variable;

// Protects `samples_queue`.
std::mutex mutex;

// Set by the SIGINT handler and read by both the audio callback and the
// main loop. It is written outside of `mutex`, so it must be atomic to
// avoid a data race.
std::atomic<bool> stop{false};
|
||||
|
||||
static int32_t RecordCallback(const void *input_buffer,
|
||||
void * /*output_buffer*/,
|
||||
unsigned long frames_per_buffer, // NOLINT
|
||||
const PaStreamCallbackTimeInfo * /*time_info*/,
|
||||
PaStreamCallbackFlags /*status_flags*/,
|
||||
void * /*user_data*/) {
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
samples_queue.emplace(
|
||||
reinterpret_cast<const float *>(input_buffer),
|
||||
reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
|
||||
condition_variable.notify_one();
|
||||
|
||||
return stop ? paComplete : paContinue;
|
||||
}
|
||||
|
||||
// SIGINT handler: requests shutdown, wakes up a thread waiting on
// `condition_variable`, and tells the user that we are exiting.
//
// NOTE(review): neither std::condition_variable::notify_one() nor stdio is
// async-signal-safe; confirm this is acceptable for this example program.
static void Handler(int32_t /*sig*/) {
  stop = true;
  condition_variable.notify_one();
  fputs("\nCaught Ctrl + C. Exiting...\n", stdout);
}
|
||||
|
||||
int32_t main(int32_t argc, char *argv[]) {
|
||||
signal(SIGINT, Handler);
|
||||
|
||||
const char *kUsageMessage = R"usage(
|
||||
This program shows how to use a streaming VAD with non-streaming ASR in
|
||||
sherpa-onnx for real-time speech recognition.
|
||||
|
||||
(1) SenseVoice
|
||||
|
||||
cd /path/to/sherpa-onnx/build
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
|
||||
tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17.tar.bz2
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||
|
||||
./bin/sherpa-onnx-vad-microphone-simulated-streaming-asr \
|
||||
--silero-vad-model=./silero_vad.onnx \
|
||||
--sense-voice-model=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/model.int8.onnx \
|
||||
--tokens=./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2024-07-17/tokens.txt
|
||||
|
||||
(2) Parakeet TDT 0.6b v2
|
||||
|
||||
cd /path/to/sherpa-onnx/build
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||
|
||||
./bin/sherpa-onnx-vad-microphone-simulated-streaming-asr \
|
||||
--silero-vad-model=./silero_vad.onnx \
|
||||
--encoder=./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/encoder.int8.onnx \
|
||||
--decoder=./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/decoder.int8.onnx \
|
||||
--joiner=./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/joiner.int8.onnx \
|
||||
--tokens=./sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/tokens.txt
|
||||
|
||||
(3) Please refer to our doc for more non-streaming ASR models,
|
||||
e.g., zipformer, paraformer, whisper, etc.
|
||||
|
||||
Please first use ./bin/sherpa-onnx-offline to test the RTF of the model.
|
||||
A model with RTF < 0.2 should work with this program.
|
||||
)usage";
|
||||
|
||||
sherpa_onnx::ParseOptions po(kUsageMessage);
|
||||
sherpa_onnx::VadModelConfig vad_config;
|
||||
|
||||
sherpa_onnx::OfflineRecognizerConfig asr_config;
|
||||
|
||||
vad_config.Register(&po);
|
||||
asr_config.Register(&po);
|
||||
|
||||
int32_t user_device_index = -1; // -1 means to use default value
|
||||
int32_t user_sample_rate = -1; // -1 means to use default value
|
||||
|
||||
po.Register("mic-device-index", &user_device_index,
|
||||
"If provided, we use it to replace the default device index."
|
||||
"You can use sherpa-onnx-pa-devs to list available devices");
|
||||
|
||||
po.Register("mic-sample-rate", &user_sample_rate,
|
||||
"If provided, we use it to replace the default sample rate."
|
||||
"You can use sherpa-onnx-pa-devs to list sample rate of "
|
||||
"available devices");
|
||||
|
||||
if (argc == 1) {
|
||||
po.PrintUsage();
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
po.Read(argc, argv);
|
||||
if (po.NumArgs() != 0) {
|
||||
po.PrintUsage();
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
fprintf(stdout, "%s\n", vad_config.ToString().c_str());
|
||||
fprintf(stdout, "%s\n", asr_config.ToString().c_str());
|
||||
|
||||
if (!vad_config.Validate()) {
|
||||
fprintf(stdout, "Errors in vad_config!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!asr_config.Validate()) {
|
||||
fprintf(stdout, "Errors in asr_config!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(stdout, "Creating recognizer ...\n");
|
||||
sherpa_onnx::OfflineRecognizer recognizer(asr_config);
|
||||
fprintf(stdout, "Recognizer created!\n");
|
||||
|
||||
sherpa_onnx::Microphone mic;
|
||||
|
||||
int32_t device_index = Pa_GetDefaultInputDevice();
|
||||
if (device_index == paNoDevice) {
|
||||
fprintf(stdout, "No default input device found\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (user_device_index >= 0) {
|
||||
fprintf(stdout, "Use specified device: %d\n", user_device_index);
|
||||
device_index = user_device_index;
|
||||
} else {
|
||||
fprintf(stdout, "Use default device: %d\n", device_index);
|
||||
}
|
||||
|
||||
mic.PrintDevices(device_index);
|
||||
|
||||
float mic_sample_rate = 16000;
|
||||
if (user_sample_rate > 0) {
|
||||
fprintf(stdout, "Use sample rate %d for mic\n", user_sample_rate);
|
||||
mic_sample_rate = user_sample_rate;
|
||||
}
|
||||
|
||||
if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
|
||||
nullptr)) {
|
||||
fprintf(stdout, "Failed to open device %d\n", device_index);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
float sample_rate = 16000;
|
||||
std::unique_ptr<sherpa_onnx::LinearResample> resampler;
|
||||
if (mic_sample_rate != sample_rate) {
|
||||
float min_freq = std::min(mic_sample_rate, sample_rate);
|
||||
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
|
||||
|
||||
int32_t lowpass_filter_width = 6;
|
||||
resampler = std::make_unique<sherpa_onnx::LinearResample>(
|
||||
mic_sample_rate, sample_rate, lowpass_cutoff, lowpass_filter_width);
|
||||
}
|
||||
|
||||
auto vad = std::make_unique<sherpa_onnx::VoiceActivityDetector>(vad_config);
|
||||
|
||||
int32_t window_size = vad_config.silero_vad.window_size;
|
||||
|
||||
int32_t offset = 0;
|
||||
bool speech_started = false;
|
||||
std::vector<float> buffer;
|
||||
|
||||
auto started_time = std::chrono::steady_clock::now();
|
||||
sherpa_onnx::SherpaDisplay display;
|
||||
|
||||
fprintf(stdout, "Started. Please speak\n");
|
||||
std::vector<float> resampled;
|
||||
|
||||
while (!stop) {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
while (samples_queue.empty() && !stop) {
|
||||
condition_variable.wait(lock);
|
||||
}
|
||||
|
||||
if (stop) {
|
||||
break;
|
||||
}
|
||||
|
||||
const auto &s = samples_queue.front();
|
||||
if (!resampler) {
|
||||
buffer.insert(buffer.end(), s.begin(), s.end());
|
||||
} else {
|
||||
resampler->Resample(s.data(), s.size(), false, &resampled);
|
||||
buffer.insert(buffer.end(), resampled.begin(), resampled.end());
|
||||
}
|
||||
|
||||
samples_queue.pop();
|
||||
}
|
||||
|
||||
for (; offset + window_size < buffer.size(); offset += window_size) {
|
||||
vad->AcceptWaveform(buffer.data() + offset, window_size);
|
||||
if (!speech_started && vad->IsSpeechDetected()) {
|
||||
speech_started = true;
|
||||
started_time = std::chrono::steady_clock::now();
|
||||
}
|
||||
}
|
||||
|
||||
if (!speech_started) {
|
||||
if (buffer.size() > 10 * window_size) {
|
||||
offset -= buffer.size() - 10 * window_size;
|
||||
buffer = {buffer.end() - 10 * window_size, buffer.end()};
|
||||
}
|
||||
}
|
||||
|
||||
auto current_time = std::chrono::steady_clock::now();
|
||||
const float elapsed_seconds =
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
|
||||
started_time)
|
||||
.count() /
|
||||
1000.;
|
||||
|
||||
if (speech_started && elapsed_seconds > 0.2) {
|
||||
auto s = recognizer.CreateStream();
|
||||
s->AcceptWaveform(sample_rate, buffer.data(), buffer.size());
|
||||
recognizer.DecodeStream(s.get());
|
||||
const auto &result = s->GetResult();
|
||||
display.UpdateText(result.text);
|
||||
display.Display();
|
||||
|
||||
started_time = std::chrono::steady_clock::now();
|
||||
}
|
||||
|
||||
while (!vad->Empty()) {
|
||||
// when stopping speak, this while loop is executed
|
||||
|
||||
vad->Pop();
|
||||
|
||||
display.FinalizeCurrentSentence();
|
||||
display.Display();
|
||||
|
||||
buffer.clear();
|
||||
offset = 0;
|
||||
speech_started = false;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user