mirror of
https://github.com/k2-fsa/sherpa-onnx.git
synced 2026-01-09 07:41:06 +08:00
Add Swift API for ten-vad (#2387)
This commit is contained in:
parent
7f1d71fed3
commit
0514aeeb0c
4
.github/scripts/test-swift.sh
vendored
4
.github/scripts/test-swift.sh
vendored
@ -71,7 +71,11 @@ curl -SL -O https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
|
||||
ls -lh
|
||||
popd
|
||||
|
||||
./run-generate-subtitles-ten-vad.sh
|
||||
rm -rf *.onnx
|
||||
|
||||
./run-generate-subtitles.sh
|
||||
rm -rf *.onnx
|
||||
|
||||
ls -lh /Users/fangjun/Desktop
|
||||
cat /Users/fangjun/Desktop/Obama.srt
|
||||
|
||||
1
swift-api-examples/.gitignore
vendored
1
swift-api-examples/.gitignore
vendored
@ -1,6 +1,7 @@
|
||||
decode-file
|
||||
decode-file-non-streaming
|
||||
generate-subtitles
|
||||
generate-subtitles-ten-vad
|
||||
spoken-language-identification
|
||||
tts-vits
|
||||
vits-vctk
|
||||
|
||||
@ -386,6 +386,22 @@ func sherpaOnnxOfflineWhisperModelConfig(
|
||||
)
|
||||
}
|
||||
|
||||
/// Builds a `SherpaOnnxOfflineCanaryModelConfig` from Swift-native values.
///
/// Every string argument is passed through `toCPointer` before it is stored
/// in the returned struct.
///
/// - Parameters:
///   - encoder: Stored in the `encoder` field. Defaults to `""`.
///   - decoder: Stored in the `decoder` field. Defaults to `""`.
///   - srcLang: Stored in the `src_lang` field. Defaults to `"en"`.
///   - tgtLang: Stored in the `tgt_lang` field. Defaults to `"en"`.
///   - usePnc: Stored in the `use_pnc` field as `1` when `true`, `0` otherwise.
/// - Returns: The populated configuration struct.
func sherpaOnnxOfflineCanaryModelConfig(
  encoder: String = "",
  decoder: String = "",
  srcLang: String = "en",
  tgtLang: String = "en",
  usePnc: Bool = true
) -> SherpaOnnxOfflineCanaryModelConfig {
  // Convert the strings up front, in the same order as the struct fields.
  let encoderPtr = toCPointer(encoder)
  let decoderPtr = toCPointer(decoder)
  let srcLangPtr = toCPointer(srcLang)
  let tgtLangPtr = toCPointer(tgtLang)

  return SherpaOnnxOfflineCanaryModelConfig(
    encoder: encoderPtr,
    decoder: decoderPtr,
    src_lang: srcLangPtr,
    tgt_lang: tgtLangPtr,
    use_pnc: usePnc ? 1 : 0
  )
}
|
||||
|
||||
func sherpaOnnxOfflineFireRedAsrModelConfig(
|
||||
encoder: String = "",
|
||||
decoder: String = ""
|
||||
@ -459,7 +475,8 @@ func sherpaOnnxOfflineModelConfig(
|
||||
fireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig = sherpaOnnxOfflineFireRedAsrModelConfig(),
|
||||
dolphin: SherpaOnnxOfflineDolphinModelConfig = sherpaOnnxOfflineDolphinModelConfig(),
|
||||
zipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig =
|
||||
sherpaOnnxOfflineZipformerCtcModelConfig()
|
||||
sherpaOnnxOfflineZipformerCtcModelConfig(),
|
||||
canary: SherpaOnnxOfflineCanaryModelConfig = sherpaOnnxOfflineCanaryModelConfig()
|
||||
) -> SherpaOnnxOfflineModelConfig {
|
||||
return SherpaOnnxOfflineModelConfig(
|
||||
transducer: transducer,
|
||||
@ -479,7 +496,8 @@ func sherpaOnnxOfflineModelConfig(
|
||||
moonshine: moonshine,
|
||||
fire_red_asr: fireRedAsr,
|
||||
dolphin: dolphin,
|
||||
zipformer_ctc: zipformerCtc
|
||||
zipformer_ctc: zipformerCtc,
|
||||
canary: canary
|
||||
)
|
||||
}
|
||||
|
||||
@ -607,10 +625,14 @@ class SherpaOnnxOfflineRecognizer {
|
||||
|
||||
return SherpaOnnxOfflineRecongitionResult(result: result)
|
||||
}
|
||||
|
||||
/// Forwards `config` to the C function `SherpaOnnxOfflineRecognizerSetConfig`,
/// together with the `recognizer` handle that is in scope here.
///
/// - Parameter config: Pointer to the configuration handed to the C API.
///   It is passed through unchanged.
func setConfig(config: UnsafePointer<SherpaOnnxOfflineRecognizerConfig>!) {
  SherpaOnnxOfflineRecognizerSetConfig(recognizer, config)
}
|
||||
}
|
||||
|
||||
func sherpaOnnxSileroVadModelConfig(
|
||||
model: String,
|
||||
model: String = "",
|
||||
threshold: Float = 0.5,
|
||||
minSilenceDuration: Float = 0.25,
|
||||
minSpeechDuration: Float = 0.5,
|
||||
@ -627,19 +649,39 @@ func sherpaOnnxSileroVadModelConfig(
|
||||
)
|
||||
}
|
||||
|
||||
/// Builds a `SherpaOnnxTenVadModelConfig` from Swift-native values.
///
/// - Parameters:
///   - model: Passed through `toCPointer` and stored in the `model` field.
///     Defaults to `""`.
///   - threshold: Stored in the `threshold` field. Defaults to `0.5`.
///   - minSilenceDuration: Stored in `min_silence_duration`. Defaults to `0.25`.
///   - minSpeechDuration: Stored in `min_speech_duration`. Defaults to `0.5`.
///   - windowSize: Converted to `Int32` and stored in `window_size`.
///     Defaults to `256`.
///   - maxSpeechDuration: Stored in `max_speech_duration`. Defaults to `5.0`.
/// - Returns: The populated configuration struct.
func sherpaOnnxTenVadModelConfig(
  model: String = "",
  threshold: Float = 0.5,
  minSilenceDuration: Float = 0.25,
  minSpeechDuration: Float = 0.5,
  windowSize: Int = 256,
  maxSpeechDuration: Float = 5.0
) -> SherpaOnnxTenVadModelConfig {
  // Perform the two conversions first (same order as the struct fields),
  // then assemble the struct from plain values.
  let modelPtr = toCPointer(model)
  let windowSize32 = Int32(windowSize)

  return SherpaOnnxTenVadModelConfig(
    model: modelPtr,
    threshold: threshold,
    min_silence_duration: minSilenceDuration,
    min_speech_duration: minSpeechDuration,
    window_size: windowSize32,
    max_speech_duration: maxSpeechDuration
  )
}
|
||||
|
||||
/// Builds a `SherpaOnnxVadModelConfig` that carries both a silero-vad and a
/// ten-vad model configuration.
///
/// Every parameter has a default, so callers can supply only the model
/// configuration they need, e.g. `sherpaOnnxVadModelConfig(sileroVad: cfg)` or
/// `sherpaOnnxVadModelConfig(sileroVad: a, tenVad: b)`.
///
/// - Parameters:
///   - sileroVad: Stored in the `silero_vad` field. Defaults to
///     `sherpaOnnxSileroVadModelConfig()`.
///   - sampleRate: Stored in the `sample_rate` field. Defaults to `16000`.
///   - numThreads: Converted to `Int32` and stored in `num_threads`.
///     Defaults to `1`.
///   - provider: Passed through `toCPointer` and stored in `provider`.
///     Defaults to `"cpu"`.
///   - debug: Converted to `Int32` and stored in `debug`. Defaults to `0`.
///   - tenVad: Stored in the `ten_vad` field. Defaults to
///     `sherpaOnnxTenVadModelConfig()`. Declared last so that existing
///     call sites using the earlier parameters keep compiling unchanged.
/// - Returns: The populated configuration struct.
func sherpaOnnxVadModelConfig(
  sileroVad: SherpaOnnxSileroVadModelConfig = sherpaOnnxSileroVadModelConfig(),
  sampleRate: Int32 = 16000,
  numThreads: Int = 1,
  provider: String = "cpu",
  debug: Int = 0,
  tenVad: SherpaOnnxTenVadModelConfig = sherpaOnnxTenVadModelConfig()
) -> SherpaOnnxVadModelConfig {
  return SherpaOnnxVadModelConfig(
    silero_vad: sileroVad,
    sample_rate: sampleRate,
    num_threads: Int32(numThreads),
    provider: toCPointer(provider),
    debug: Int32(debug),
    ten_vad: tenVad
  )
}
|
||||
|
||||
|
||||
@ -156,11 +156,35 @@ func run() {
|
||||
assert(audioFormat.channelCount == 1)
|
||||
assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)
|
||||
|
||||
let sileroVadConfig = sherpaOnnxSileroVadModelConfig(
|
||||
model: "./silero_vad.onnx"
|
||||
)
|
||||
var sileroVadConfig = sherpaOnnxSileroVadModelConfig()
|
||||
var tenVadConfig = sherpaOnnxTenVadModelConfig()
|
||||
|
||||
var windowSize = 0
|
||||
|
||||
if FileManager.default.fileExists(atPath: "./silero_vad.onnx") {
|
||||
sileroVadConfig = sherpaOnnxSileroVadModelConfig(
|
||||
model: "./silero_vad.onnx",
|
||||
threshold: 0.25,
|
||||
windowSize: 512
|
||||
)
|
||||
windowSize = 512
|
||||
print("Use silero-vad")
|
||||
} else if FileManager.default.fileExists(atPath: "./ten-vad.onnx") {
|
||||
tenVadConfig = sherpaOnnxTenVadModelConfig(
|
||||
model: "./ten-vad.onnx",
|
||||
threshold: 0.25,
|
||||
windowSize: 256
|
||||
)
|
||||
windowSize = 256
|
||||
print("Use ten-vad")
|
||||
} else {
|
||||
print("Please provide ./silero_vad.onnx or ./ten-vad.onnx")
|
||||
return
|
||||
}
|
||||
|
||||
var vadModelConfig = sherpaOnnxVadModelConfig(
|
||||
sileroVad: sileroVadConfig, tenVad: tenVadConfig)
|
||||
|
||||
var vadModelConfig = sherpaOnnxVadModelConfig(sileroVad: sileroVadConfig)
|
||||
let vad = SherpaOnnxVoiceActivityDetectorWrapper(
|
||||
config: &vadModelConfig, buffer_size_in_seconds: 120)
|
||||
|
||||
@ -170,8 +194,6 @@ func run() {
|
||||
try! audioFile.read(into: audioFileBuffer!)
|
||||
var array: [Float]! = audioFileBuffer?.array()
|
||||
|
||||
let windowSize = Int(vadModelConfig.silero_vad.window_size)
|
||||
|
||||
var segments: [SpeechSegment] = []
|
||||
|
||||
for offset in stride(from: 0, to: array.count, by: windowSize) {
|
||||
@ -180,7 +202,6 @@ func run() {
|
||||
}
|
||||
|
||||
vad.flush()
|
||||
var index: Int = 0
|
||||
while !vad.isEmpty() {
|
||||
let s = vad.front()
|
||||
vad.pop()
|
||||
|
||||
46
swift-api-examples/run-generate-subtitles-ten-vad.sh
Executable file
46
swift-api-examples/run-generate-subtitles-ten-vad.sh
Executable file
@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env bash
#
# Builds (when necessary) and runs the `generate-subtitles-ten-vad` example.
#
# Steps:
#   1. Require that ../build-swift-macos exists.
#   2. Download and unpack the whisper tiny.en model if its directory is missing.
#   3. Download ./ten-vad.onnx if it is missing.
#   4. Compile ./generate-subtitles.swift together with ./SherpaOnnx.swift into
#      ./generate-subtitles-ten-vad unless that file already exists.
#   5. Run the resulting binary with the sherpa-onnx install lib directory on
#      DYLD_LIBRARY_PATH.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html"
  echo ""
  echo "for help"

  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
  rm sherpa-onnx-whisper-tiny.en.tar.bz2
  ls -lh sherpa-onnx-whisper-tiny.en
fi

if [ ! -f ./ten-vad.onnx ]; then
  echo "downloading ten-vad"
  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx
fi

if [ ! -e ./generate-subtitles-ten-vad ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./generate-subtitles.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o generate-subtitles-ten-vad

  strip generate-subtitles-ten-vad
else
  echo "./generate-subtitles-ten-vad exists - skip building"
fi

# Prepend the install lib directory. The ${VAR:+...} form appends the previous
# value (and its ':' separator) only when DYLD_LIBRARY_PATH is already set and
# non-empty, so no dangling ':' (an empty path entry) is produced otherwise.
export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./generate-subtitles-ten-vad
|
||||
Loading…
x
Reference in New Issue
Block a user