mirror of
https://github.com/k2-fsa/sherpa-onnx.git
synced 2026-01-09 07:41:06 +08:00
Add Swift API for MatchaTTS models. (#1684)
This commit is contained in:
parent
1fe5fe495f
commit
6f085babcc
18
.github/scripts/test-swift.sh
vendored
18
.github/scripts/test-swift.sh
vendored
@ -7,6 +7,18 @@ echo "pwd: $PWD"
|
||||
cd swift-api-examples
|
||||
ls -lh
|
||||
|
||||
./run-tts-vits.sh
|
||||
ls -lh
|
||||
rm -rf vits-piper-*
|
||||
|
||||
./run-tts-matcha-zh.sh
|
||||
ls -lh
|
||||
rm -rf matcha-icefall-*
|
||||
|
||||
./run-tts-matcha-en.sh
|
||||
ls -lh
|
||||
rm -rf matcha-icefall-*
|
||||
|
||||
./run-speaker-diarization.sh
|
||||
rm -rf *.onnx
|
||||
rm -rf sherpa-onnx-pyannote-segmentation-3-0
|
||||
@ -38,8 +50,9 @@ popd
|
||||
ls -lh /Users/fangjun/Desktop
|
||||
cat /Users/fangjun/Desktop/Obama.srt
|
||||
|
||||
./run-tts.sh
|
||||
ls -lh
|
||||
rm -rf sherpa-onnx-whisper*
|
||||
rm -f *.onnx
|
||||
rm /Users/fangjun/Desktop/Obama.wav
|
||||
|
||||
./run-decode-file.sh
|
||||
rm decode-file
|
||||
@ -48,5 +61,4 @@ sed -i.bak '20d' ./decode-file.swift
|
||||
|
||||
./run-decode-file-non-streaming.sh
|
||||
|
||||
|
||||
ls -lh
|
||||
|
||||
@ -31,7 +31,7 @@ fi
|
||||
# to download more models
|
||||
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
tar xf matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
rm matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
fi
|
||||
|
||||
|
||||
@ -350,7 +350,7 @@ node ./test_vad_asr_non_streaming_sense_voice_microphone.js
|
||||
### Text-to-speech with MatchaTTS models (English TTS)
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
tar xf matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
rm matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
|
||||
|
||||
@ -70,7 +70,7 @@ You can use the following command to run it:
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
tar xf matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
rm matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
|
||||
|
||||
4
swift-api-examples/.gitignore
vendored
4
swift-api-examples/.gitignore
vendored
@ -2,7 +2,7 @@ decode-file
|
||||
decode-file-non-streaming
|
||||
generate-subtitles
|
||||
spoken-language-identification
|
||||
tts
|
||||
tts-vits
|
||||
vits-vctk
|
||||
sherpa-onnx-paraformer-zh-2023-09-14
|
||||
!*.sh
|
||||
@ -10,3 +10,5 @@ sherpa-onnx-paraformer-zh-2023-09-14
|
||||
streaming-hlg-decode-file
|
||||
keyword-spotting-from-file
|
||||
add-punctuations
|
||||
tts-matcha-zh
|
||||
tts-matcha-en
|
||||
|
||||
@ -719,9 +719,9 @@ class SherpaOnnxVoiceActivityDetectorWrapper {
|
||||
|
||||
// offline tts
|
||||
func sherpaOnnxOfflineTtsVitsModelConfig(
|
||||
model: String,
|
||||
lexicon: String,
|
||||
tokens: String,
|
||||
model: String = "",
|
||||
lexicon: String = "",
|
||||
tokens: String = "",
|
||||
dataDir: String = "",
|
||||
noiseScale: Float = 0.667,
|
||||
noiseScaleW: Float = 0.8,
|
||||
@ -739,8 +739,30 @@ func sherpaOnnxOfflineTtsVitsModelConfig(
|
||||
dict_dir: toCPointer(dictDir))
|
||||
}
|
||||
|
||||
/// Builds a `SherpaOnnxOfflineTtsMatchaModelConfig` from Swift values.
///
/// Each string argument is passed through `toCPointer` before being stored in
/// the corresponding snake_case field; the two floats are stored unchanged.
/// Every parameter has a default, so callers only name the fields they need.
///
/// - Parameters:
///   - acousticModel: Stored in `acoustic_model` (the examples pass the path of
///     the acoustic model `.onnx` file).
///   - vocoder: Stored in `vocoder` (the examples pass the path of the vocoder
///     `.onnx` file).
///   - lexicon: Stored in `lexicon`.
///   - tokens: Stored in `tokens`.
///   - dataDir: Stored in `data_dir`.
///   - noiseScale: Stored in `noise_scale`. Defaults to 0.667.
///   - lengthScale: Stored in `length_scale`. Defaults to 1.0.
///   - dictDir: Stored in `dict_dir`.
/// - Returns: The populated configuration struct.
func sherpaOnnxOfflineTtsMatchaModelConfig(
  acousticModel: String = "",
  vocoder: String = "",
  lexicon: String = "",
  tokens: String = "",
  dataDir: String = "",
  noiseScale: Float = 0.667,
  lengthScale: Float = 1.0,
  dictDir: String = ""
) -> SherpaOnnxOfflineTtsMatchaModelConfig {
  let config = SherpaOnnxOfflineTtsMatchaModelConfig(
    acoustic_model: toCPointer(acousticModel),
    vocoder: toCPointer(vocoder),
    lexicon: toCPointer(lexicon),
    tokens: toCPointer(tokens),
    data_dir: toCPointer(dataDir),
    noise_scale: noiseScale,
    length_scale: lengthScale,
    dict_dir: toCPointer(dictDir)
  )
  return config
}
|
||||
|
||||
func sherpaOnnxOfflineTtsModelConfig(
|
||||
vits: SherpaOnnxOfflineTtsVitsModelConfig,
|
||||
vits: SherpaOnnxOfflineTtsVitsModelConfig = sherpaOnnxOfflineTtsVitsModelConfig(),
|
||||
matcha: SherpaOnnxOfflineTtsMatchaModelConfig = sherpaOnnxOfflineTtsMatchaModelConfig(),
|
||||
numThreads: Int = 1,
|
||||
debug: Int = 0,
|
||||
provider: String = "cpu"
|
||||
@ -749,7 +771,8 @@ func sherpaOnnxOfflineTtsModelConfig(
|
||||
vits: vits,
|
||||
num_threads: Int32(numThreads),
|
||||
debug: Int32(debug),
|
||||
provider: toCPointer(provider)
|
||||
provider: toCPointer(provider),
|
||||
matcha: matcha
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
42
swift-api-examples/run-tts-matcha-en.sh
Executable file
42
swift-api-examples/run-tts-matcha-en.sh
Executable file
@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env bash
#
# Builds (when needed) and runs the Swift MatchaTTS English example.
#
# Steps:
#   1. Require that ../build-swift-macos exists.
#   2. Download the matcha-icefall-en_US-ljspeech model and the hifigan_v2
#      vocoder if they are not already present in the current directory.
#   3. Compile tts-matcha-en.swift together with SherpaOnnx.swift unless the
#      tts-matcha-en binary already exists.
#   4. Run ./tts-matcha-en with the sherpa-onnx libraries on DYLD_LIBRARY_PATH.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# Check for the binary this script actually produces and runs. Testing for
# ./tts (another example's output) would either skip the build while
# ./tts-matcha-en is missing, or rebuild on every invocation.
if [ ! -e ./tts-matcha-en ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-matcha-en.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-matcha-en

  strip tts-matcha-en
else
  echo "./tts-matcha-en exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-matcha-en
|
||||
41
swift-api-examples/run-tts-matcha-zh.sh
Executable file
41
swift-api-examples/run-tts-matcha-zh.sh
Executable file
@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env bash
#
# Builds (when needed) and runs the Swift MatchaTTS Chinese example.
#
# Steps:
#   1. Require that ../build-swift-macos exists.
#   2. Download the matcha-icefall-zh-baker model and the hifigan_v2 vocoder
#      if they are not already present in the current directory.
#   3. Compile tts-matcha-zh.swift together with SherpaOnnx.swift unless the
#      tts-matcha-zh binary already exists.
#   4. Run ./tts-matcha-zh with the sherpa-onnx libraries on DYLD_LIBRARY_PATH.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  # Non-verbose extraction, matching the English example script.
  tar xf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# Check for the binary this script actually produces and runs. Testing for
# ./tts (another example's output) would either skip the build while
# ./tts-matcha-zh is missing, or rebuild on every invocation.
if [ ! -e ./tts-matcha-zh ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts-matcha-zh.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o tts-matcha-zh

  strip tts-matcha-zh
else
  echo "./tts-matcha-zh exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./tts-matcha-zh
|
||||
@ -21,16 +21,16 @@ if [ ! -e ./tts ]; then
|
||||
-lc++ \
|
||||
-I ../build-swift-macos/install/include \
|
||||
-import-objc-header ./SherpaOnnx-Bridging-Header.h \
|
||||
./tts.swift ./SherpaOnnx.swift \
|
||||
./tts-vits.swift ./SherpaOnnx.swift \
|
||||
-L ../build-swift-macos/install/lib/ \
|
||||
-l sherpa-onnx \
|
||||
-l onnxruntime \
|
||||
-o tts
|
||||
-o tts-vits
|
||||
|
||||
strip tts
|
||||
strip tts-vits
|
||||
else
|
||||
echo "./tts exists - skip building"
|
||||
echo "./tts-vits exists - skip building"
|
||||
fi
|
||||
|
||||
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
|
||||
./tts
|
||||
./tts-vits
|
||||
65
swift-api-examples/tts-matcha-en.swift
Normal file
65
swift-api-examples/tts-matcha-en.swift
Normal file
@ -0,0 +1,65 @@
|
||||
/// Receiver for audio chunks delivered by the TTS callback.
///
/// This example does not play anything; it only reports how many samples
/// each chunk contained.
class MyClass {
  /// Prints the number of samples in the chunk.
  ///
  /// - Parameter samples: One chunk of generated audio samples.
  func playSamples(samples: [Float]) {
    print("Play \(samples.count) samples")
  }
}
|
||||
|
||||
/// Synthesizes a fixed English sentence with a MatchaTTS model and saves the
/// result to `test-matcha-en.wav`.
///
/// Model, vocoder, tokens and espeak-ng data are read from paths relative to
/// the current working directory. While audio is generated, each chunk is
/// forwarded to a `MyClass` instance through a C callback. The outcome of the
/// save is printed to standard output.
func run() {
  let acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"
  let vocoder = "./hifigan_v2.onnx"
  let tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"
  let dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data"
  let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    tokens: tokens,
    dataDir: dataDir
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha, debug: 0)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)

  let myClass = MyClass()

  // passUnretained does not take ownership, so myClass must be kept alive for
  // as long as the callback may be invoked (see withExtendedLifetime below).
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  // The callback is converted to a C function pointer, so it must not capture
  // any local state; everything it needs arrives through `arg`.
  let callback: TtsCallbackWithArg = { samples, n, arg in
    guard let arg = arg else {
      // No receiver to deliver to; skip this chunk but keep generating.
      return 1
    }
    let receiver = Unmanaged<MyClass>.fromOpaque(arg).takeUnretainedValue()

    // Copy the chunk out of the C buffer in one step instead of appending
    // element by element; a nil pointer or non-positive count yields [].
    var savedSamples: [Float] = []
    if let samples = samples, n > 0 {
      savedSamples = Array(UnsafeBufferPointer(start: samples, count: Int(n)))
    }

    receiver.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text =
    "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
  let sid = 0
  let speed: Float = 1.0

  // Swift may release an object right after its last use, which for myClass
  // would otherwise be the passUnretained call above. Extending its lifetime
  // across generation keeps the raw pointer in `arg` valid for every callback.
  let audio = withExtendedLifetime(myClass) {
    tts.generateWithCallbackWithArg(
      text: text, callback: callback, arg: arg, sid: sid, speed: speed)
  }

  let filename = "test-matcha-en.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}
|
||||
|
||||
/// Program entry point for the MatchaTTS English example.
@main
struct App {
  /// Runs the example once and returns.
  static func main() {
    run()
  }
}
|
||||
68
swift-api-examples/tts-matcha-zh.swift
Normal file
68
swift-api-examples/tts-matcha-zh.swift
Normal file
@ -0,0 +1,68 @@
|
||||
/// Receiver for audio chunks delivered by the TTS callback.
///
/// This example does not play anything; it only reports how many samples
/// each chunk contained.
class MyClass {
  /// Prints the number of samples in the chunk.
  ///
  /// - Parameter samples: One chunk of generated audio samples.
  func playSamples(samples: [Float]) {
    print("Play \(samples.count) samples")
  }
}
|
||||
|
||||
/// Synthesizes a fixed Chinese sentence with a MatchaTTS model and saves the
/// result to `test-matcha-zh.wav`.
///
/// Model, vocoder, lexicon, tokens, dict directory and rule FSTs are read from
/// paths relative to the current working directory. While audio is generated,
/// each chunk is forwarded to a `MyClass` instance through a C callback. The
/// outcome of the save is printed to standard output.
func run() {
  let acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx"
  let vocoder = "./hifigan_v2.onnx"
  let lexicon = "./matcha-icefall-zh-baker/lexicon.txt"
  let tokens = "./matcha-icefall-zh-baker/tokens.txt"
  let dictDir = "./matcha-icefall-zh-baker/dict"
  // Comma-separated list of rule FST files handed to the TTS config.
  let ruleFsts =
    "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst"
  let matcha = sherpaOnnxOfflineTtsMatchaModelConfig(
    acousticModel: acousticModel,
    vocoder: vocoder,
    lexicon: lexicon,
    tokens: tokens,
    dictDir: dictDir
  )
  let modelConfig = sherpaOnnxOfflineTtsModelConfig(matcha: matcha, debug: 0)
  var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig, ruleFsts: ruleFsts)

  let myClass = MyClass()

  // passUnretained does not take ownership, so myClass must be kept alive for
  // as long as the callback may be invoked (see withExtendedLifetime below).
  //
  // See also
  // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6
  let arg = Unmanaged<MyClass>.passUnretained(myClass).toOpaque()

  // The callback is converted to a C function pointer, so it must not capture
  // any local state; everything it needs arrives through `arg`.
  let callback: TtsCallbackWithArg = { samples, n, arg in
    guard let arg = arg else {
      // No receiver to deliver to; skip this chunk but keep generating.
      return 1
    }
    let receiver = Unmanaged<MyClass>.fromOpaque(arg).takeUnretainedValue()

    // Copy the chunk out of the C buffer in one step instead of appending
    // element by element; a nil pointer or non-positive count yields [].
    var savedSamples: [Float] = []
    if let samples = samples, n > 0 {
      savedSamples = Array(UnsafeBufferPointer(start: samples, count: Int(n)))
    }

    receiver.playSamples(samples: savedSamples)

    // return 1 so that it continues generating
    return 1
  }

  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let text = "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
  let sid = 0
  let speed: Float = 1.0

  // Swift may release an object right after its last use, which for myClass
  // would otherwise be the passUnretained call above. Extending its lifetime
  // across generation keeps the raw pointer in `arg` valid for every callback.
  let audio = withExtendedLifetime(myClass) {
    tts.generateWithCallbackWithArg(
      text: text, callback: callback, arg: arg, sid: sid, speed: speed)
  }

  let filename = "test-matcha-zh.wav"
  let ok = audio.save(filename: filename)
  if ok == 1 {
    print("\nSaved to:\(filename)")
  } else {
    print("Failed to save to \(filename)")
  }
}
|
||||
|
||||
/// Program entry point for the MatchaTTS Chinese example.
@main
struct App {
  /// Runs the example once and returns.
  static func main() {
    run()
  }
}
|
||||
@ -47,7 +47,7 @@ func run() {
|
||||
|
||||
let audio = tts.generateWithCallbackWithArg(
|
||||
text: text, callback: callback, arg: arg, sid: sid, speed: speed)
|
||||
let filename = "test.wav"
|
||||
let filename = "test-vits-en.wav"
|
||||
let ok = audio.save(filename: filename)
|
||||
if ok == 1 {
|
||||
print("\nSaved to:\(filename)")
|
||||
Loading…
x
Reference in New Issue
Block a user