mirror of
https://github.com/k2-fsa/sherpa-onnx.git
synced 2026-01-09 07:41:06 +08:00
Generate tts samples for MatchaTTS (English). (#2527)
This commit is contained in:
parent
4694d675bd
commit
f1f8149a47
36
.github/workflows/generate-tts-samples.yaml
vendored
36
.github/workflows/generate-tts-samples.yaml
vendored
@ -32,7 +32,7 @@ jobs:
|
||||
pip install "numpy<=1.26.4" sherpa-onnx soundfile
|
||||
|
||||
- name: kitten
|
||||
if: true
|
||||
if: false
|
||||
shell: bash
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
@ -68,3 +68,37 @@ jobs:
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
|
||||
popd
|
||||
rm -rf hf
|
||||
|
||||
- name: matcha en (ljspeech)
|
||||
if: true
|
||||
shell: bash
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
run: |
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
cd scripts/matcha-tts/en/
|
||||
pwd=$PWD
|
||||
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
export GIT_CLONE_PROTECTION_ACTIVE=false
|
||||
git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples hf
|
||||
|
||||
mkdir -p ./hf/matcha/icefall-en-ljspeech/mp3
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
rm matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx
|
||||
|
||||
python3 ./generate_samples.py
|
||||
|
||||
pushd hf
|
||||
git pull
|
||||
git add .
|
||||
git commit -m 'add matcha tts en (ljspeech) samples'
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-tts-samples main
|
||||
popd
|
||||
|
||||
rm -rf hf
|
||||
|
||||
38
scripts/matcha-tts/en/generate_samples.py
Executable file
38
scripts/matcha-tts/en/generate_samples.py
Executable file
@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
|
||||
"""
|
||||
Generate samples for
|
||||
https://k2-fsa.github.io/sherpa/onnx/tts/all/
|
||||
"""
|
||||
|
||||
|
||||
import sherpa_onnx
|
||||
import soundfile as sf
|
||||
|
||||
config = sherpa_onnx.OfflineTtsConfig(
|
||||
model=sherpa_onnx.OfflineTtsModelConfig(
|
||||
matcha=sherpa_onnx.OfflineTtsMatchaModelConfig(
|
||||
acoustic_model="matcha-icefall-en_US-ljspeech/model-steps-3.onnx",
|
||||
vocoder="vocos-22khz-univ.onnx",
|
||||
tokens="matcha-icefall-en_US-ljspeech/tokens.txt",
|
||||
lexicon="",
|
||||
data_dir="matcha-icefall-en_US-ljspeech/espeak-ng-data",
|
||||
),
|
||||
num_threads=2,
|
||||
),
|
||||
max_num_sentences=1,
|
||||
)
|
||||
|
||||
if not config.validate():
|
||||
raise ValueError("Please check your config")
|
||||
|
||||
tts = sherpa_onnx.OfflineTts(config)
|
||||
text = "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
|
||||
|
||||
audio = tts.generate(text, sid=0, speed=1.0)
|
||||
|
||||
sf.write(
|
||||
"./hf/matcha/icefall-en-ljspeech/mp3/0.mp3",
|
||||
audio.samples,
|
||||
samplerate=audio.sample_rate,
|
||||
)
|
||||
@ -18,8 +18,8 @@ void PybindOfflineTtsMatchaModelConfig(py::module *m) {
|
||||
.def(py::init<const std::string &, const std::string &,
|
||||
const std::string &, const std::string &,
|
||||
const std::string &, const std::string &, float, float>(),
|
||||
py::arg("acoustic_model"), py::arg("vocoder"), py::arg("lexicon"),
|
||||
py::arg("tokens"), py::arg("data_dir") = "",
|
||||
py::arg("acoustic_model"), py::arg("vocoder"),
|
||||
py::arg("lexicon") = "", py::arg("tokens"), py::arg("data_dir") = "",
|
||||
py::arg("dict_dir") = "", py::arg("noise_scale") = 1.0,
|
||||
py::arg("length_scale") = 1.0)
|
||||
.def_readwrite("acoustic_model", &PyClass::acoustic_model)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user