mirror of
https://github.com/alphacep/vosk-api.git
synced 2026-03-23 00:01:25 +08:00
558 lines
38 KiB
Plaintext
558 lines
38 KiB
Plaintext
{
|
||
"nbformat": 4,
|
||
"nbformat_minor": 0,
|
||
"metadata": {
|
||
"colab": {
|
||
"name": "Vosk Adaptation",
|
||
"provenance": [],
|
||
"collapsed_sections": []
|
||
},
|
||
"kernelspec": {
|
||
"name": "python3",
|
||
"display_name": "Python 3"
|
||
},
|
||
"language_info": {
|
||
"name": "python"
|
||
},
|
||
"accelerator": "GPU",
|
||
"gpuClass": "standard"
|
||
},
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "URzWMmv50-Ba",
|
||
"outputId": "0e096a99-74dd-42e2-efb1-9cba784c3664"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"/content\n",
|
||
"--2022-08-17 09:48:52-- https://alphacephei.com/vosk-colab/kaldi.tar.gz\n",
|
||
"Resolving alphacephei.com (alphacephei.com)... 188.40.21.16, 2a01:4f8:13a:279f::2\n",
|
||
"Connecting to alphacephei.com (alphacephei.com)|188.40.21.16|:443... connected.\n",
|
||
"HTTP request sent, awaiting response... 200 OK\n",
|
||
"Length: 809174554 (772M) [application/octet-stream]\n",
|
||
"Saving to: ‘kaldi.tar.gz’\n",
|
||
"\n",
|
||
"kaldi.tar.gz 100%[===================>] 771.69M 20.3MB/s in 40s \n",
|
||
"\n",
|
||
"2022-08-17 09:49:33 (19.4 MB/s) - ‘kaldi.tar.gz’ saved [809174554/809174554]\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%cd /content\n",
|
||
"!wget -c https://alphacephei.com/vosk-colab/kaldi.tar.gz\n",
|
||
"!tar xzf kaldi.tar.gz"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"%cd /content/kaldi/egs/ac\n",
|
||
"!wget -c https://alphacephei.com/vosk-colab/vosk-model-small-en-us-0.15-compile-colab.tar.gz\n",
|
||
"!rm -rf vosk-model-small-en-us-0.15-compile-colab\n",
|
||
"!tar xf vosk-model-small-en-us-0.15-compile-colab.tar.gz"
|
||
],
|
||
"metadata": {
|
||
"id": "-065p7WC2SHh",
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"outputId": "241c7473-7464-48d5-b48d-dc6e3bf4971d"
|
||
},
|
||
"execution_count": 8,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"/content/kaldi/egs/ac\n",
|
||
"--2022-08-17 10:28:26-- https://alphacephei.com/vosk-colab/vosk-model-small-en-us-0.15-compile-colab.tar.gz\n",
|
||
"Resolving alphacephei.com (alphacephei.com)... 188.40.21.16, 2a01:4f8:13a:279f::2\n",
|
||
"Connecting to alphacephei.com (alphacephei.com)|188.40.21.16|:443... connected.\n",
|
||
"HTTP request sent, awaiting response... 200 OK\n",
|
||
"Length: 59618100 (57M) [application/octet-stream]\n",
|
||
"Saving to: ‘vosk-model-small-en-us-0.15-compile-colab.tar.gz’\n",
|
||
"\n",
|
||
"vosk-model-small-en 100%[===================>] 56.86M 18.6MB/s in 3.6s \n",
|
||
"\n",
|
||
"2022-08-17 10:28:30 (15.7 MB/s) - ‘vosk-model-small-en-us-0.15-compile-colab.tar.gz’ saved [59618100/59618100]\n",
|
||
"\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"%cd /content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab\n",
|
||
"!ls\n",
|
||
"!cat compile-graph.sh\n",
|
||
"!bash compile-graph.sh"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "wuDjvNbd2sf9",
|
||
"outputId": "34a1d2fe-d443-4574-e25d-824e38eb3a78"
|
||
},
|
||
"execution_count": 9,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab\n",
|
||
"compile-graph.sh data_test decode.sh\texp\t local path.sh steps\n",
|
||
"conf\t\t db\t dict.py\tget_vocab.py mfcc RESULTS utils\n",
|
||
"#!/bin/bash\n",
|
||
"\n",
|
||
"set -x\n",
|
||
"\n",
|
||
". path.sh\n",
|
||
"\n",
|
||
"pip3 install phonetisaurus\n",
|
||
"\n",
|
||
"rm -rf data\n",
|
||
"rm -rf exp/tdnn/lgraph\n",
|
||
"rm -rf exp/tdnn/lgraph_orig\n",
|
||
"\n",
|
||
"mkdir -p data/dict\n",
|
||
"cp db/phone/* data/dict\n",
|
||
"./dict.py > data/dict/lexicon.txt\n",
|
||
"\n",
|
||
"python3 ./get_vocab.py > data/mix.vocab\n",
|
||
"ngramsymbols data/mix.vocab data/mix.syms\n",
|
||
"farcompilestrings --fst_type=compact --symbols=data/mix.syms --keep_symbols --unknown_symbol=\"[unk]\" db/extra.txt data/extra.far\n",
|
||
"ngramcount --order=3 data/extra.far - |\n",
|
||
" ngramprint --integers | grep -v \"<unk>\" | ngramread |\n",
|
||
" ngramshrink --method=count_prune --count_pattern=\"3+:3\" |\n",
|
||
" ngrammake --method=witten_bell - data/extra.mod\n",
|
||
"gunzip -c db/en-50k-0.4-android.lm.gz | ngramread --renormalize_arpa --ARPA --symbols=data/mix.syms - data/en-us.mod\n",
|
||
"ngrammerge --method=\"bayes_model_merge\" --normalize --alpha=0.95 --beta=0.05 data/en-us.mod data/extra.mod data/en-us-mix.mod\n",
|
||
"ngramprint --ARPA data/en-us-mix.mod | gzip -c > data/en-us-mix.lm.gz\n",
|
||
"\n",
|
||
"# Prune for the first stage if needed\n",
|
||
"# ngramshrink --method=relative_entropy --theta=2e-8 data/en-us-mix.mod data/en-us-mix-prune.mod\n",
|
||
"# ngramprint --ARPA data/en-us-mix-prune.mod | gzip -c > data/en-us-mix-small.lm.gz\n",
|
||
"\n",
|
||
"utils/prepare_lang.sh data/dict \"[unk]\" data/lang_local data/lang\n",
|
||
"utils/format_lm.sh data/lang db/en-50k-0.4-android.lm.gz data/dict/lexicon.txt data/lang_test\n",
|
||
"utils/format_lm.sh data/lang data/en-us-mix.lm.gz data/dict/lexicon.txt data/lang_test_adapt\n",
|
||
"\n",
|
||
"utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test exp/tdnn exp/tdnn/graph\n",
|
||
"utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_adapt exp/tdnn exp/tdnn/graph_adapt\n",
|
||
"\n",
|
||
"# Lookahead part goes OOM\n",
|
||
"#utils/mkgraph_lookahead.sh \\\n",
|
||
"# --self-loop-scale 1.0 data/lang \\\n",
|
||
"# exp/tdnn data/en-us-mix.lm.gz exp/tdnn/lgraph\n",
|
||
"#utils/mkgraph_lookahead.sh \\\n",
|
||
"# --self-loop-scale 1.0 data/lang \\\n",
|
||
"# exp/tdnn db/en-50k-0.4-android.lm.gz exp/tdnn/lgraph_orig\n",
|
||
"+ . path.sh\n",
|
||
"+++ pwd\n",
|
||
"++ export KALDI_ROOT=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../..\n",
|
||
"++ KALDI_ROOT=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../..\n",
|
||
"++ export PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/utils:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fstbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/gmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/featbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lm:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmm2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/latbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnetbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/online2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/ivectorbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/chainbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet3bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab:/opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/sph2pipe_v2.5\n",
|
||
"++ PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/utils:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fstbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/gmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/featbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lm:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmm2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/latbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnetbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/online2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/ivectorbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/chainbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet3bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab:/opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/sph2pipe_v2.5\n",
|
||
"++ export PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/ngram-1.3.7/src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/utils:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fstbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/gmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/featbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lm:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmm2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/latbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnetbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/online2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/ivectorbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/chainbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet3bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab:/opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/sph2pipe_v2.5\n",
|
||
"++ PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/ngram-1.3.7/src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/utils:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fstbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/gmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/featbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lm:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmm2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/latbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnetbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/online2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/ivectorbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/chainbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet3bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab:/opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/sph2pipe_v2.5\n",
|
||
"++ export LD_LIBRARY_PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/lib/fst/\n",
|
||
"++ LD_LIBRARY_PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/lib/fst/\n",
|
||
"++ export LC_ALL=C\n",
|
||
"++ LC_ALL=C\n",
|
||
"+ pip3 install phonetisaurus\n",
|
||
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
|
||
"Requirement already satisfied: phonetisaurus in /usr/local/lib/python3.7/dist-packages (0.3.0)\n",
|
||
"+ rm -rf data\n",
|
||
"+ rm -rf exp/tdnn/lgraph\n",
|
||
"+ rm -rf exp/tdnn/lgraph_orig\n",
|
||
"+ mkdir -p data/dict\n",
|
||
"+ cp db/phone/extra_questions.txt db/phone/nonsilence_phones.txt db/phone/optional_silence.txt db/phone/silence_phones.txt data/dict\n",
|
||
"+ ./dict.py\n",
|
||
"+ python3 ./get_vocab.py\n",
|
||
"+ ngramsymbols data/mix.vocab data/mix.syms\n",
|
||
"+ farcompilestrings --fst_type=compact --symbols=data/mix.syms --keep_symbols '--unknown_symbol=[unk]' db/extra.txt data/extra.far\n",
|
||
"+ ngramcount --order=3 data/extra.far -\n",
|
||
"+ ngrammake --method=witten_bell - data/extra.mod\n",
|
||
"+ ngramshrink --method=count_prune --count_pattern=3+:3\n",
|
||
"+ ngramread\n",
|
||
"+ ngramprint --integers\n",
|
||
"+ grep -v '<unk>'\n",
|
||
"+ ngramread --renormalize_arpa --ARPA --symbols=data/mix.syms - data/en-us.mod\n",
|
||
"+ gunzip -c db/en-50k-0.4-android.lm.gz\n",
|
||
"+ ngrammerge --method=bayes_model_merge --normalize --alpha=0.95 --beta=0.05 data/en-us.mod data/extra.mod data/en-us-mix.mod\n",
|
||
"+ ngramprint --ARPA data/en-us-mix.mod\n",
|
||
"+ gzip -c\n",
|
||
"+ utils/prepare_lang.sh data/dict '[unk]' data/lang_local data/lang\n",
|
||
"utils/prepare_lang.sh data/dict [unk] data/lang_local data/lang\n",
|
||
"Checking data/dict/silence_phones.txt ...\n",
|
||
"--> reading data/dict/silence_phones.txt\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> data/dict/silence_phones.txt is OK\n",
|
||
"\n",
|
||
"Checking data/dict/optional_silence.txt ...\n",
|
||
"--> reading data/dict/optional_silence.txt\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> data/dict/optional_silence.txt is OK\n",
|
||
"\n",
|
||
"Checking data/dict/nonsilence_phones.txt ...\n",
|
||
"--> reading data/dict/nonsilence_phones.txt\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> data/dict/nonsilence_phones.txt is OK\n",
|
||
"\n",
|
||
"Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n",
|
||
"--> disjoint property is OK.\n",
|
||
"\n",
|
||
"Checking data/dict/lexicon.txt\n",
|
||
"--> reading data/dict/lexicon.txt\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> data/dict/lexicon.txt is OK\n",
|
||
"\n",
|
||
"Checking data/dict/extra_questions.txt ...\n",
|
||
"--> data/dict/extra_questions.txt is empty (this is OK)\n",
|
||
"--> SUCCESS [validating dictionary directory data/dict]\n",
|
||
"\n",
|
||
"**Creating data/dict/lexiconp.txt from data/dict/lexicon.txt\n",
|
||
"fstaddselfloops data/lang/phones/wdisambig_phones.int data/lang/phones/wdisambig_words.int \n",
|
||
"prepare_lang.sh: validating output directory\n",
|
||
"utils/validate_lang.pl data/lang\n",
|
||
"Checking existence of separator file\n",
|
||
"separator file data/lang/subword_separator.txt is empty or does not exist, deal in word case.\n",
|
||
"Checking data/lang/phones.txt ...\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> data/lang/phones.txt is OK\n",
|
||
"\n",
|
||
"Checking words.txt: #0 ...\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> data/lang/words.txt is OK\n",
|
||
"\n",
|
||
"Checking disjoint: silence.txt, nonsilence.txt, disambig.txt ...\n",
|
||
"--> silence.txt and nonsilence.txt are disjoint\n",
|
||
"--> silence.txt and disambig.txt are disjoint\n",
|
||
"--> disambig.txt and nonsilence.txt are disjoint\n",
|
||
"--> disjoint property is OK\n",
|
||
"\n",
|
||
"Checking sumation: silence.txt, nonsilence.txt, disambig.txt ...\n",
|
||
"--> found no unexplainable phones in phones.txt\n",
|
||
"\n",
|
||
"Checking data/lang/phones/context_indep.{txt, int, csl} ...\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> 10 entry/entries in data/lang/phones/context_indep.txt\n",
|
||
"--> data/lang/phones/context_indep.int corresponds to data/lang/phones/context_indep.txt\n",
|
||
"--> data/lang/phones/context_indep.csl corresponds to data/lang/phones/context_indep.txt\n",
|
||
"--> data/lang/phones/context_indep.{txt, int, csl} are OK\n",
|
||
"\n",
|
||
"Checking data/lang/phones/nonsilence.{txt, int, csl} ...\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> 156 entry/entries in data/lang/phones/nonsilence.txt\n",
|
||
"--> data/lang/phones/nonsilence.int corresponds to data/lang/phones/nonsilence.txt\n",
|
||
"--> data/lang/phones/nonsilence.csl corresponds to data/lang/phones/nonsilence.txt\n",
|
||
"--> data/lang/phones/nonsilence.{txt, int, csl} are OK\n",
|
||
"\n",
|
||
"Checking data/lang/phones/silence.{txt, int, csl} ...\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> 10 entry/entries in data/lang/phones/silence.txt\n",
|
||
"--> data/lang/phones/silence.int corresponds to data/lang/phones/silence.txt\n",
|
||
"--> data/lang/phones/silence.csl corresponds to data/lang/phones/silence.txt\n",
|
||
"--> data/lang/phones/silence.{txt, int, csl} are OK\n",
|
||
"\n",
|
||
"Checking data/lang/phones/optional_silence.{txt, int, csl} ...\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> 1 entry/entries in data/lang/phones/optional_silence.txt\n",
|
||
"--> data/lang/phones/optional_silence.int corresponds to data/lang/phones/optional_silence.txt\n",
|
||
"--> data/lang/phones/optional_silence.csl corresponds to data/lang/phones/optional_silence.txt\n",
|
||
"--> data/lang/phones/optional_silence.{txt, int, csl} are OK\n",
|
||
"\n",
|
||
"Checking data/lang/phones/disambig.{txt, int, csl} ...\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> 14 entry/entries in data/lang/phones/disambig.txt\n",
|
||
"--> data/lang/phones/disambig.int corresponds to data/lang/phones/disambig.txt\n",
|
||
"--> data/lang/phones/disambig.csl corresponds to data/lang/phones/disambig.txt\n",
|
||
"--> data/lang/phones/disambig.{txt, int, csl} are OK\n",
|
||
"\n",
|
||
"Checking data/lang/phones/roots.{txt, int} ...\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> 41 entry/entries in data/lang/phones/roots.txt\n",
|
||
"--> data/lang/phones/roots.int corresponds to data/lang/phones/roots.txt\n",
|
||
"--> data/lang/phones/roots.{txt, int} are OK\n",
|
||
"\n",
|
||
"Checking data/lang/phones/sets.{txt, int} ...\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> 41 entry/entries in data/lang/phones/sets.txt\n",
|
||
"--> data/lang/phones/sets.int corresponds to data/lang/phones/sets.txt\n",
|
||
"--> data/lang/phones/sets.{txt, int} are OK\n",
|
||
"\n",
|
||
"Checking data/lang/phones/extra_questions.{txt, int} ...\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> 9 entry/entries in data/lang/phones/extra_questions.txt\n",
|
||
"--> data/lang/phones/extra_questions.int corresponds to data/lang/phones/extra_questions.txt\n",
|
||
"--> data/lang/phones/extra_questions.{txt, int} are OK\n",
|
||
"\n",
|
||
"Checking data/lang/phones/word_boundary.{txt, int} ...\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> 166 entry/entries in data/lang/phones/word_boundary.txt\n",
|
||
"--> data/lang/phones/word_boundary.int corresponds to data/lang/phones/word_boundary.txt\n",
|
||
"--> data/lang/phones/word_boundary.{txt, int} are OK\n",
|
||
"\n",
|
||
"Checking optional_silence.txt ...\n",
|
||
"--> reading data/lang/phones/optional_silence.txt\n",
|
||
"--> data/lang/phones/optional_silence.txt is OK\n",
|
||
"\n",
|
||
"Checking disambiguation symbols: #0 and #1\n",
|
||
"--> data/lang/phones/disambig.txt has \"#0\" and \"#1\"\n",
|
||
"--> data/lang/phones/disambig.txt is OK\n",
|
||
"\n",
|
||
"Checking topo ...\n",
|
||
"\n",
|
||
"Checking word_boundary.txt: silence.txt, nonsilence.txt, disambig.txt ...\n",
|
||
"--> data/lang/phones/word_boundary.txt doesn't include disambiguation symbols\n",
|
||
"--> data/lang/phones/word_boundary.txt is the union of nonsilence.txt and silence.txt\n",
|
||
"--> data/lang/phones/word_boundary.txt is OK\n",
|
||
"\n",
|
||
"Checking word-level disambiguation symbols...\n",
|
||
"--> data/lang/phones/wdisambig.txt exists (newer prepare_lang.sh)\n",
|
||
"Checking word_boundary.int and disambig.int\n",
|
||
"--> generating a 98 word/subword sequence\n",
|
||
"--> resulting phone sequence from L.fst corresponds to the word sequence\n",
|
||
"--> L.fst is OK\n",
|
||
"--> generating a 49 word/subword sequence\n",
|
||
"--> resulting phone sequence from L_disambig.fst corresponds to the word sequence\n",
|
||
"--> L_disambig.fst is OK\n",
|
||
"\n",
|
||
"Checking data/lang/oov.{txt, int} ...\n",
|
||
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
|
||
"--> text contains only allowed whitespaces\n",
|
||
"--> 1 entry/entries in data/lang/oov.txt\n",
|
||
"--> data/lang/oov.int corresponds to data/lang/oov.txt\n",
|
||
"--> data/lang/oov.{txt, int} are OK\n",
|
||
"\n",
|
||
"--> data/lang/L.fst is olabel sorted\n",
|
||
"--> data/lang/L_disambig.fst is olabel sorted\n",
|
||
"--> SUCCESS [validating lang directory data/lang]\n",
|
||
"+ utils/format_lm.sh data/lang db/en-50k-0.4-android.lm.gz data/dict/lexicon.txt data/lang_test\n",
|
||
"Converting 'db/en-50k-0.4-android.lm.gz' to FST\n",
|
||
"arpa2fst --disambig-symbol=#0 --read-symbol-table=data/lang_test/words.txt - data/lang_test/G.fst \n",
|
||
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:94) Reading \\data\\ section.\n",
|
||
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\1-grams: section.\n",
|
||
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\2-grams: section.\n",
|
||
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\3-grams: section.\n",
|
||
"LOG (arpa2fst[5.5.1046~1-76cd5]:RemoveRedundantStates():arpa-lm-compiler.cc:359) Reduced num-states from 1217362 to 185036\n",
|
||
"fstisstochastic data/lang_test/G.fst \n",
|
||
"0.476411 -3.03779\n",
|
||
"Succeeded in formatting LM: 'db/en-50k-0.4-android.lm.gz'\n",
|
||
"+ utils/format_lm.sh data/lang data/en-us-mix.lm.gz data/dict/lexicon.txt data/lang_test_adapt\n",
|
||
"Converting 'data/en-us-mix.lm.gz' to FST\n",
|
||
"arpa2fst --disambig-symbol=#0 --read-symbol-table=data/lang_test_adapt/words.txt - data/lang_test_adapt/G.fst \n",
|
||
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:94) Reading \\data\\ section.\n",
|
||
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\1-grams: section.\n",
|
||
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\2-grams: section.\n",
|
||
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\3-grams: section.\n",
|
||
"LOG (arpa2fst[5.5.1046~1-76cd5]:RemoveRedundantStates():arpa-lm-compiler.cc:359) Reduced num-states from 1217646 to 185095\n",
|
||
"fstisstochastic data/lang_test_adapt/G.fst \n",
|
||
"6.81902e-07 -3.03779\n",
|
||
"Succeeded in formatting LM: 'data/en-us-mix.lm.gz'\n",
|
||
"+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test exp/tdnn exp/tdnn/graph\n",
|
||
"tree-info exp/tdnn/tree \n",
|
||
"tree-info exp/tdnn/tree \n",
|
||
"fstdeterminizestar --use-log=true \n",
|
||
"fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst \n",
|
||
"fstminimizeencoded \n",
|
||
"fstpushspecial \n",
|
||
"fstisstochastic data/lang_test/tmp/LG.fst \n",
|
||
"-0.145498 -0.146281\n",
|
||
"[info]: LG not stochastic.\n",
|
||
"fstcomposecontext --context-size=2 --central-position=1 --read-disambig-syms=data/lang_test/phones/disambig.int --write-disambig-syms=data/lang_test/tmp/disambig_ilabels_2_1.int data/lang_test/tmp/ilabels_2_1.905 data/lang_test/tmp/LG.fst \n",
|
||
"fstisstochastic data/lang_test/tmp/CLG_2_1.fst \n",
|
||
"-0.145498 -0.146281\n",
|
||
"[info]: CLG not stochastic.\n",
|
||
"make-h-transducer --disambig-syms-out=exp/tdnn/graph/disambig_tid.int --transition-scale=1.0 data/lang_test/tmp/ilabels_2_1 exp/tdnn/tree exp/tdnn/final.mdl \n",
|
||
"fstrmepslocal \n",
|
||
"fsttablecompose exp/tdnn/graph/Ha.fst data/lang_test/tmp/CLG_2_1.fst \n",
|
||
"fstdeterminizestar --use-log=true \n",
|
||
"fstminimizeencoded \n",
|
||
"fstrmsymbols exp/tdnn/graph/disambig_tid.int \n",
|
||
"fstisstochastic exp/tdnn/graph/HCLGa.fst \n",
|
||
"-0.109817 -0.571742\n",
|
||
"HCLGa is not stochastic\n",
|
||
"add-self-loops --self-loop-scale=1.0 --reorder=true exp/tdnn/final.mdl exp/tdnn/graph/HCLGa.fst \n",
|
||
"fstisstochastic exp/tdnn/graph/HCLG.fst \n",
|
||
"1.90465e-09 -0.415046\n",
|
||
"[info]: final HCLG is not stochastic.\n",
|
||
"+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_adapt exp/tdnn exp/tdnn/graph_adapt\n",
|
||
"tree-info exp/tdnn/tree \n",
|
||
"tree-info exp/tdnn/tree \n",
|
||
"fstdeterminizestar --use-log=true \n",
|
||
"fsttablecompose data/lang_test_adapt/L_disambig.fst data/lang_test_adapt/G.fst \n",
|
||
"fstminimizeencoded \n",
|
||
"fstpushspecial \n",
|
||
"fstisstochastic data/lang_test_adapt/tmp/LG.fst \n",
|
||
"-0.148474 -0.149181\n",
|
||
"[info]: LG not stochastic.\n",
|
||
"fstcomposecontext --context-size=2 --central-position=1 --read-disambig-syms=data/lang_test_adapt/phones/disambig.int --write-disambig-syms=data/lang_test_adapt/tmp/disambig_ilabels_2_1.int data/lang_test_adapt/tmp/ilabels_2_1.979 data/lang_test_adapt/tmp/LG.fst \n",
|
||
"fstisstochastic data/lang_test_adapt/tmp/CLG_2_1.fst \n",
|
||
"-0.148474 -0.149181\n",
|
||
"[info]: CLG not stochastic.\n",
|
||
"make-h-transducer --disambig-syms-out=exp/tdnn/graph_adapt/disambig_tid.int --transition-scale=1.0 data/lang_test_adapt/tmp/ilabels_2_1 exp/tdnn/tree exp/tdnn/final.mdl \n",
|
||
"fstrmepslocal \n",
|
||
"fsttablecompose exp/tdnn/graph_adapt/Ha.fst data/lang_test_adapt/tmp/CLG_2_1.fst \n",
|
||
"fstdeterminizestar --use-log=true \n",
|
||
"fstminimizeencoded \n",
|
||
"fstrmsymbols exp/tdnn/graph_adapt/disambig_tid.int \n",
|
||
"fstisstochastic exp/tdnn/graph_adapt/HCLGa.fst \n",
|
||
"-0.113907 -0.5857\n",
|
||
"HCLGa is not stochastic\n",
|
||
"add-self-loops --self-loop-scale=1.0 --reorder=true exp/tdnn/final.mdl exp/tdnn/graph_adapt/HCLGa.fst \n",
|
||
"fstisstochastic exp/tdnn/graph_adapt/HCLG.fst \n",
|
||
"1.90465e-09 -0.423618\n",
|
||
"[info]: final HCLG is not stochastic.\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"!cat decode.sh\n",
|
||
"!bash decode.sh"
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "Sl3QBI1MXpc-",
|
||
"outputId": "affac8a3-782f-4000-e31f-81bfed47a37a"
|
||
},
|
||
"execution_count": 10,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"#!/bin/bash\n",
|
||
"\n",
|
||
". path.sh\n",
|
||
"\n",
|
||
"steps/make_mfcc.sh --nj 10 data_test/test_small exp/make_mfcc/test mfcc\n",
|
||
"steps/compute_cmvn_stats.sh data_test/test_small exp/make_mfcc/test mfcc\n",
|
||
"utils/fix_data_dir.sh data_test/test_small\n",
|
||
"\n",
|
||
"steps/online/nnet2/extract_ivectors_online.sh --nj 4 \\\n",
|
||
" data_test/test_small exp/extractor \\\n",
|
||
" exp/ivectors_test\n",
|
||
"\n",
|
||
"steps/nnet3/decode.sh --nj 4 \\\n",
|
||
" --acwt 1.0 --post-decode-acwt 10.0 \\\n",
|
||
" --online-ivector-dir exp/ivectors_test \\\n",
|
||
" exp/tdnn/graph_adapt data_test/test_small exp/tdnn/decode_test_adapt\n",
|
||
"\n",
|
||
"steps/nnet3/decode.sh --nj 4 \\\n",
|
||
" --acwt 1.0 --post-decode-acwt 10.0 \\\n",
|
||
" --online-ivector-dir exp/ivectors_test \\\n",
|
||
" exp/tdnn/graph data_test/test_small exp/tdnn/decode_test\n",
|
||
"\n",
|
||
"#steps/nnet3/decode_lookahead.sh --nj 4 \\\n",
|
||
"# --acwt 1.0 --post-decode-acwt 10.0 \\\n",
|
||
"# --online-ivector-dir exp/ivectors_test \\\n",
|
||
"# exp/tdnn/lgraph data_test/test_small exp/tdnn/decode_test_adapt\n",
|
||
"#steps/nnet3/decode_lookahead.sh --nj 4 \\\n",
|
||
"# --acwt 1.0 --post-decode-acwt 10.0 \\\n",
|
||
"# --online-ivector-dir exp/ivectors_test \\\n",
|
||
"# exp/tdnn/lgraph_orig data_test/test_small exp/tdnn/decode_test\n",
|
||
"steps/make_mfcc.sh --nj 10 data_test/test_small exp/make_mfcc/test mfcc\n",
|
||
"steps/make_mfcc.sh: moving data_test/test_small/feats.scp to data_test/test_small/.backup\n",
|
||
"utils/validate_data_dir.sh: Successfully validated data-directory data_test/test_small\n",
|
||
"steps/make_mfcc.sh: [info]: no segments file exists: assuming wav.scp indexed by utterance.\n",
|
||
"steps/make_mfcc.sh: Succeeded creating MFCC features for test_small\n",
|
||
"steps/compute_cmvn_stats.sh data_test/test_small exp/make_mfcc/test mfcc\n",
|
||
"Succeeded creating CMVN stats for test_small\n",
|
||
"fix_data_dir.sh: kept all 50 utterances.\n",
|
||
"fix_data_dir.sh: old files are kept in data_test/test_small/.backup\n",
|
||
"steps/online/nnet2/extract_ivectors_online.sh --nj 4 data_test/test_small exp/extractor exp/ivectors_test\n",
|
||
"steps/online/nnet2/extract_ivectors_online.sh: extracting iVectors\n",
|
||
"steps/online/nnet2/extract_ivectors_online.sh: combining iVectors across jobs\n",
|
||
"steps/online/nnet2/extract_ivectors_online.sh: done extracting (online) iVectors to exp/ivectors_test using the extractor in exp/extractor.\n",
|
||
"steps/nnet3/decode.sh --nj 4 --acwt 1.0 --post-decode-acwt 10.0 --online-ivector-dir exp/ivectors_test exp/tdnn/graph_adapt data_test/test_small exp/tdnn/decode_test_adapt\n",
|
||
"steps/nnet2/check_ivectors_compatible.sh: WARNING: One of the directories do not contain iVector ID.\n",
|
||
"steps/nnet2/check_ivectors_compatible.sh: WARNING: That means it's you who's reponsible for keeping \n",
|
||
"steps/nnet2/check_ivectors_compatible.sh: WARNING: the directories compatible\n",
|
||
"steps/nnet3/decode.sh: feature type is raw\n",
|
||
"steps/diagnostic/analyze_lats.sh --cmd run.pl --iter final exp/tdnn/graph_adapt exp/tdnn/decode_test_adapt\n",
|
||
"steps/diagnostic/analyze_lats.sh: see stats in exp/tdnn/decode_test_adapt/log/analyze_alignments.log\n",
|
||
"Overall, lattice depth (10,50,90-percentile)=(1,1,4) and mean=2.4\n",
|
||
"steps/diagnostic/analyze_lats.sh: see stats in exp/tdnn/decode_test_adapt/log/analyze_lattice_depth_stats.log\n",
|
||
"score best paths\n",
|
||
"local/score.sh --cmd run.pl data_test/test_small exp/tdnn/graph_adapt exp/tdnn/decode_test_adapt\n",
|
||
"local/score.sh: scoring with word insertion penalty=0.0,0.5,1.0\n",
|
||
"score confidence and timing with sclite\n",
|
||
"Decoding done.\n",
|
||
"steps/nnet3/decode.sh --nj 4 --acwt 1.0 --post-decode-acwt 10.0 --online-ivector-dir exp/ivectors_test exp/tdnn/graph data_test/test_small exp/tdnn/decode_test\n",
|
||
"steps/nnet2/check_ivectors_compatible.sh: WARNING: One of the directories do not contain iVector ID.\n",
|
||
"steps/nnet2/check_ivectors_compatible.sh: WARNING: That means it's you who's reponsible for keeping \n",
|
||
"steps/nnet2/check_ivectors_compatible.sh: WARNING: the directories compatible\n",
|
||
"steps/nnet3/decode.sh: feature type is raw\n",
|
||
"steps/diagnostic/analyze_lats.sh --cmd run.pl --iter final exp/tdnn/graph exp/tdnn/decode_test\n",
|
||
"steps/diagnostic/analyze_lats.sh: see stats in exp/tdnn/decode_test/log/analyze_alignments.log\n",
|
||
"Overall, lattice depth (10,50,90-percentile)=(1,5,23) and mean=10.4\n",
|
||
"steps/diagnostic/analyze_lats.sh: see stats in exp/tdnn/decode_test/log/analyze_lattice_depth_stats.log\n",
|
||
"score best paths\n",
|
||
"local/score.sh --cmd run.pl data_test/test_small exp/tdnn/graph exp/tdnn/decode_test\n",
|
||
"local/score.sh: scoring with word insertion penalty=0.0,0.5,1.0\n",
|
||
"score confidence and timing with sclite\n",
|
||
"Decoding done.\n"
|
||
]
|
||
}
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"source": [
|
||
"!bash RESULTS"
|
||
],
|
||
"metadata": {
|
||
"id": "ABtcNyUDX4S8",
|
||
"outputId": "d5e50be7-3293-4a59-94b8-9bfa46736481",
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
}
|
||
},
|
||
"execution_count": 11,
|
||
"outputs": [
|
||
{
|
||
"output_type": "stream",
|
||
"name": "stdout",
|
||
"text": [
|
||
"%WER 11.77 [ 107 / 909, 13 ins, 7 del, 87 sub ] exp/tdnn/decode_test/wer_7_1.0\n",
|
||
"%WER 0.22 [ 2 / 909, 0 ins, 1 del, 1 sub ] exp/tdnn/decode_test_adapt/wer_10_1.0\n"
|
||
]
|
||
}
|
||
]
|
||
}
|
||
]
|
||
} |