alphacep_vosk-api/python/example/colab/vosk-adaptation.ipynb
2022-08-17 12:38:17 +02:00

558 lines
38 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Vosk Adaptation",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"gpuClass": "standard"
},
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "URzWMmv50-Ba",
"outputId": "0e096a99-74dd-42e2-efb1-9cba784c3664"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content\n",
"--2022-08-17 09:48:52-- https://alphacephei.com/vosk-colab/kaldi.tar.gz\n",
"Resolving alphacephei.com (alphacephei.com)... 188.40.21.16, 2a01:4f8:13a:279f::2\n",
"Connecting to alphacephei.com (alphacephei.com)|188.40.21.16|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 809174554 (772M) [application/octet-stream]\n",
"Saving to: kaldi.tar.gz\n",
"\n",
"kaldi.tar.gz 100%[===================>] 771.69M 20.3MB/s in 40s \n",
"\n",
"2022-08-17 09:49:33 (19.4 MB/s) - kaldi.tar.gz saved [809174554/809174554]\n",
"\n"
]
}
],
"source": [
"# Download the Kaldi archive into /content and unpack it there.\n",
"%cd /content\n",
"# Chain with && so extraction is only attempted after a successful download\n",
"# (wget -c resumes a partial file instead of starting over).\n",
"!wget -c https://alphacephei.com/vosk-colab/kaldi.tar.gz && tar xzf kaldi.tar.gz"
]
},
{
"cell_type": "code",
"source": [
"# Download and unpack the vosk-model-small-en-us-0.15 compile recipe under kaldi/egs/ac.\n",
"%cd /content/kaldi/egs/ac\n",
"# The previously unpacked copy is removed only after the download succeeded,\n",
"# so a failed download cannot wipe a working recipe directory.\n",
"!wget -c https://alphacephei.com/vosk-colab/vosk-model-small-en-us-0.15-compile-colab.tar.gz && rm -rf vosk-model-small-en-us-0.15-compile-colab && tar xf vosk-model-small-en-us-0.15-compile-colab.tar.gz"
],
"metadata": {
"id": "-065p7WC2SHh",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "241c7473-7464-48d5-b48d-dc6e3bf4971d"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content/kaldi/egs/ac\n",
"--2022-08-17 10:28:26-- https://alphacephei.com/vosk-colab/vosk-model-small-en-us-0.15-compile-colab.tar.gz\n",
"Resolving alphacephei.com (alphacephei.com)... 188.40.21.16, 2a01:4f8:13a:279f::2\n",
"Connecting to alphacephei.com (alphacephei.com)|188.40.21.16|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 59618100 (57M) [application/octet-stream]\n",
"Saving to: vosk-model-small-en-us-0.15-compile-colab.tar.gz\n",
"\n",
"vosk-model-small-en 100%[===================>] 56.86M 18.6MB/s in 3.6s \n",
"\n",
"2022-08-17 10:28:30 (15.7 MB/s) - vosk-model-small-en-us-0.15-compile-colab.tar.gz saved [59618100/59618100]\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Work inside the unpacked recipe directory.\n",
"%cd /content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab\n",
"# List the recipe contents, print compile-graph.sh for reference, then run it.\n",
"# The commands are separated by ';' so each one runs regardless of the previous result.\n",
"!ls; cat compile-graph.sh; bash compile-graph.sh"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wuDjvNbd2sf9",
"outputId": "34a1d2fe-d443-4574-e25d-824e38eb3a78"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab\n",
"compile-graph.sh data_test decode.sh\texp\t local path.sh steps\n",
"conf\t\t db\t dict.py\tget_vocab.py mfcc RESULTS utils\n",
"#!/bin/bash\n",
"\n",
"set -x\n",
"\n",
". path.sh\n",
"\n",
"pip3 install phonetisaurus\n",
"\n",
"rm -rf data\n",
"rm -rf exp/tdnn/lgraph\n",
"rm -rf exp/tdnn/lgraph_orig\n",
"\n",
"mkdir -p data/dict\n",
"cp db/phone/* data/dict\n",
"./dict.py > data/dict/lexicon.txt\n",
"\n",
"python3 ./get_vocab.py > data/mix.vocab\n",
"ngramsymbols data/mix.vocab data/mix.syms\n",
"farcompilestrings --fst_type=compact --symbols=data/mix.syms --keep_symbols --unknown_symbol=\"[unk]\" db/extra.txt data/extra.far\n",
"ngramcount --order=3 data/extra.far - |\n",
" ngramprint --integers | grep -v \"<unk>\" | ngramread |\n",
" ngramshrink --method=count_prune --count_pattern=\"3+:3\" |\n",
" ngrammake --method=witten_bell - data/extra.mod\n",
"gunzip -c db/en-50k-0.4-android.lm.gz | ngramread --renormalize_arpa --ARPA --symbols=data/mix.syms - data/en-us.mod\n",
"ngrammerge --method=\"bayes_model_merge\" --normalize --alpha=0.95 --beta=0.05 data/en-us.mod data/extra.mod data/en-us-mix.mod\n",
"ngramprint --ARPA data/en-us-mix.mod | gzip -c > data/en-us-mix.lm.gz\n",
"\n",
"# Prune for the first stage if needed\n",
"# ngramshrink --method=relative_entropy --theta=2e-8 data/en-us-mix.mod data/en-us-mix-prune.mod\n",
"# ngramprint --ARPA data/en-us-mix-prune.mod | gzip -c > data/en-us-mix-small.lm.gz\n",
"\n",
"utils/prepare_lang.sh data/dict \"[unk]\" data/lang_local data/lang\n",
"utils/format_lm.sh data/lang db/en-50k-0.4-android.lm.gz data/dict/lexicon.txt data/lang_test\n",
"utils/format_lm.sh data/lang data/en-us-mix.lm.gz data/dict/lexicon.txt data/lang_test_adapt\n",
"\n",
"utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test exp/tdnn exp/tdnn/graph\n",
"utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_adapt exp/tdnn exp/tdnn/graph_adapt\n",
"\n",
"# Lookahead part goes OOM\n",
"#utils/mkgraph_lookahead.sh \\\n",
"# --self-loop-scale 1.0 data/lang \\\n",
"# exp/tdnn data/en-us-mix.lm.gz exp/tdnn/lgraph\n",
"#utils/mkgraph_lookahead.sh \\\n",
"# --self-loop-scale 1.0 data/lang \\\n",
"# exp/tdnn db/en-50k-0.4-android.lm.gz exp/tdnn/lgraph_orig\n",
"+ . path.sh\n",
"+++ pwd\n",
"++ export KALDI_ROOT=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../..\n",
"++ KALDI_ROOT=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../..\n",
"++ export PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/utils:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fstbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/gmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/featbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lm:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmm2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/latbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnetbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/online2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/ivectorbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/chainbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet3bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab:/opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/sph2pipe_v2.5\n",
"++ PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/utils:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fstbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/gmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/featbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lm:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmm2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/latbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnetbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/online2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/ivectorbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/chainbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet3bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab:/opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/sph2pipe_v2.5\n",
"++ export PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/ngram-1.3.7/src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/utils:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fstbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/gmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/featbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lm:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmm2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/latbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnetbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/online2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/ivectorbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/chainbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet3bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab:/opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/sph2pipe_v2.5\n",
"++ PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/ngram-1.3.7/src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/utils:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fstbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/gmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/featbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lm:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/sgmm2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/fgmmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/latbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnetbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/online2bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/ivectorbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/lmbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/chainbin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../src/nnet3bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab:/opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/sph2pipe_v2.5\n",
"++ export LD_LIBRARY_PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/lib/fst/\n",
"++ LD_LIBRARY_PATH=/content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab/../../../tools/openfst/lib/fst/\n",
"++ export LC_ALL=C\n",
"++ LC_ALL=C\n",
"+ pip3 install phonetisaurus\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: phonetisaurus in /usr/local/lib/python3.7/dist-packages (0.3.0)\n",
"+ rm -rf data\n",
"+ rm -rf exp/tdnn/lgraph\n",
"+ rm -rf exp/tdnn/lgraph_orig\n",
"+ mkdir -p data/dict\n",
"+ cp db/phone/extra_questions.txt db/phone/nonsilence_phones.txt db/phone/optional_silence.txt db/phone/silence_phones.txt data/dict\n",
"+ ./dict.py\n",
"+ python3 ./get_vocab.py\n",
"+ ngramsymbols data/mix.vocab data/mix.syms\n",
"+ farcompilestrings --fst_type=compact --symbols=data/mix.syms --keep_symbols '--unknown_symbol=[unk]' db/extra.txt data/extra.far\n",
"+ ngramcount --order=3 data/extra.far -\n",
"+ ngrammake --method=witten_bell - data/extra.mod\n",
"+ ngramshrink --method=count_prune --count_pattern=3+:3\n",
"+ ngramread\n",
"+ ngramprint --integers\n",
"+ grep -v '<unk>'\n",
"+ ngramread --renormalize_arpa --ARPA --symbols=data/mix.syms - data/en-us.mod\n",
"+ gunzip -c db/en-50k-0.4-android.lm.gz\n",
"+ ngrammerge --method=bayes_model_merge --normalize --alpha=0.95 --beta=0.05 data/en-us.mod data/extra.mod data/en-us-mix.mod\n",
"+ ngramprint --ARPA data/en-us-mix.mod\n",
"+ gzip -c\n",
"+ utils/prepare_lang.sh data/dict '[unk]' data/lang_local data/lang\n",
"utils/prepare_lang.sh data/dict [unk] data/lang_local data/lang\n",
"Checking data/dict/silence_phones.txt ...\n",
"--> reading data/dict/silence_phones.txt\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> data/dict/silence_phones.txt is OK\n",
"\n",
"Checking data/dict/optional_silence.txt ...\n",
"--> reading data/dict/optional_silence.txt\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> data/dict/optional_silence.txt is OK\n",
"\n",
"Checking data/dict/nonsilence_phones.txt ...\n",
"--> reading data/dict/nonsilence_phones.txt\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> data/dict/nonsilence_phones.txt is OK\n",
"\n",
"Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n",
"--> disjoint property is OK.\n",
"\n",
"Checking data/dict/lexicon.txt\n",
"--> reading data/dict/lexicon.txt\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> data/dict/lexicon.txt is OK\n",
"\n",
"Checking data/dict/extra_questions.txt ...\n",
"--> data/dict/extra_questions.txt is empty (this is OK)\n",
"--> SUCCESS [validating dictionary directory data/dict]\n",
"\n",
"**Creating data/dict/lexiconp.txt from data/dict/lexicon.txt\n",
"fstaddselfloops data/lang/phones/wdisambig_phones.int data/lang/phones/wdisambig_words.int \n",
"prepare_lang.sh: validating output directory\n",
"utils/validate_lang.pl data/lang\n",
"Checking existence of separator file\n",
"separator file data/lang/subword_separator.txt is empty or does not exist, deal in word case.\n",
"Checking data/lang/phones.txt ...\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> data/lang/phones.txt is OK\n",
"\n",
"Checking words.txt: #0 ...\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> data/lang/words.txt is OK\n",
"\n",
"Checking disjoint: silence.txt, nonsilence.txt, disambig.txt ...\n",
"--> silence.txt and nonsilence.txt are disjoint\n",
"--> silence.txt and disambig.txt are disjoint\n",
"--> disambig.txt and nonsilence.txt are disjoint\n",
"--> disjoint property is OK\n",
"\n",
"Checking sumation: silence.txt, nonsilence.txt, disambig.txt ...\n",
"--> found no unexplainable phones in phones.txt\n",
"\n",
"Checking data/lang/phones/context_indep.{txt, int, csl} ...\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> 10 entry/entries in data/lang/phones/context_indep.txt\n",
"--> data/lang/phones/context_indep.int corresponds to data/lang/phones/context_indep.txt\n",
"--> data/lang/phones/context_indep.csl corresponds to data/lang/phones/context_indep.txt\n",
"--> data/lang/phones/context_indep.{txt, int, csl} are OK\n",
"\n",
"Checking data/lang/phones/nonsilence.{txt, int, csl} ...\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> 156 entry/entries in data/lang/phones/nonsilence.txt\n",
"--> data/lang/phones/nonsilence.int corresponds to data/lang/phones/nonsilence.txt\n",
"--> data/lang/phones/nonsilence.csl corresponds to data/lang/phones/nonsilence.txt\n",
"--> data/lang/phones/nonsilence.{txt, int, csl} are OK\n",
"\n",
"Checking data/lang/phones/silence.{txt, int, csl} ...\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> 10 entry/entries in data/lang/phones/silence.txt\n",
"--> data/lang/phones/silence.int corresponds to data/lang/phones/silence.txt\n",
"--> data/lang/phones/silence.csl corresponds to data/lang/phones/silence.txt\n",
"--> data/lang/phones/silence.{txt, int, csl} are OK\n",
"\n",
"Checking data/lang/phones/optional_silence.{txt, int, csl} ...\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> 1 entry/entries in data/lang/phones/optional_silence.txt\n",
"--> data/lang/phones/optional_silence.int corresponds to data/lang/phones/optional_silence.txt\n",
"--> data/lang/phones/optional_silence.csl corresponds to data/lang/phones/optional_silence.txt\n",
"--> data/lang/phones/optional_silence.{txt, int, csl} are OK\n",
"\n",
"Checking data/lang/phones/disambig.{txt, int, csl} ...\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> 14 entry/entries in data/lang/phones/disambig.txt\n",
"--> data/lang/phones/disambig.int corresponds to data/lang/phones/disambig.txt\n",
"--> data/lang/phones/disambig.csl corresponds to data/lang/phones/disambig.txt\n",
"--> data/lang/phones/disambig.{txt, int, csl} are OK\n",
"\n",
"Checking data/lang/phones/roots.{txt, int} ...\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> 41 entry/entries in data/lang/phones/roots.txt\n",
"--> data/lang/phones/roots.int corresponds to data/lang/phones/roots.txt\n",
"--> data/lang/phones/roots.{txt, int} are OK\n",
"\n",
"Checking data/lang/phones/sets.{txt, int} ...\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> 41 entry/entries in data/lang/phones/sets.txt\n",
"--> data/lang/phones/sets.int corresponds to data/lang/phones/sets.txt\n",
"--> data/lang/phones/sets.{txt, int} are OK\n",
"\n",
"Checking data/lang/phones/extra_questions.{txt, int} ...\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> 9 entry/entries in data/lang/phones/extra_questions.txt\n",
"--> data/lang/phones/extra_questions.int corresponds to data/lang/phones/extra_questions.txt\n",
"--> data/lang/phones/extra_questions.{txt, int} are OK\n",
"\n",
"Checking data/lang/phones/word_boundary.{txt, int} ...\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> 166 entry/entries in data/lang/phones/word_boundary.txt\n",
"--> data/lang/phones/word_boundary.int corresponds to data/lang/phones/word_boundary.txt\n",
"--> data/lang/phones/word_boundary.{txt, int} are OK\n",
"\n",
"Checking optional_silence.txt ...\n",
"--> reading data/lang/phones/optional_silence.txt\n",
"--> data/lang/phones/optional_silence.txt is OK\n",
"\n",
"Checking disambiguation symbols: #0 and #1\n",
"--> data/lang/phones/disambig.txt has \"#0\" and \"#1\"\n",
"--> data/lang/phones/disambig.txt is OK\n",
"\n",
"Checking topo ...\n",
"\n",
"Checking word_boundary.txt: silence.txt, nonsilence.txt, disambig.txt ...\n",
"--> data/lang/phones/word_boundary.txt doesn't include disambiguation symbols\n",
"--> data/lang/phones/word_boundary.txt is the union of nonsilence.txt and silence.txt\n",
"--> data/lang/phones/word_boundary.txt is OK\n",
"\n",
"Checking word-level disambiguation symbols...\n",
"--> data/lang/phones/wdisambig.txt exists (newer prepare_lang.sh)\n",
"Checking word_boundary.int and disambig.int\n",
"--> generating a 98 word/subword sequence\n",
"--> resulting phone sequence from L.fst corresponds to the word sequence\n",
"--> L.fst is OK\n",
"--> generating a 49 word/subword sequence\n",
"--> resulting phone sequence from L_disambig.fst corresponds to the word sequence\n",
"--> L_disambig.fst is OK\n",
"\n",
"Checking data/lang/oov.{txt, int} ...\n",
"--> text seems to be UTF-8 or ASCII, checking whitespaces\n",
"--> text contains only allowed whitespaces\n",
"--> 1 entry/entries in data/lang/oov.txt\n",
"--> data/lang/oov.int corresponds to data/lang/oov.txt\n",
"--> data/lang/oov.{txt, int} are OK\n",
"\n",
"--> data/lang/L.fst is olabel sorted\n",
"--> data/lang/L_disambig.fst is olabel sorted\n",
"--> SUCCESS [validating lang directory data/lang]\n",
"+ utils/format_lm.sh data/lang db/en-50k-0.4-android.lm.gz data/dict/lexicon.txt data/lang_test\n",
"Converting 'db/en-50k-0.4-android.lm.gz' to FST\n",
"arpa2fst --disambig-symbol=#0 --read-symbol-table=data/lang_test/words.txt - data/lang_test/G.fst \n",
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:94) Reading \\data\\ section.\n",
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\1-grams: section.\n",
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\2-grams: section.\n",
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\3-grams: section.\n",
"LOG (arpa2fst[5.5.1046~1-76cd5]:RemoveRedundantStates():arpa-lm-compiler.cc:359) Reduced num-states from 1217362 to 185036\n",
"fstisstochastic data/lang_test/G.fst \n",
"0.476411 -3.03779\n",
"Succeeded in formatting LM: 'db/en-50k-0.4-android.lm.gz'\n",
"+ utils/format_lm.sh data/lang data/en-us-mix.lm.gz data/dict/lexicon.txt data/lang_test_adapt\n",
"Converting 'data/en-us-mix.lm.gz' to FST\n",
"arpa2fst --disambig-symbol=#0 --read-symbol-table=data/lang_test_adapt/words.txt - data/lang_test_adapt/G.fst \n",
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:94) Reading \\data\\ section.\n",
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\1-grams: section.\n",
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\2-grams: section.\n",
"LOG (arpa2fst[5.5.1046~1-76cd5]:Read():arpa-file-parser.cc:149) Reading \\3-grams: section.\n",
"LOG (arpa2fst[5.5.1046~1-76cd5]:RemoveRedundantStates():arpa-lm-compiler.cc:359) Reduced num-states from 1217646 to 185095\n",
"fstisstochastic data/lang_test_adapt/G.fst \n",
"6.81902e-07 -3.03779\n",
"Succeeded in formatting LM: 'data/en-us-mix.lm.gz'\n",
"+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test exp/tdnn exp/tdnn/graph\n",
"tree-info exp/tdnn/tree \n",
"tree-info exp/tdnn/tree \n",
"fstdeterminizestar --use-log=true \n",
"fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst \n",
"fstminimizeencoded \n",
"fstpushspecial \n",
"fstisstochastic data/lang_test/tmp/LG.fst \n",
"-0.145498 -0.146281\n",
"[info]: LG not stochastic.\n",
"fstcomposecontext --context-size=2 --central-position=1 --read-disambig-syms=data/lang_test/phones/disambig.int --write-disambig-syms=data/lang_test/tmp/disambig_ilabels_2_1.int data/lang_test/tmp/ilabels_2_1.905 data/lang_test/tmp/LG.fst \n",
"fstisstochastic data/lang_test/tmp/CLG_2_1.fst \n",
"-0.145498 -0.146281\n",
"[info]: CLG not stochastic.\n",
"make-h-transducer --disambig-syms-out=exp/tdnn/graph/disambig_tid.int --transition-scale=1.0 data/lang_test/tmp/ilabels_2_1 exp/tdnn/tree exp/tdnn/final.mdl \n",
"fstrmepslocal \n",
"fsttablecompose exp/tdnn/graph/Ha.fst data/lang_test/tmp/CLG_2_1.fst \n",
"fstdeterminizestar --use-log=true \n",
"fstminimizeencoded \n",
"fstrmsymbols exp/tdnn/graph/disambig_tid.int \n",
"fstisstochastic exp/tdnn/graph/HCLGa.fst \n",
"-0.109817 -0.571742\n",
"HCLGa is not stochastic\n",
"add-self-loops --self-loop-scale=1.0 --reorder=true exp/tdnn/final.mdl exp/tdnn/graph/HCLGa.fst \n",
"fstisstochastic exp/tdnn/graph/HCLG.fst \n",
"1.90465e-09 -0.415046\n",
"[info]: final HCLG is not stochastic.\n",
"+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_adapt exp/tdnn exp/tdnn/graph_adapt\n",
"tree-info exp/tdnn/tree \n",
"tree-info exp/tdnn/tree \n",
"fstdeterminizestar --use-log=true \n",
"fsttablecompose data/lang_test_adapt/L_disambig.fst data/lang_test_adapt/G.fst \n",
"fstminimizeencoded \n",
"fstpushspecial \n",
"fstisstochastic data/lang_test_adapt/tmp/LG.fst \n",
"-0.148474 -0.149181\n",
"[info]: LG not stochastic.\n",
"fstcomposecontext --context-size=2 --central-position=1 --read-disambig-syms=data/lang_test_adapt/phones/disambig.int --write-disambig-syms=data/lang_test_adapt/tmp/disambig_ilabels_2_1.int data/lang_test_adapt/tmp/ilabels_2_1.979 data/lang_test_adapt/tmp/LG.fst \n",
"fstisstochastic data/lang_test_adapt/tmp/CLG_2_1.fst \n",
"-0.148474 -0.149181\n",
"[info]: CLG not stochastic.\n",
"make-h-transducer --disambig-syms-out=exp/tdnn/graph_adapt/disambig_tid.int --transition-scale=1.0 data/lang_test_adapt/tmp/ilabels_2_1 exp/tdnn/tree exp/tdnn/final.mdl \n",
"fstrmepslocal \n",
"fsttablecompose exp/tdnn/graph_adapt/Ha.fst data/lang_test_adapt/tmp/CLG_2_1.fst \n",
"fstdeterminizestar --use-log=true \n",
"fstminimizeencoded \n",
"fstrmsymbols exp/tdnn/graph_adapt/disambig_tid.int \n",
"fstisstochastic exp/tdnn/graph_adapt/HCLGa.fst \n",
"-0.113907 -0.5857\n",
"HCLGa is not stochastic\n",
"add-self-loops --self-loop-scale=1.0 --reorder=true exp/tdnn/final.mdl exp/tdnn/graph_adapt/HCLGa.fst \n",
"fstisstochastic exp/tdnn/graph_adapt/HCLG.fst \n",
"1.90465e-09 -0.423618\n",
"[info]: final HCLG is not stochastic.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Set the working directory explicitly so this cell does not rely on an\n",
"# earlier cell having changed into the recipe directory (e.g. after a runtime restart).\n",
"%cd /content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab\n",
"# Print the decoding script for reference, then run it.\n",
"!cat decode.sh\n",
"!bash decode.sh"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Sl3QBI1MXpc-",
"outputId": "affac8a3-782f-4000-e31f-81bfed47a37a"
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"#!/bin/bash\n",
"\n",
". path.sh\n",
"\n",
"steps/make_mfcc.sh --nj 10 data_test/test_small exp/make_mfcc/test mfcc\n",
"steps/compute_cmvn_stats.sh data_test/test_small exp/make_mfcc/test mfcc\n",
"utils/fix_data_dir.sh data_test/test_small\n",
"\n",
"steps/online/nnet2/extract_ivectors_online.sh --nj 4 \\\n",
" data_test/test_small exp/extractor \\\n",
" exp/ivectors_test\n",
"\n",
"steps/nnet3/decode.sh --nj 4 \\\n",
" --acwt 1.0 --post-decode-acwt 10.0 \\\n",
" --online-ivector-dir exp/ivectors_test \\\n",
" exp/tdnn/graph_adapt data_test/test_small exp/tdnn/decode_test_adapt\n",
"\n",
"steps/nnet3/decode.sh --nj 4 \\\n",
" --acwt 1.0 --post-decode-acwt 10.0 \\\n",
" --online-ivector-dir exp/ivectors_test \\\n",
" exp/tdnn/graph data_test/test_small exp/tdnn/decode_test\n",
"\n",
"#steps/nnet3/decode_lookahead.sh --nj 4 \\\n",
"# --acwt 1.0 --post-decode-acwt 10.0 \\\n",
"# --online-ivector-dir exp/ivectors_test \\\n",
"# exp/tdnn/lgraph data_test/test_small exp/tdnn/decode_test_adapt\n",
"#steps/nnet3/decode_lookahead.sh --nj 4 \\\n",
"# --acwt 1.0 --post-decode-acwt 10.0 \\\n",
"# --online-ivector-dir exp/ivectors_test \\\n",
"# exp/tdnn/lgraph_orig data_test/test_small exp/tdnn/decode_test\n",
"steps/make_mfcc.sh --nj 10 data_test/test_small exp/make_mfcc/test mfcc\n",
"steps/make_mfcc.sh: moving data_test/test_small/feats.scp to data_test/test_small/.backup\n",
"utils/validate_data_dir.sh: Successfully validated data-directory data_test/test_small\n",
"steps/make_mfcc.sh: [info]: no segments file exists: assuming wav.scp indexed by utterance.\n",
"steps/make_mfcc.sh: Succeeded creating MFCC features for test_small\n",
"steps/compute_cmvn_stats.sh data_test/test_small exp/make_mfcc/test mfcc\n",
"Succeeded creating CMVN stats for test_small\n",
"fix_data_dir.sh: kept all 50 utterances.\n",
"fix_data_dir.sh: old files are kept in data_test/test_small/.backup\n",
"steps/online/nnet2/extract_ivectors_online.sh --nj 4 data_test/test_small exp/extractor exp/ivectors_test\n",
"steps/online/nnet2/extract_ivectors_online.sh: extracting iVectors\n",
"steps/online/nnet2/extract_ivectors_online.sh: combining iVectors across jobs\n",
"steps/online/nnet2/extract_ivectors_online.sh: done extracting (online) iVectors to exp/ivectors_test using the extractor in exp/extractor.\n",
"steps/nnet3/decode.sh --nj 4 --acwt 1.0 --post-decode-acwt 10.0 --online-ivector-dir exp/ivectors_test exp/tdnn/graph_adapt data_test/test_small exp/tdnn/decode_test_adapt\n",
"steps/nnet2/check_ivectors_compatible.sh: WARNING: One of the directories do not contain iVector ID.\n",
"steps/nnet2/check_ivectors_compatible.sh: WARNING: That means it's you who's reponsible for keeping \n",
"steps/nnet2/check_ivectors_compatible.sh: WARNING: the directories compatible\n",
"steps/nnet3/decode.sh: feature type is raw\n",
"steps/diagnostic/analyze_lats.sh --cmd run.pl --iter final exp/tdnn/graph_adapt exp/tdnn/decode_test_adapt\n",
"steps/diagnostic/analyze_lats.sh: see stats in exp/tdnn/decode_test_adapt/log/analyze_alignments.log\n",
"Overall, lattice depth (10,50,90-percentile)=(1,1,4) and mean=2.4\n",
"steps/diagnostic/analyze_lats.sh: see stats in exp/tdnn/decode_test_adapt/log/analyze_lattice_depth_stats.log\n",
"score best paths\n",
"local/score.sh --cmd run.pl data_test/test_small exp/tdnn/graph_adapt exp/tdnn/decode_test_adapt\n",
"local/score.sh: scoring with word insertion penalty=0.0,0.5,1.0\n",
"score confidence and timing with sclite\n",
"Decoding done.\n",
"steps/nnet3/decode.sh --nj 4 --acwt 1.0 --post-decode-acwt 10.0 --online-ivector-dir exp/ivectors_test exp/tdnn/graph data_test/test_small exp/tdnn/decode_test\n",
"steps/nnet2/check_ivectors_compatible.sh: WARNING: One of the directories do not contain iVector ID.\n",
"steps/nnet2/check_ivectors_compatible.sh: WARNING: That means it's you who's reponsible for keeping \n",
"steps/nnet2/check_ivectors_compatible.sh: WARNING: the directories compatible\n",
"steps/nnet3/decode.sh: feature type is raw\n",
"steps/diagnostic/analyze_lats.sh --cmd run.pl --iter final exp/tdnn/graph exp/tdnn/decode_test\n",
"steps/diagnostic/analyze_lats.sh: see stats in exp/tdnn/decode_test/log/analyze_alignments.log\n",
"Overall, lattice depth (10,50,90-percentile)=(1,5,23) and mean=10.4\n",
"steps/diagnostic/analyze_lats.sh: see stats in exp/tdnn/decode_test/log/analyze_lattice_depth_stats.log\n",
"score best paths\n",
"local/score.sh --cmd run.pl data_test/test_small exp/tdnn/graph exp/tdnn/decode_test\n",
"local/score.sh: scoring with word insertion penalty=0.0,0.5,1.0\n",
"score confidence and timing with sclite\n",
"Decoding done.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Set the working directory explicitly so this cell does not rely on an\n",
"# earlier cell having changed into the recipe directory (e.g. after a runtime restart).\n",
"%cd /content/kaldi/egs/ac/vosk-model-small-en-us-0.15-compile-colab\n",
"# Run the RESULTS script, which prints the word error rate lines for the decode directories.\n",
"!bash RESULTS"
],
"metadata": {
"id": "ABtcNyUDX4S8",
"outputId": "d5e50be7-3293-4a59-94b8-9bfa46736481",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"%WER 11.77 [ 107 / 909, 13 ins, 7 del, 87 sub ] exp/tdnn/decode_test/wer_7_1.0\n",
"%WER 0.22 [ 2 / 909, 0 ins, 1 del, 1 sub ] exp/tdnn/decode_test_adapt/wer_10_1.0\n"
]
}
]
}
]
}