From a2e38827ef0ac2a233cebebce19ca1ffcfdb1b6e Mon Sep 17 00:00:00 2001 From: Nickolay Shmyrev Date: Tue, 7 May 2019 19:21:36 +0200 Subject: [PATCH] Simple Kaldi websocket server --- COPYING | 176 ++++++++++++++++++++++++++++++++++++++++++++ Makefile | 45 +++++++++++ README.md | 1 + asr_server.py | 26 +++++++ kaldi_recognizer.cc | 167 +++++++++++++++++++++++++++++++++++++++++ kaldi_recognizer.h | 35 +++++++++ kaldi_websocket.i | 19 +++++ model.cc | 79 ++++++++++++++++++++ model.h | 67 +++++++++++++++++ 9 files changed, 615 insertions(+) create mode 100644 COPYING create mode 100644 Makefile create mode 100644 README.md create mode 100755 asr_server.py create mode 100644 kaldi_recognizer.cc create mode 100644 kaldi_recognizer.h create mode 100644 kaldi_websocket.i create mode 100644 model.cc create mode 100644 model.h diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..ab6497f --- /dev/null +++ b/COPYING @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+
+   END OF TERMS AND CONDITIONS
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e0594ca
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,45 @@
+#ATLASLIBS = /usr/lib/libatlas.so.3 /usr/lib/libf77blas.so.3 /usr/lib/libcblas.so.3 /usr/lib/liblapack_atlas.so.3
+ATLASLIBS = /usr/lib/x86_64-linux-gnu/libatlas.so.3 /usr/lib/x86_64-linux-gnu/libf77blas.so.3 /usr/lib/x86_64-linux-gnu/libcblas.so.3 /usr/lib/x86_64-linux-gnu/liblapack_atlas.so.3 -Wl,-rpath=/usr/lib/x86_64-linux-gnu
+KALDI_ROOT ?= $(HOME)/voicechat/kaldi
+CXX := g++
+
+KALDI_FLAGS := -DKALDI_DOUBLEPRECISION=0 -DHAVE_POSIX_MEMALIGN \
+-Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \
+-DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H -DHAVE_ATLAS \
+-I$(KALDI_ROOT)/tools/ATLAS/include \
+-I$(KALDI_ROOT)/tools/openfst/include -I$(KALDI_ROOT)/src
+
+CXXFLAGS := -std=c++11 -g -Wall -DPIC -fPIC $(KALDI_FLAGS) `pkg-config --cflags python3`
+
+KALDI_LIBS = \
+	-rdynamic -Wl,-rpath=$(KALDI_ROOT)/tools/openfst/lib \
+	$(KALDI_ROOT)/src/online2/kaldi-online2.a \
+	$(KALDI_ROOT)/src/decoder/kaldi-decoder.a \
+	$(KALDI_ROOT)/src/ivector/kaldi-ivector.a \
+	$(KALDI_ROOT)/src/gmm/kaldi-gmm.a \
+	$(KALDI_ROOT)/src/nnet3/kaldi-nnet3.a \
+	$(KALDI_ROOT)/src/tree/kaldi-tree.a \
+	$(KALDI_ROOT)/src/feat/kaldi-feat.a \
+	$(KALDI_ROOT)/src/lat/kaldi-lat.a \
+	$(KALDI_ROOT)/src/hmm/kaldi-hmm.a \
+	$(KALDI_ROOT)/src/transform/kaldi-transform.a \
+	$(KALDI_ROOT)/src/cudamatrix/kaldi-cudamatrix.a \
+	$(KALDI_ROOT)/src/matrix/kaldi-matrix.a \
+	$(KALDI_ROOT)/src/fstext/kaldi-fstext.a \
+	$(KALDI_ROOT)/src/util/kaldi-util.a \
+	$(KALDI_ROOT)/src/base/kaldi-base.a \
+	-L $(KALDI_ROOT)/tools/openfst/lib -lfst \
+	$(ATLASLIBS) \
+	`pkg-config --libs python3` \
+	-lm -lpthread
+
+all: _kaldi_websocket.so
+
+_kaldi_websocket.so: kaldi_websocket_wrap.cc kaldi_recognizer.cc model.cc
+	$(CXX) $(CXXFLAGS) -shared -o $@ kaldi_recognizer.cc model.cc kaldi_websocket_wrap.cc $(KALDI_LIBS)
+
+kaldi_websocket_wrap.cc: kaldi_websocket.i
+	swig -python -c++ -o kaldi_websocket_wrap.cc kaldi_websocket.i
+
+clean:
+	$(RM) *.so kaldi_websocket_wrap.cc *.o *.pyc kaldi_websocket.py
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c099e44
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+A very simple websocket server based on Kaldi
diff --git a/asr_server.py b/asr_server.py
new file mode 100755
index 0000000..ecfc32d
--- /dev/null
+++ b/asr_server.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+
+import asyncio
+import pathlib
+import websockets
+from kaldi_websocket import Model, KaldiRecognizer
+
+model = Model()  # built once at import time and shared by every connection
+
+async def recognize(websocket, path):  # per-connection handler: audio messages in, JSON strings out
+    rec = KaldiRecognizer(model)
+    while True:
+        message = await websocket.recv()
+        if message == '{"eof" : 1}':  # this exact text message ends the stream
+            await websocket.send(rec.Result())
+            break
+        if rec.AcceptWaveform(message):  # True when the recognizer reports an endpoint
+            await websocket.send(rec.Result())
+        else:
+            await websocket.send(rec.PartialResult())
+
+start_server = websockets.serve(
+    recognize, '127.0.0.1', 2601)
+
+asyncio.get_event_loop().run_until_complete(start_server)
+asyncio.get_event_loop().run_forever()
diff --git a/kaldi_recognizer.cc b/kaldi_recognizer.cc
new file mode 100644
index 0000000..8032aa9
--- /dev/null
+++ b/kaldi_recognizer.cc
@@ -0,0 +1,167 @@
+#include "kaldi_recognizer.h"
+
+#include "fstext/fstext-utils.h"
+#include "lat/sausages.h"
+
+using namespace fst;
+using namespace kaldi::nnet3;
+
+KaldiRecognizer::KaldiRecognizer(Model &model) : model_(model) {  // model is borrowed, never deleted here
+
+    feature_info_ = new kaldi::OnlineNnet2FeaturePipelineInfo(model_.feature_config_);
+    feature_pipeline_ = new kaldi::OnlineNnet2FeaturePipeline (*feature_info_);
+    silence_weighting_ = new kaldi::OnlineSilenceWeighting(*model_.trans_model_, feature_info_->silence_weighting_config, model_.decodable_opts_.frame_subsampling_factor);  // factor comes from the model options (--frame-subsampling-factor)
+
+    decoder_ = new kaldi::SingleUtteranceNnet3Decoder(model_.nnet3_decoding_config_,
+                                                      *model_.trans_model_,
+                                                      *model_.decodable_info_,
+                                                      *model_.decode_fst_,
+                                                      feature_pipeline_);
+
+    input_finalized_ = false;  // no utterance has been finalized yet
+}
+
+KaldiRecognizer::~KaldiRecognizer() {  // frees the per-recognizer objects; model_ is only a reference
+    delete feature_pipeline_;
+    delete feature_info_;
+    delete silence_weighting_;
+
+    delete decoder_;
+}
+
+void KaldiRecognizer::CleanUp()  // rebuilds pipeline, silence weighting and decoder for the next utterance
+{
+    delete decoder_;
+
+    OnlineIvectorExtractorAdaptationState state(feature_info_->ivector_extractor_info);
+    feature_pipeline_->GetAdaptationState(&state);  // saved so it can be restored into the new pipeline below
+
+    delete feature_pipeline_;
+
+    feature_pipeline_ = new kaldi::OnlineNnet2FeaturePipeline (*feature_info_);
+    feature_pipeline_->SetAdaptationState(state);
+
+    delete silence_weighting_;
+    silence_weighting_ = new kaldi::OnlineSilenceWeighting(*model_.trans_model_, feature_info_->silence_weighting_config, model_.decodable_opts_.frame_subsampling_factor);  // factor comes from the model options (--frame-subsampling-factor)
+
+    decoder_ = new kaldi::SingleUtteranceNnet3Decoder(model_.nnet3_decoding_config_,
+                                                      *model_.trans_model_,
+                                                      *model_.decodable_info_,
+                                                      *model_.decode_fst_,
+                                                      feature_pipeline_);
+}
+
+bool KaldiRecognizer::AcceptWaveform(const char *data, int len) {  // data: buffer of native `short` samples, len: size in bytes; true on endpoint
+
+    if (input_finalized_) {  // Result() finalized the previous utterance: start a fresh one
+        CleanUp();
+        input_finalized_ = false;
+    }
+
+    Vector<BaseFloat> wave;
+    wave.Resize(len / 2, kUndefined);  // two bytes per sample; a trailing odd byte is ignored
+    for (int i = 0; i < len / 2; i++)
+        wave(i) = reinterpret_cast<const short *>(data)[i];
+
+    feature_pipeline_->AcceptWaveform(model_.sample_frequency, wave);  // rate read from model/mfcc.conf instead of a hard-coded 8000
+
+    if (silence_weighting_->Active() && feature_pipeline_->NumFramesReady() > 0 &&
+        feature_pipeline_->IvectorFeature() != nullptr) {
+        std::vector<std::pair<int32, BaseFloat> > delta_weights;
+        silence_weighting_->ComputeCurrentTraceback(decoder_->Decoder());
+        silence_weighting_->GetDeltaWeights(feature_pipeline_->NumFramesReady(),
+                                            &delta_weights);
+        feature_pipeline_->IvectorFeature()->UpdateFrameWeights(delta_weights);
+    }
+
+    decoder_->AdvanceDecoding();
+
+    if (decoder_->EndpointDetected(model_.endpoint_config_)) {
+        return true;
+    }
+
+    return false;
+}
+
+std::string KaldiRecognizer::Result()  // finalizes the utterance (once) and returns words, times and confidences as JSON
+{
+
+    if (!input_finalized_) {  // finalize only once; later calls reuse the finalized decoder state
+        feature_pipeline_->InputFinished();
+
+        if (silence_weighting_->Active() && feature_pipeline_->NumFramesReady() > 0 &&
+            feature_pipeline_->IvectorFeature() != nullptr) {
+            std::vector<std::pair<int32, BaseFloat> > delta_weights;
+            silence_weighting_->ComputeCurrentTraceback(decoder_->Decoder());
+            silence_weighting_->GetDeltaWeights(feature_pipeline_->NumFramesReady(),
+                                                &delta_weights);
+            feature_pipeline_->IvectorFeature()->UpdateFrameWeights(delta_weights);
+        }
+        decoder_->AdvanceDecoding();
+        decoder_->FinalizeDecoding();
+
+        input_finalized_ = true;  // makes the next AcceptWaveform() call CleanUp()
+    }
+
+    kaldi::CompactLattice clat;
+    decoder_->GetLattice(true, &clat);
+
+    CompactLattice aligned_lat;
+    WordAlignLattice(clat, *model_.trans_model_, *model_.winfo_, 0, &aligned_lat);
+    MinimumBayesRisk mbr(aligned_lat);
+
+    const std::vector<BaseFloat> &conf = mbr.GetOneBestConfidences();
+    const std::vector<int32> &words = mbr.GetOneBest();
+    const std::vector<std::pair<BaseFloat, BaseFloat> > &times =
+        mbr.GetOneBestTimes();
+
+    int size = words.size();
+
+    std::stringstream ss;
+
+    // Create JSON object
+    ss << "{\"result\" : [ ";
+    for (int i = 0; i < size; i++) {
+        ss << "{\"word\": \"" << model_.word_syms_->Find(words[i]) << "\", \"start\" : " << times[i].first << "," <<
+              " \"end\" : " << times[i].second << ", \"conf\" : " << conf[i] << "}";
+        if (i != size - 1)
+            ss << ",\n";
+        else
+            ss << "\n";
+    }
+    ss << " ], \"text\" : \"";
+    for (int i = 0; i < size; i++) {
+        ss << model_.word_syms_->Find(words[i]);
+        if (i != size - 1)
+            ss << " ";
+    }
+    ss << "\" }";
+
+    return ss.str();
+}
+
+std::string KaldiRecognizer::PartialResult()  // returns the current best-path words as {"partial" : "..."}
+{
+    decoder_->AdvanceDecoding();
+    if (decoder_->NumFramesDecoded() < 50) {  // fewer than 50 decoded frames: report an empty partial
+        return "{\"partial\" : \"\"}";
+    }
+
+    kaldi::Lattice lat;
+    decoder_->GetBestPath(false, &lat);
+    std::vector<int32> alignment, words;
+    LatticeWeight weight;
+    GetLinearSymbolSequence(lat, &alignment, &words, &weight);
+
+    std::ostringstream outss;
+    outss << "{\"partial\" : \"";
+    for (size_t i = 0; i < words.size(); i++) {
+        if (i) {
+            outss << " ";
+        }
+        outss << model_.word_syms_->Find(words[i]);
+    }
+    outss << "\"}";
+
+    return outss.str();
+}
diff --git a/kaldi_recognizer.h b/kaldi_recognizer.h
new file mode 100644
index 0000000..629cd92
--- /dev/null
+++ b/kaldi_recognizer.h
@@ -0,0 +1,35 @@
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "fstext/fstext-lib.h"
+#include "fstext/fstext-utils.h"
+#include "decoder/lattice-faster-decoder.h"
+#include "feat/feature-mfcc.h"
+#include "lat/kaldi-lattice.h"
+#include "lat/word-align-lattice.h"
+#include "nnet3/am-nnet-simple.h"
+#include "nnet3/nnet-am-decodable-simple.h"
+#include "nnet3/nnet-utils.h"
+
+#include "model.h"
+
+using namespace kaldi;
+
+class KaldiRecognizer {  // streaming recognizer for one audio stream, built on a shared Model
+    public:
+        KaldiRecognizer(Model &model);  // keeps a reference to model; the Model must outlive the recognizer
+        ~KaldiRecognizer();
+        void CleanUp();  // resets the decoding state for a new utterance
+        bool AcceptWaveform(const char *data, int len);  // feeds audio; true when an endpoint is detected
+        std::string Result();  // JSON with per-word entries and the full text
+        std::string PartialResult();  // JSON with the current partial text
+
+    private:
+        Model &model_;  // not owned
+        SingleUtteranceNnet3Decoder *decoder_;  // owned
+
+        OnlineNnet2FeaturePipelineInfo *feature_info_;  // owned
+        OnlineNnet2FeaturePipeline *feature_pipeline_;  // owned, recreated by CleanUp()
+        OnlineSilenceWeighting *silence_weighting_;  // owned, recreated by CleanUp()
+
+        bool input_finalized_;  // set by Result(), cleared by AcceptWaveform()
+};
diff --git a/kaldi_websocket.i b/kaldi_websocket.i
new file mode 100644
index 0000000..862b663
--- /dev/null
+++ b/kaldi_websocket.i
@@ -0,0 +1,19 @@
+%module kaldi_websocket
+
+%include <typemaps.i>
+%include <std_string.i>
+%include <pybuffer.i>
+
+namespace kaldi {
+}
+
+%pybuffer_binary(const char *data, int len)
+
+%{
+#include "kaldi_recognizer.h"
+#include "model.h"
+%}
+
+%include "kaldi_recognizer.h"
+%include "model.h"
+
diff --git a/model.cc b/model.cc
new file mode 100644
index 0000000..92545eb
--- /dev/null
+++ b/model.cc
@@ -0,0 +1,79 @@
+#include "model.h"
+
+Model::Model() {  // loads every resource from the fixed relative "model/" directory
+
+    const char *usage = "Read the docs";
+    const char *extra_args[] = {  // fed to ParseOptions below as if given on a command line
+        "--feature-type=mfcc",
+        "--mfcc-config=model/mfcc.conf",
+        "--min-active=200",
+        "--max-active=6000",
+        "--beam=13.0",
+        "--lattice-beam=6.0",
+        "--acoustic-scale=1.0",
+        "--frames-per-chunk=51",
+        "--frame-subsampling-factor=3",
+
+        "--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10",
+        "--endpoint.rule2.min-trailing-silence=0.5",
+        "--endpoint.rule3.min-trailing-silence=1.0",
+        "--endpoint.rule4.min-trailing-silence=2.0",
+
+
+        "--ivector-silence-weighting.silence-weight=0.001",
+        "--ivector-silence-weighting.silence-phones=1:2:3:4:5:6:7:8:9:10",
+        "--ivector-extraction-config=model/ivector/ivector.conf",
+    };
+
+    kaldi::ParseOptions po(usage);
+    feature_config_.Register(&po);
+    nnet3_decoding_config_.Register(&po);
+    endpoint_config_.Register(&po);
+    decodable_opts_.Register(&po);
+
+    MfccOptions mfcc_opts;
+    ReadConfigFromFile("model/mfcc.conf", &mfcc_opts);
+    sample_frequency = mfcc_opts.frame_opts.samp_freq;  // rate the recognizer must pass with each waveform chunk
+    KALDI_LOG << "Sample rate is " << sample_frequency;
+
+    std::vector<const char*> args;
+    args.push_back("server");  // stands in for argv[0]
+    args.insert(args.end(), extra_args, extra_args + sizeof(extra_args) / sizeof(extra_args[0]));
+    po.Read(args.size(), args.data());
+
+    nnet3_rxfilename_ = "model/final.mdl";
+    word_syms_rxfilename_ = "model/words.txt";
+    fst_rxfilename_ = "model/HCLG.fst";
+
+    trans_model_ = new kaldi::TransitionModel();
+    nnet_ = new kaldi::nnet3::AmNnetSimple();
+    {
+        bool binary;
+        kaldi::Input ki(nnet3_rxfilename_, &binary);
+        trans_model_->Read(ki.Stream(), binary);  // transition model and nnet are read from the same stream, in this order
+        nnet_->Read(ki.Stream(), binary);
+        SetBatchnormTestMode(true, &(nnet_->GetNnet()));
+        SetDropoutTestMode(true, &(nnet_->GetNnet()));
+        nnet3::CollapseModel(nnet3::CollapseModelConfig(), &(nnet_->GetNnet()));
+    }
+
+    decodable_info_ = new nnet3::DecodableNnetSimpleLoopedInfo(decodable_opts_,
+                                                               nnet_);
+    decode_fst_ = fst::ReadFstKaldiGeneric(fst_rxfilename_);
+
+    word_syms_ = nullptr;
+    if (!(word_syms_ = fst::SymbolTable::ReadText(word_syms_rxfilename_)))
+        KALDI_ERR << "Could not read symbol table from file " << word_syms_rxfilename_;
+
+    kaldi::WordBoundaryInfoNewOpts opts;
+    winfo_ = new kaldi::WordBoundaryInfo(opts, "model/word_boundary.int");
+}
+
+Model::~Model() {  // releases everything allocated in the constructor
+    delete decodable_info_;
+    delete decode_fst_;
+    delete trans_model_;
+    delete nnet_;
+    delete word_syms_;
+    delete winfo_;
+}
diff --git a/model.h b/model.h
new file mode 100644
index 0000000..5dcd7f3
--- /dev/null
+++ b/model.h
@@ -0,0 +1,67 @@
+// model.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABILITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef model_H_
+#define model_H_
+
+#include "base/kaldi-common.h"
+#include "fstext/fstext-lib.h"
+#include "fstext/fstext-utils.h"
+#include "online2/onlinebin-util.h"
+#include "online2/online-timing.h"
+#include "online2/online-endpoint.h"
+#include "online2/online-nnet3-decoding.h"
+#include "online2/online-feature-pipeline.h"
+#include "lat/lattice-functions.h"
+#include "lat/sausages.h"
+#include "lat/word-align-lattice.h"
+#include "lm/const-arpa-lm.h"
+#include "util/parse-options.h"
+#include "nnet3/nnet-utils.h"
+#include "rnnlm/rnnlm-utils.h"
+
+using namespace kaldi;
+
+class KaldiRecognizer;
+
+class Model {  // holds the loaded model files and decoding options used by KaldiRecognizer
+
+public:
+    Model();
+    ~Model();
+
+protected:
+    friend class KaldiRecognizer;  // the recognizer reads the members below directly
+
+    std::string nnet3_rxfilename_;
+    std::string fst_rxfilename_;
+    std::string word_syms_rxfilename_;
+
+    kaldi::OnlineEndpointConfig endpoint_config_;
+    kaldi::LatticeFasterDecoderConfig nnet3_decoding_config_;
+    kaldi::nnet3::NnetSimpleLoopedComputationOptions decodable_opts_;
+
+    kaldi::OnlineNnet2FeaturePipelineConfig feature_config_;
+    kaldi::BaseFloat sample_frequency;  // read from model/mfcc.conf in the constructor
+
+    kaldi::nnet3::DecodableNnetSimpleLoopedInfo *decodable_info_;  // owned (this and the pointers below)
+    fst::Fst<fst::StdArc> *decode_fst_;
+    kaldi::TransitionModel *trans_model_;
+    kaldi::nnet3::AmNnetSimple *nnet_;
+    fst::SymbolTable *word_syms_;
+    kaldi::WordBoundaryInfo *winfo_;
+};
+
+#endif /* model_H_ */