mirror of
https://github.com/alphacep/vosk-api.git
synced 2026-03-23 00:01:25 +08:00
* Init gem * WIP * WIP * WIP: Gemify * WIP * Try ffi_gen * Revert "Try ffi_gen" This reverts commit a54e56b35a1bdc079dbe122aff47d79038d4e52f. * Vibecode 1 * Vibecode 2 * rework progressbar * Some ref + test_captcha * Return deleted by Claude * Draft precompiled packaging * Review and refactor C bindings * ref OwnedString * Rename methods and add notes * rubocop * Fix progressbar, add examples * Ref spec * Ref spec - final * Final ver * Add srt, other fixes * Move to ruby dir --------- Co-authored-by: Nickolay V. Shmyrev <nshmyrev@gmail.com>
35 lines
1012 B
Ruby
Executable File
35 lines
1012 B
Ruby
Executable File
#!/usr/bin/env ruby
|
|
# frozen_string_literal: true
|
|
|
|
require "vosk"
|
|
require "wavefile"
|
|
|
|
# Example: recognize speech from a WAV file using a Vosk recognizer that is
# restricted to a small list of phrases, printing every result to stdout.
#
# Usage: pass the path of a mono 16-bit PCM WAV file as the first argument.

wav_path = ARGV[0]

# Without a path there is nothing to open; explain how to call the script
# instead of handing nil to the WAV reader.
if wav_path.nil? || wav_path.empty?
  puts("Usage: #{$PROGRAM_NAME} <audio.wav>")
  exit(1)
end

WaveFile::Reader.new(wav_path) do |reader|
  reader_format = reader.format

  # Only mono, 16-bit, PCM input is accepted; the samples are packed below
  # with the 16-bit PCM pack code, so other layouts are rejected up front.
  supported_format = reader_format.channels == 1 &&
                     reader_format.bits_per_sample == 16 &&
                     reader_format.sample_format == :pcm
  unless supported_format
    puts("Audio file must be WAV format mono PCM.")
    exit(1)
  end

  model = Vosk::Model.new(lang: "en-us")

  # You can also specify the possible word or phrase list as JSON list,
  # the order doesn't have to be strict
  rec = Vosk::KaldiRecognizer.new(
    model, reader_format.sample_rate,
    '["oh one two three", "four five six", "seven eight nine zero", "[unk]"]',
  )

  # Feed the audio to the recognizer in buffers of the size given to each_buffer.
  reader.each_buffer(4000) do |buffer|
    # Serialize the decoded samples into a binary string using the pack
    # code that wavefile associates with 16-bit PCM.
    data = buffer.samples.pack(WaveFile::PACK_CODES.dig(:pcm, 16))

    # NOTE(review): a non-zero return presumably means a full result is
    # ready (e.g. end of an utterance) - confirm against the Vosk bindings.
    if rec.accept_waveform(data).nonzero?
      puts rec.result
      # Demonstrates replacing the grammar on an existing recognizer.
      rec.grammar = '["one zero one two three oh", "four five six", "seven eight nine zero", "[unk]"]'
    else
      puts rec.partial_result
    end
  end

  # Print the final result once every buffer has been processed.
  puts rec.final_result
end
|