mirror of
https://github.com/alphacep/vosk-api.git
synced 2026-01-09 07:32:34 +08:00
Add endpointer mode to java and csharp API
This commit is contained in:
parent
24f0e0cff8
commit
07a0ddb467
@ -10,7 +10,7 @@ buildscript {
|
||||
}
|
||||
|
||||
allprojects {
|
||||
version = '0.3.50'
|
||||
version = '0.3.70'
|
||||
}
|
||||
|
||||
subprojects {
|
||||
|
||||
@ -56,6 +56,10 @@ public class LibVosk {
|
||||
|
||||
public static native void vosk_recognizer_reset(Pointer recognizer);
|
||||
|
||||
public static native void vosk_recognizer_set_endpointer_mode(Pointer recognizer, int mode);
|
||||
|
||||
public static native void vosk_recognizer_set_endpointer_delays(Pointer recognizer, float t_start_max, float t_end, float t_max);
|
||||
|
||||
public static native void vosk_recognizer_free(Pointer recognizer);
|
||||
|
||||
public static native Pointer vosk_text_processor_new(String verbalizer, String tagger);
|
||||
|
||||
@ -236,6 +236,34 @@ public class Recognizer extends PointerType implements AutoCloseable {
|
||||
LibVosk.vosk_recognizer_reset(this.getPointer());
|
||||
}
|
||||
|
||||
/**
|
||||
* Endpointer delay mode
|
||||
*/
|
||||
public class EndpointerMode {
|
||||
public static final int DEFAULT = 0;
|
||||
public static final int SHORT = 1;
|
||||
public static final int LONG = 2;
|
||||
public static final int VERY_LONG = 3;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configures endpointer mode for recognizer
|
||||
*/
|
||||
public void setEndpointerMode(int mode) {
|
||||
LibVosk.vosk_recognizer_set_endpointer_mode(this.getPointer(), mode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set endpointer delays
|
||||
*
|
||||
* @param t_start_max timeout for stopping recognition in case of initial silence (usually around 5.0)
|
||||
* @param t_end timeout for stopping recognition in milliseconds after we recognized something (usually around 0.5 - 1.0)
|
||||
* @param t_max timeout for forcing utterance end in milliseconds (usually around 20-30)
|
||||
**/
|
||||
public void setEndpointerDelays(float t_start_max, float t_end, float t_max) {
|
||||
LibVosk.vosk_recognizer_set_endpointer_delays(this.getPointer(), t_start_max, t_end, t_max);
|
||||
}
|
||||
|
||||
/**
|
||||
* Releases recognizer object.
|
||||
* Underlying model is also unreferenced and if needed, released.
|
||||
|
||||
@ -28,6 +28,9 @@ public class VoskDemo
|
||||
{
|
||||
// Demo float array
|
||||
VoskRecognizer rec = new VoskRecognizer(model, 16000.0f);
|
||||
|
||||
rec.SetEndpointerMode(EndpointerMode.LONG);
|
||||
|
||||
using(Stream source = File.OpenRead("test.wav")) {
|
||||
byte[] buffer = new byte[4096];
|
||||
int bytesRead;
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net5.0</TargetFramework>
|
||||
<TargetFramework>net8.0</TargetFramework>
|
||||
<RootNamespace>VoskDemo</RootNamespace>
|
||||
</PropertyGroup>
|
||||
|
||||
@ -11,7 +11,7 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Vosk" Version="0.3.50" />
|
||||
<PackageReference Include="Vosk" Version="0.3.70" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
<package>
|
||||
<metadata>
|
||||
<id>Vosk</id>
|
||||
<version>0.3.50</version>
|
||||
<version>0.3.70</version>
|
||||
<authors>Alpha Cephei Inc</authors>
|
||||
<owners>Alpha Cephei Inc</owners>
|
||||
<license type="expression">Apache-2.0</license>
|
||||
@ -23,10 +23,10 @@ Vosk scales from small devices like Raspberry Pi or Android smartphone to big cl
|
||||
<copyright>Copyright 2020-2050 Alpha Cephei Inc</copyright>
|
||||
<tags>speech recognition voice stt asr speech-to-text ai offline privacy</tags>
|
||||
<dependencies>
|
||||
<group targetFramework=".NETStandard2.0"/>
|
||||
<group targetFramework="net8.0"/>
|
||||
</dependencies>
|
||||
</metadata>
|
||||
<files>
|
||||
<file src="**" exclude="src/*.cs;build.sh;**/.keep-me;*.nupkg" />
|
||||
<file src="**" exclude="bin/**;obj/**;build.sh;src/*.cs;*.nupkg;**/.keep-me" />
|
||||
</files>
|
||||
</package>
|
||||
|
||||
@ -1,2 +1,2 @@
|
||||
mcs -out:lib/netstandard2.0/Vosk.dll -target:library src/*.cs
|
||||
nuget pack
|
||||
rm -rf bin lib obj
|
||||
/home/shmyrev/local/dotnet/dotnet pack Vosk.csproj -p:NuspecFile=Vosk.nuspec -o .
|
||||
|
||||
@ -65,6 +65,12 @@ class VoskPINVOKE {
|
||||
[global::System.Runtime.InteropServices.DllImport("libvosk", EntryPoint="vosk_recognizer_reset")]
|
||||
public static extern void VoskRecognizer_Reset(global::System.Runtime.InteropServices.HandleRef jarg1);
|
||||
|
||||
[global::System.Runtime.InteropServices.DllImport("libvosk", EntryPoint="vosk_recognizer_set_endpointer_mode")]
|
||||
public static extern void VoskRecognizer_SetEndpointerMode(global::System.Runtime.InteropServices.HandleRef jarg1, int jarg2);
|
||||
|
||||
[global::System.Runtime.InteropServices.DllImport("libvosk", EntryPoint="vosk_recognizer_set_endpointer_delays")]
|
||||
public static extern void VoskRecognizer_SetEndpointerDelays(global::System.Runtime.InteropServices.HandleRef jarg1, float jarg2, float jarg3, float jarg4);
|
||||
|
||||
[global::System.Runtime.InteropServices.DllImport("libvosk", EntryPoint="vosk_set_log_level")]
|
||||
public static extern void SetLogLevel(int jarg1);
|
||||
|
||||
@ -107,7 +113,6 @@ class VoskPINVOKE {
|
||||
[global::System.Runtime.InteropServices.DllImport("libvosk", EntryPoint = "vosk_batch_recognizer_get_pending_chunks")]
|
||||
public static extern int VoskBatchRecognizer_GetPendingChunks(global::System.Runtime.InteropServices.HandleRef jarg1);
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -1,5 +1,12 @@
|
||||
namespace Vosk {
|
||||
|
||||
public enum EndpointerMode {
|
||||
DEFAULT = 0,
|
||||
SHORT = 1,
|
||||
LONG = 2,
|
||||
VERY_LONG = 3
|
||||
}
|
||||
|
||||
public class VoskRecognizer : System.IDisposable {
|
||||
private System.Runtime.InteropServices.HandleRef handle;
|
||||
|
||||
@ -91,6 +98,14 @@ public class VoskRecognizer : System.IDisposable {
|
||||
VoskPINVOKE.VoskRecognizer_Reset(handle);
|
||||
}
|
||||
|
||||
public void SetEndpointerMode(EndpointerMode mode) {
|
||||
VoskPINVOKE.VoskRecognizer_SetEndpointerMode(handle, (int) mode);
|
||||
}
|
||||
|
||||
public void SetEndpointerDelays(float t_start_max, float t_end, float t_max) {
|
||||
VoskPINVOKE.VoskRecognizer_SetEndpointerDelays(handle, t_start_max, t_end, t_max);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -82,6 +82,10 @@ public class LibVosk {
|
||||
|
||||
public static native void vosk_recognizer_reset(Pointer recognizer);
|
||||
|
||||
public static native void vosk_recognizer_set_endpointer_mode(Pointer recognizer, int mode);
|
||||
|
||||
public static native void vosk_recognizer_set_endpointer_delays(Pointer recognizer, float t_start_max, float t_end, float t_max);
|
||||
|
||||
public static native void vosk_recognizer_free(Pointer recognizer);
|
||||
|
||||
public static native Pointer vosk_text_processor_new(String verbalizer, String tagger);
|
||||
|
||||
@ -236,6 +236,34 @@ public class Recognizer extends PointerType implements AutoCloseable {
|
||||
LibVosk.vosk_recognizer_reset(this.getPointer());
|
||||
}
|
||||
|
||||
/**
|
||||
* Endpointer delay mode
|
||||
*/
|
||||
public class EndpointerMode {
|
||||
public static final int DEFAULT = 0;
|
||||
public static final int SHORT = 1;
|
||||
public static final int LONG = 2;
|
||||
public static final int VERY_LONG = 3;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configures endpointer mode for recognizer
|
||||
*/
|
||||
public void setEndpointerMode(int mode) {
|
||||
LibVosk.vosk_recognizer_set_endpointer_mode(this.getPointer(), mode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set endpointer delays
|
||||
*
|
||||
* @param t_start_max timeout for stopping recognition in case of initial silence (usually around 5.0)
|
||||
* @param t_end timeout for stopping recognition in milliseconds after we recognized something (usually around 0.5 - 1.0)
|
||||
* @param t_max timeout for forcing utterance end in milliseconds (usually around 20-30)
|
||||
**/
|
||||
public void setEndpointerDelays(float t_start_max, float t_end, float t_max) {
|
||||
LibVosk.vosk_recognizer_set_endpointer_delays(this.getPointer(), t_start_max, t_end, t_max);
|
||||
}
|
||||
|
||||
/**
|
||||
* Releases recognizer object.
|
||||
* Underlying model is also unreferenced and if needed, released.
|
||||
|
||||
@ -15,6 +15,7 @@ import javax.sound.sampled.UnsupportedAudioFileException;
|
||||
|
||||
import org.vosk.LogLevel;
|
||||
import org.vosk.Recognizer;
|
||||
import org.vosk.Recognizer.EndpointerMode;
|
||||
import org.vosk.LibVosk;
|
||||
import org.vosk.Model;
|
||||
import org.vosk.TextProcessor;
|
||||
@ -96,6 +97,15 @@ public class DecoderTest {
|
||||
Assert.assertTrue(true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void decoderEndpointerDelays() throws IOException, UnsupportedAudioFileException {
|
||||
try (Model model = new Model("model");
|
||||
Recognizer recognizer = new Recognizer(model, 16000)) {
|
||||
recognizer.setEndpointerMode(EndpointerMode.VERY_LONG);
|
||||
recognizer.setEndpointerDelays(5.0f, 3.0f, 50.0f);
|
||||
}
|
||||
Assert.assertTrue(true);
|
||||
}
|
||||
|
||||
@Test(expected = IOException.class)
|
||||
public void decoderTestException() throws IOException {
|
||||
|
||||
@ -22,4 +22,4 @@ linkerOpts.linux = \
|
||||
|
||||
linkerOpts.linux_x64 = \
|
||||
-L/usr/lib64/ \
|
||||
-L/usr/local/lib64/
|
||||
-L/usr/local/lib64/
|
||||
|
||||
@ -45,7 +45,7 @@ with open("README.md", "rb") as fh:
|
||||
|
||||
setuptools.setup(
|
||||
name="vosk",
|
||||
version="0.3.50",
|
||||
version="0.3.70",
|
||||
author="Alpha Cephei Inc",
|
||||
author_email="contact@alphacephei.com",
|
||||
description="Offline open source speech recognition API based on Kaldi and Vosk",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user