Add endpointer mode to java and csharp API

This commit is contained in:
Nickolay Shmyrev 2025-09-03 07:14:40 +02:00
parent 24f0e0cff8
commit 07a0ddb467
15 changed files with 109 additions and 12 deletions

View File

@ -10,7 +10,7 @@ buildscript {
}
allprojects {
version = '0.3.50'
version = '0.3.70'
}
subprojects {

View File

@ -56,6 +56,10 @@ public class LibVosk {
// Native binding for vosk_recognizer_reset; takes the recognizer's native pointer.
public static native void vosk_recognizer_reset(Pointer recognizer);
// Native binding for vosk_recognizer_set_endpointer_mode; `mode` is a plain int
// (Recognizer.setEndpointerMode forwards its argument here unchanged).
public static native void vosk_recognizer_set_endpointer_mode(Pointer recognizer, int mode);
// Native binding for vosk_recognizer_set_endpointer_delays; the three floats are
// forwarded unchanged from Recognizer.setEndpointerDelays.
public static native void vosk_recognizer_set_endpointer_delays(Pointer recognizer, float t_start_max, float t_end, float t_max);
// Native binding for vosk_recognizer_free.
// NOTE(review): presumably releases the native recognizer - confirm against the native API.
public static native void vosk_recognizer_free(Pointer recognizer);
// Native binding for vosk_text_processor_new; returns a native pointer.
// NOTE(review): the arguments look like verbalizer/tagger resources - confirm their expected format.
public static native Pointer vosk_text_processor_new(String verbalizer, String tagger);

View File

@ -236,6 +236,34 @@ public class Recognizer extends PointerType implements AutoCloseable {
LibVosk.vosk_recognizer_reset(this.getPointer());
}
/**
* Endpointer delay mode.
*
* Integer constants meant to be passed as the {@code mode} argument of
* {@code setEndpointerMode(int)}, which forwards the value unchanged to the
* native vosk_recognizer_set_endpointer_mode call.
*
* NOTE(review): the names suggest progressively longer delays, but the actual
* timings are decided by the native library - confirm there.
* NOTE(review): this is declared as a non-static nested class although it only
* holds constants; consider making it static.
*/
public class EndpointerMode {
public static final int DEFAULT = 0;
public static final int SHORT = 1;
public static final int LONG = 2;
public static final int VERY_LONG = 3;
}
/**
 * Selects the endpointer mode used by this recognizer.
 *
 * @param mode one of the {@link EndpointerMode} constants; the value is handed
 *             to the native library as-is, without validation on the Java side
 */
public void setEndpointerMode(int mode) {
    LibVosk.vosk_recognizer_set_endpointer_mode(getPointer(), mode);
}
/**
 * Sets custom endpointer delays for this recognizer. All three values are
 * forwarded unchanged to the native library.
 *
 * NOTE(review): the unit is not established here. The typical values quoted
 * below read like seconds, yet two of these delays have also been described
 * as milliseconds - confirm against the native API before relying on either.
 *
 * @param t_start_max timeout for stopping recognition in case of initial silence (usually around 5.0)
 * @param t_end timeout for stopping recognition after something was recognized (usually around 0.5 - 1.0)
 * @param t_max timeout for forcing the end of an utterance (usually around 20-30)
 */
public void setEndpointerDelays(float t_start_max, float t_end, float t_max) {
    LibVosk.vosk_recognizer_set_endpointer_delays(getPointer(), t_start_max, t_end, t_max);
}
/**
* Releases recognizer object.
* Underlying model is also unreferenced and if needed, released.

View File

@ -28,6 +28,9 @@ public class VoskDemo
{
// Demo float array
VoskRecognizer rec = new VoskRecognizer(model, 16000.0f);
rec.SetEndpointerMode(EndpointerMode.LONG);
using(Stream source = File.OpenRead("test.wav")) {
byte[] buffer = new byte[4096];
int bytesRead;

View File

@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net5.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>VoskDemo</RootNamespace>
</PropertyGroup>
@ -11,7 +11,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Vosk" Version="0.3.50" />
<PackageReference Include="Vosk" Version="0.3.70" />
</ItemGroup>
</Project>

View File

@ -2,7 +2,7 @@
<package>
<metadata>
<id>Vosk</id>
<version>0.3.50</version>
<version>0.3.70</version>
<authors>Alpha Cephei Inc</authors>
<owners>Alpha Cephei Inc</owners>
<license type="expression">Apache-2.0</license>
@ -23,10 +23,10 @@ Vosk scales from small devices like Raspberry Pi or Android smartphone to big cl
<copyright>Copyright 2020-2050 Alpha Cephei Inc</copyright>
<tags>speech recognition voice stt asr speech-to-text ai offline privacy</tags>
<dependencies>
<group targetFramework=".NETStandard2.0"/>
<group targetFramework="net8.0"/>
</dependencies>
</metadata>
<files>
<file src="**" exclude="src/*.cs;build.sh;**/.keep-me;*.nupkg" />
<file src="**" exclude="bin/**;obj/**;build.sh;src/*.cs;*.nupkg;**/.keep-me" />
</files>
</package>

View File

@ -1,2 +1,2 @@
mcs -out:lib/netstandard2.0/Vosk.dll -target:library src/*.cs
nuget pack
rm -rf bin lib obj
# Build the NuGet package from Vosk.csproj using Vosk.nuspec and write the
# resulting .nupkg into the current directory.
# The dotnet executable comes from $DOTNET when set and from PATH otherwise, so
# the script no longer depends on one developer's home-directory install.
"${DOTNET:-dotnet}" pack Vosk.csproj -p:NuspecFile=Vosk.nuspec -o .

View File

@ -65,6 +65,12 @@ class VoskPINVOKE {
// P/Invoke binding to the native "vosk_recognizer_reset" entry point in libvosk.
// jarg1 is the recognizer handle (VoskRecognizer.Reset passes its own handle).
[global::System.Runtime.InteropServices.DllImport("libvosk", EntryPoint="vosk_recognizer_reset")]
public static extern void VoskRecognizer_Reset(global::System.Runtime.InteropServices.HandleRef jarg1);
// P/Invoke binding to the native "vosk_recognizer_set_endpointer_mode" entry point in libvosk.
// jarg1 is the recognizer handle; jarg2 is the endpointer mode as an int
// (VoskRecognizer.SetEndpointerMode passes an EndpointerMode value cast to int).
[global::System.Runtime.InteropServices.DllImport("libvosk", EntryPoint="vosk_recognizer_set_endpointer_mode")]
public static extern void VoskRecognizer_SetEndpointerMode(global::System.Runtime.InteropServices.HandleRef jarg1, int jarg2);
// P/Invoke binding to the native "vosk_recognizer_set_endpointer_delays" entry point in libvosk.
// jarg1 is the recognizer handle; jarg2, jarg3 and jarg4 receive t_start_max, t_end
// and t_max, in that order, from VoskRecognizer.SetEndpointerDelays.
[global::System.Runtime.InteropServices.DllImport("libvosk", EntryPoint="vosk_recognizer_set_endpointer_delays")]
public static extern void VoskRecognizer_SetEndpointerDelays(global::System.Runtime.InteropServices.HandleRef jarg1, float jarg2, float jarg3, float jarg4);
// P/Invoke binding to the native "vosk_set_log_level" entry point in libvosk.
// NOTE(review): jarg1 is presumably the log level; its valid range is defined by the native library - confirm there.
[global::System.Runtime.InteropServices.DllImport("libvosk", EntryPoint="vosk_set_log_level")]
public static extern void SetLogLevel(int jarg1);
@ -107,7 +113,6 @@ class VoskPINVOKE {
// P/Invoke binding to the native "vosk_batch_recognizer_get_pending_chunks" entry point in libvosk.
// NOTE(review): jarg1 is presumably a batch recognizer handle and the result a chunk count - confirm against the native API.
[global::System.Runtime.InteropServices.DllImport("libvosk", EntryPoint = "vosk_batch_recognizer_get_pending_chunks")]
public static extern int VoskBatchRecognizer_GetPendingChunks(global::System.Runtime.InteropServices.HandleRef jarg1);
}
}
}

View File

@ -1,5 +1,12 @@
namespace Vosk {
/// <summary>
/// Endpointer modes accepted by VoskRecognizer.SetEndpointerMode.
/// The member is cast to int and passed to the native call, so renumbering
/// a member changes the value the native library receives.
/// </summary>
/// <remarks>
/// NOTE(review): the names suggest progressively longer delays; the actual
/// timings are decided by the native library - confirm there.
/// </remarks>
public enum EndpointerMode {
DEFAULT = 0,
SHORT = 1,
LONG = 2,
VERY_LONG = 3
}
public class VoskRecognizer : System.IDisposable {
private System.Runtime.InteropServices.HandleRef handle;
@ -91,6 +98,14 @@ public class VoskRecognizer : System.IDisposable {
VoskPINVOKE.VoskRecognizer_Reset(handle);
}
/// <summary>
/// Selects the endpointer mode used by this recognizer.
/// </summary>
/// <param name="mode">Mode to apply; its numeric value is passed to the native library.</param>
public void SetEndpointerMode(EndpointerMode mode)
    => VoskPINVOKE.VoskRecognizer_SetEndpointerMode(handle, (int) mode);
/// <summary>
/// Sets custom endpointer delays for this recognizer. The three values are
/// forwarded unchanged, in the same order, to the native library.
/// </summary>
/// <param name="t_start_max">First delay value passed to the native call.</param>
/// <param name="t_end">Second delay value passed to the native call.</param>
/// <param name="t_max">Third delay value passed to the native call.</param>
/// <remarks>
/// NOTE(review): the meaning and unit of each delay are defined by the native
/// API and are not established here - confirm before documenting them further.
/// </remarks>
public void SetEndpointerDelays(float t_start_max, float t_end, float t_max)
    => VoskPINVOKE.VoskRecognizer_SetEndpointerDelays(handle, t_start_max, t_end, t_max);
}
}

View File

@ -82,6 +82,10 @@ public class LibVosk {
// Native binding for vosk_recognizer_reset; takes the recognizer's native pointer.
public static native void vosk_recognizer_reset(Pointer recognizer);
// Native binding for vosk_recognizer_set_endpointer_mode; `mode` is a plain int
// (Recognizer.setEndpointerMode forwards its argument here unchanged).
public static native void vosk_recognizer_set_endpointer_mode(Pointer recognizer, int mode);
// Native binding for vosk_recognizer_set_endpointer_delays; the three floats are
// forwarded unchanged from Recognizer.setEndpointerDelays.
public static native void vosk_recognizer_set_endpointer_delays(Pointer recognizer, float t_start_max, float t_end, float t_max);
// Native binding for vosk_recognizer_free.
// NOTE(review): presumably releases the native recognizer - confirm against the native API.
public static native void vosk_recognizer_free(Pointer recognizer);
// Native binding for vosk_text_processor_new; returns a native pointer.
// NOTE(review): the arguments look like verbalizer/tagger resources - confirm their expected format.
public static native Pointer vosk_text_processor_new(String verbalizer, String tagger);

View File

@ -236,6 +236,34 @@ public class Recognizer extends PointerType implements AutoCloseable {
LibVosk.vosk_recognizer_reset(this.getPointer());
}
/**
* Endpointer delay mode.
*
* Integer constants meant to be passed as the {@code mode} argument of
* {@code setEndpointerMode(int)}, which forwards the value unchanged to the
* native vosk_recognizer_set_endpointer_mode call.
*
* NOTE(review): the names suggest progressively longer delays, but the actual
* timings are decided by the native library - confirm there.
* NOTE(review): this is declared as a non-static nested class although it only
* holds constants; consider making it static.
*/
public class EndpointerMode {
public static final int DEFAULT = 0;
public static final int SHORT = 1;
public static final int LONG = 2;
public static final int VERY_LONG = 3;
}
/**
 * Selects the endpointer mode used by this recognizer.
 *
 * @param mode one of the {@link EndpointerMode} constants; the value is handed
 *             to the native library as-is, without validation on the Java side
 */
public void setEndpointerMode(int mode) {
    LibVosk.vosk_recognizer_set_endpointer_mode(getPointer(), mode);
}
/**
 * Sets custom endpointer delays for this recognizer. All three values are
 * forwarded unchanged to the native library.
 *
 * NOTE(review): the unit is not established here. The typical values quoted
 * below read like seconds, yet two of these delays have also been described
 * as milliseconds - confirm against the native API before relying on either.
 *
 * @param t_start_max timeout for stopping recognition in case of initial silence (usually around 5.0)
 * @param t_end timeout for stopping recognition after something was recognized (usually around 0.5 - 1.0)
 * @param t_max timeout for forcing the end of an utterance (usually around 20-30)
 */
public void setEndpointerDelays(float t_start_max, float t_end, float t_max) {
    LibVosk.vosk_recognizer_set_endpointer_delays(getPointer(), t_start_max, t_end, t_max);
}
/**
* Releases recognizer object.
* Underlying model is also unreferenced and if needed, released.

View File

@ -15,6 +15,7 @@ import javax.sound.sampled.UnsupportedAudioFileException;
import org.vosk.LogLevel;
import org.vosk.Recognizer;
import org.vosk.Recognizer.EndpointerMode;
import org.vosk.LibVosk;
import org.vosk.Model;
import org.vosk.TextProcessor;
@ -96,6 +97,15 @@ public class DecoderTest {
Assert.assertTrue(true);
}
/**
 * Smoke test for the endpointer setters: opens the model in "model", creates a
 * 16000 Hz recognizer, then applies an endpointer mode and explicit delays.
 * It only verifies that the calls complete without throwing; the final
 * assertion is always true.
 */
@Test
public void decoderEndpointerDelays() throws IOException, UnsupportedAudioFileException {
    try (Model acousticModel = new Model("model");
         Recognizer rec = new Recognizer(acousticModel, 16000)) {
        rec.setEndpointerMode(EndpointerMode.VERY_LONG);
        rec.setEndpointerDelays(5.0f, 3.0f, 50.0f);
    }
    Assert.assertTrue(true);
}
@Test(expected = IOException.class)
public void decoderTestException() throws IOException {

View File

@ -22,4 +22,4 @@ linkerOpts.linux = \
linkerOpts.linux_x64 = \
-L/usr/lib64/ \
-L/usr/local/lib64/
-L/usr/local/lib64/

View File

@ -45,7 +45,7 @@ with open("README.md", "rb") as fh:
setuptools.setup(
name="vosk",
version="0.3.50",
version="0.3.70",
author="Alpha Cephei Inc",
author_email="contact@alphacephei.com",
description="Offline open source speech recognition API based on Kaldi and Vosk",