Object pascal examples for recording and playing audio with portaudio. (#1271)

The recording example can be used for speech recognition while the playing example can be used for text to speech.

The portaudio wrapper for object pascal is copied from
https://github.com/UltraStar-Deluxe/USDX/blob/master/src/lib/portaudio/portaudio.pas
This commit is contained in:
Fangjun Kuang 2024-08-18 19:51:08 +08:00 committed by GitHub
parent f93f0ca94d
commit e34a1a2aa3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 1562 additions and 2 deletions

View File

@ -0,0 +1,2 @@
test-record
test-play

View File

@ -0,0 +1,13 @@
# Introduction
[portaudio.pas](./portaudio.pas)
requires that the portaudio library is installed on your system.
On macOS, you can use
```bash
brew install portaudio
```
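and it will install `portaudio` into a versioned directory such as `/usr/local/Cellar/portaudio/19.7.0`.
The exact path depends on the installed version and on your Homebrew prefix. If yours differs, update the `-Fl` option that points to the portaudio `lib` directory in `run-play.sh` and `run-record.sh`.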

View File

@ -0,0 +1,37 @@
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared C API library (only when it is not already
# installed under <repo>/build/install/lib), compiles test-play.pas with fpc
# and runs the resulting binary.
#
# The script can be invoked from any directory.
#
# Optional environment variable:
#   PORTAUDIO_LIB_DIR  directory that contains the portaudio library.
#                      Defaults to /usr/local/Cellar/portaudio/19.7.0/lib

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)
PORTAUDIO_LIB_DIR=${PORTAUDIO_LIB_DIR:-/usr/local/Cellar/portaudio/19.7.0/lib}

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# ./test-play.pas and ./test-play below are relative to this script, not to
# the directory the caller happens to be in.
cd "$SCRIPT_DIR"

BUILD_DIR="$SHERPA_ONNX_DIR/build"
INSTALL_LIB_DIR="$BUILD_DIR/install/lib"

# Build the C API only if none of the platform-specific shared libraries exists.
if [[ ! -f "$INSTALL_LIB_DIR/libsherpa-onnx-c-api.dylib" \
   && ! -f "$INSTALL_LIB_DIR/libsherpa-onnx-c-api.so" \
   && ! -f "$INSTALL_LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..
  cmake --build . --target install --config Release
  popd
fi

fpc \
  -g \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$INSTALL_LIB_DIR" \
  "-Fl$PORTAUDIO_LIB_DIR" \
  ./test-play.pas

# ${VAR:+:$VAR} avoids a trailing ':' (which would add the current directory
# to the search path) when the variable is unset or empty.
export LD_LIBRARY_PATH="$INSTALL_LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$INSTALL_LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./test-play

View File

@ -0,0 +1,37 @@
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared C API library (only when it is not already
# installed under <repo>/build/install/lib), compiles test-record.pas with fpc
# and runs the resulting binary.
#
# The script can be invoked from any directory.
#
# Optional environment variable:
#   PORTAUDIO_LIB_DIR  directory that contains the portaudio library.
#                      Defaults to /usr/local/Cellar/portaudio/19.7.0/lib

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)
PORTAUDIO_LIB_DIR=${PORTAUDIO_LIB_DIR:-/usr/local/Cellar/portaudio/19.7.0/lib}

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# ./test-record.pas and ./test-record below are relative to this script, not
# to the directory the caller happens to be in.
cd "$SCRIPT_DIR"

BUILD_DIR="$SHERPA_ONNX_DIR/build"
INSTALL_LIB_DIR="$BUILD_DIR/install/lib"

# Build the C API only if none of the platform-specific shared libraries exists.
if [[ ! -f "$INSTALL_LIB_DIR/libsherpa-onnx-c-api.dylib" \
   && ! -f "$INSTALL_LIB_DIR/libsherpa-onnx-c-api.so" \
   && ! -f "$INSTALL_LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..
  cmake --build . --target install --config Release
  popd
fi

fpc \
  -g \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$INSTALL_LIB_DIR" \
  "-Fl$PORTAUDIO_LIB_DIR" \
  ./test-record.pas

# ${VAR:+:$VAR} avoids a trailing ':' (which would add the current directory
# to the search path) when the variable is unset or empty.
export LD_LIBRARY_PATH="$INSTALL_LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$INSTALL_LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./test-record

View File

@ -0,0 +1,160 @@
{ Copyright (c) 2024 Xiaomi Corporation }
{
This file shows how to use portaudio for playing.
}
program main;
{$mode objfpc}{$H+}
uses
portaudio,
sherpa_onnx,
dos,
ctypes,
SysUtils;
{ Program-wide variables used by the main block; Buffer is also read by
  PlayCallback. }
var
{ Text returned by Pa_GetVersionText. }
Version: String;
{ Value of the SHERPA_ONNX_MIC_DEVICE environment variable ('' if unset). }
EnvStr: String;
{ Return code of the most recent Pa_* call that is checked for errors. }
Status: Integer;
{ Result of Pa_GetDeviceCount. }
NumDevices: Integer;
{ Index of the device used for playback: the default output device unless
  overridden through SHERPA_ONNX_MIC_DEVICE. }
DeviceIndex: Integer;
{ Info record of the device currently being listed. }
DeviceInfo: PPaDeviceInfo;
{ Loop index over the devices. }
I: Integer;
{ Output parameters passed to Pa_OpenStream. }
Param: TPaStreamParameters;
{ Stream handle filled in by Pa_OpenStream. }
Stream: PPaStream;
{ Contents of ./record.wav as returned by SherpaOnnxReadWave. }
Wave: TSherpaOnnxWave;
{ Holds the samples of Wave that have not been played yet; PlayCallback
  reads and pops from it. }
Buffer: TSherpaOnnxCircularBuffer;
{
  PortAudio stream callback used for playback.

  Copies the next frameCount samples from the global Buffer into output
  (interpreted as an array of cfloat, one channel) and removes them from
  Buffer. When Buffer holds fewer than frameCount samples, the remainder of
  output is filled with zeros (SetLength zero-initializes the elements it adds
  to a dynamic array).

  Returns paContinue while Buffer still contains samples after this call and
  paComplete once it is empty.

  input, timeInfo, statusFlags and userData are not used.
}
function PlayCallback(
  input: Pointer; output: Pointer;
  frameCount: culong;
  timeInfo: PPaStreamCallbackTimeInfo;
  statusFlags: TPaStreamCallbackFlags;
  userData: Pointer ): cint; cdecl;
var
  Samples: TSherpaOnnxSamplesArray;
  NumFrames: Integer;
  NumAvailable: Integer;
  I: Integer;
begin
  { frameCount is unsigned. Convert it once so that comparisons with the
    signed Buffer.Size and the expression NumFrames - 1 below are done in
    signed arithmetic and cannot wrap around. }
  NumFrames := Integer(frameCount);

  NumAvailable := Buffer.Size;
  if NumAvailable > NumFrames then
    NumAvailable := NumFrames;

  Samples := Buffer.Get(Buffer.Head, NumAvailable);
  Buffer.Pop(NumAvailable);

  { Pad with silence when fewer than NumFrames samples were obtained so the
    copy loop never reads past the end of Samples. }
  if Length(Samples) < NumFrames then
    SetLength(Samples, NumFrames);

  for I := 0 to NumFrames - 1 do
    pcfloat(output)[I] := Samples[I];

  if Buffer.Size > 0 then
    Result := paContinue
  else
    Result := paComplete;
end;
{
  Main program: plays ./record.wav through PortAudio.

  Steps:
    1. Initialize PortAudio and pick the output device (the default output
       device, or the index given in the environment variable
       SHERPA_ONNX_MIC_DEVICE).
    2. Read ./record.wav and push all of its samples into Buffer.
    3. Open and start a mono float32 output stream driven by PlayCallback.
    4. Wait until the stream has finished, then release everything.
}
begin
  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);

  Status := Pa_Initialize;
  if Status <> paNoError then
  begin
    WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;
  if DeviceIndex = paNoDevice then
  begin
    WriteLn('No default output device found');
    Pa_Terminate;
    Exit;
  end;

  { The variable name says MIC, but in this program it selects the OUTPUT
    device. A value that is not an integer leaves the default unchanged. }
  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
  begin
    DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
    WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
  end;

  { List all devices; the selected one is marked with '*'. }
  for I := 0 to (NumDevices - 1) do
  begin
    DeviceInfo := Pa_GetDeviceInfo(I);
    if I = DeviceIndex then
      WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
    else
      WriteLn(Format(' %d %s', [I, AnsiString(DeviceInfo^.Name)]));
  end;

  { Pa_GetDeviceInfo returns nil for an out-of-range index, which can happen
    when the index comes from the environment. Check before dereferencing. }
  DeviceInfo := Pa_GetDeviceInfo(DeviceIndex);
  if DeviceInfo = nil then
  begin
    WriteLn('Invalid device index: ', DeviceIndex);
    Pa_Terminate;
    Exit;
  end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', DeviceInfo^.Name);
  WriteLn(' Max output channels ', DeviceInfo^.MaxOutputChannels);

  Wave := SherpaOnnxReadWave('./record.wav');
  if Wave.Samples = nil then
  begin
    WriteLn('Failed to read ./record.wav');
    Pa_Terminate;
    Exit;
  end;

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  Param.SuggestedLatency := DeviceInfo^.DefaultHighOutputLatency;
  Param.HostApiSpecificStreamInfo := nil;

  { PlayCallback consumes the samples from this buffer. }
  Buffer := TSherpaOnnxCircularBuffer.Create(Length(Wave.Samples));
  Buffer.Push(Wave.Samples);

  Status := Pa_OpenStream(Stream, nil, @Param, Wave.SampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);
  if Status <> paNoError then
  begin
    WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
    Buffer.Free;
    Pa_Terminate;
    Exit;
  end;

  Status := Pa_StartStream(Stream);
  if Status <> paNoError then
  begin
    WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
    Pa_CloseStream(Stream);
    Buffer.Free;
    Pa_Terminate;
    Exit;
  end;

  { PlayCallback returns paComplete once Buffer is empty, but PortAudio may
    still hold samples it has not played yet. Closing an active stream
    discards them, so wait until the stream reports that it is no longer
    active (Pa_IsStreamActive returns 1 while it is active). }
  while Pa_IsStreamActive(Stream) = 1 do
    Pa_Sleep(100); {sleep for 0.1 second }

  Status := Pa_CloseStream(Stream);
  if Status <> paNoError then
  begin
    WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));
    Pa_Terminate;
    Exit;
  end;

  { The stream is closed, so the callback can no longer touch Buffer. }
  Buffer.Free;

  Status := Pa_Terminate;
  if Status <> paNoError then
    WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
end.

View File

@ -0,0 +1,139 @@
{ Copyright (c) 2024 Xiaomi Corporation }
{
This file shows how to use portaudio for recording.
It records for 10 seconds and saves the audio samples to ./record.wav
}
program main;
{$mode objfpc}
uses
portaudio,
sherpa_onnx,
dos,
ctypes,
SysUtils;
{ Program-wide variables used by the main block; Buffer is also written by
  RecordCallback. }
var
{ Text returned by Pa_GetVersionText. }
Version: String;
{ Value of the SHERPA_ONNX_MIC_DEVICE environment variable ('' if unset). }
EnvStr: String;
{ Return code of the most recent Pa_* call that is checked for errors. }
Status: Integer;
{ Result of Pa_GetDeviceCount. }
NumDevices: Integer;
{ Index of the device used for recording: the default input device unless
  overridden through SHERPA_ONNX_MIC_DEVICE. }
DeviceIndex: Integer;
{ Info record of the device currently being listed. }
DeviceInfo: PPaDeviceInfo;
{ Loop index over the devices. }
I: Integer;
{ Input parameters passed to Pa_OpenStream. }
Param: TPaStreamParameters;
{ Sample rate passed to Pa_OpenStream and used when writing record.wav. }
SampleRate: Double;
{ Stream handle filled in by Pa_OpenStream. }
Stream: PPaStream;
{ Receives the recorded samples; RecordCallback pushes into it. }
Buffer: TSherpaOnnxCircularBuffer;
{ Samples read back from Buffer and written to record.wav. }
AllSamples: TSherpaOnnxSamplesArray;
{
  PortAudio stream callback used for recording.

  Appends the frameCount samples found at input (interpreted as an array of
  cfloat) to the global Buffer and always asks PortAudio to keep the stream
  running by returning paContinue.

  output, timeInfo, statusFlags and userData are not used.
}
function RecordCallback(
  input: Pointer; output: Pointer;
  frameCount: culong;
  timeInfo: PPaStreamCallbackTimeInfo;
  statusFlags: TPaStreamCallbackFlags;
  userData: Pointer ): cint; cdecl;
var
  Captured: pcfloat;
begin
  Result := paContinue;

  Captured := pcfloat(input);
  Buffer.Push(Captured, frameCount);
end;
{
  Main program: records 10 seconds of audio through PortAudio and saves it
  to record.wav.

  Steps:
    1. Initialize PortAudio and pick the input device (the default input
       device, or the index given in the environment variable
       SHERPA_ONNX_MIC_DEVICE).
    2. Open and start a mono float32 input stream at 48000 Hz driven by
       RecordCallback, which pushes the samples into Buffer.
    3. Sleep for 10 seconds, close the stream and write the content of Buffer
       to record.wav.
}
begin
  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);

  Status := Pa_Initialize;
  if Status <> paNoError then
  begin
    WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultInputDevice;
  if DeviceIndex = paNoDevice then
  begin
    WriteLn('No default input device found');
    Pa_Terminate;
    Exit;
  end;

  { A value that is not an integer leaves the default device unchanged. }
  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
  begin
    DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
    WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
  end;

  { List all devices; the selected one is marked with '*'. }
  for I := 0 to (NumDevices - 1) do
  begin
    DeviceInfo := Pa_GetDeviceInfo(I);
    if I = DeviceIndex then
      WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
    else
      WriteLn(Format(' %d %s', [I, AnsiString(DeviceInfo^.Name)]));
  end;

  { Pa_GetDeviceInfo returns nil for an out-of-range index, which can happen
    when the index comes from the environment. Check before dereferencing. }
  DeviceInfo := Pa_GetDeviceInfo(DeviceIndex);
  if DeviceInfo = nil then
  begin
    WriteLn('Invalid device index: ', DeviceIndex);
    Pa_Terminate;
    Exit;
  end;

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', DeviceInfo^.Name);
  WriteLn(' Max input channels ', DeviceInfo^.MaxInputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  Param.SuggestedLatency := DeviceInfo^.DefaultHighInputLatency;
  Param.HostApiSpecificStreamInfo := nil;

  SampleRate := 48000;

  { Capacity for 20 seconds of audio; only 10 seconds are recorded below. }
  Buffer := TSherpaOnnxCircularBuffer.Create(Round(SampleRate) * 20);

  Status := Pa_OpenStream(Stream, @Param, nil, SampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@RecordCallback), nil);
  if Status <> paNoError then
  begin
    WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
    Buffer.Free;
    Pa_Terminate;
    Exit;
  end;

  Status := Pa_StartStream(Stream);
  if Status <> paNoError then
  begin
    WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
    Pa_CloseStream(Stream);
    Buffer.Free;
    Pa_Terminate;
    Exit;
  end;

  WriteLn('Please speak! It will exit after 10 seconds.');
  Pa_Sleep(10000); {sleep for 10 seconds }

  Status := Pa_CloseStream(Stream);
  if Status <> paNoError then
  begin
    WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));
    Pa_Terminate;
    Exit;
  end;

  { The stream is closed, so RecordCallback no longer writes to Buffer. }
  if Buffer.Size > 0 then
  begin
    { Start at Buffer.Head instead of assuming that the oldest sample has
      index 0. }
    AllSamples := Buffer.Get(Buffer.Head, Buffer.Size);
    SherpaOnnxWriteWave('record.wav', AllSamples, Round(SampleRate));
    WriteLn('Saved to record.wav');
  end
  else
    WriteLn('No samples were recorded');

  Buffer.Free;

  Status := Pa_Terminate;
  if Status <> paNoError then
    WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
end.

View File

@ -5,3 +5,7 @@ This directory contains APIs for [Object Pascal](https://en.wikipedia.org/wiki/O
Please see
https://github.com/k2-fsa/sherpa-onnx/tree/master/pascal-api-examples
for usage examples.
[portaudio.pas](./portaudio.pas)
is copied from
https://github.com/UltraStar-Deluxe/USDX/blob/master/src/lib/portaudio/portaudio.pas

File diff suppressed because it is too large Load Diff

View File

@ -10,6 +10,8 @@ unit sherpa_onnx;
(* {$LongStrings ON} *)
interface
uses
ctypes;
type
TSherpaOnnxWave = record
@ -260,7 +262,8 @@ type
public
constructor Create(Capacity: Integer);
destructor Destroy; override;
procedure Push(Samples: array of Single);
procedure Push(Samples: array of Single); overload;
procedure Push(Samples: pcfloat; N: Integer); overload;
function Get(StartIndex: Integer; N: Integer): TSherpaOnnxSamplesArray;
procedure Pop(N: Integer);
procedure Reset;
@ -305,7 +308,6 @@ type
implementation
uses
ctypes,
fpjson,
{ See
- https://wiki.freepascal.org/fcl-json
@ -1323,6 +1325,11 @@ begin
SherpaOnnxCircularBufferPush(Self.Handle, pcfloat(Samples), Length(Samples));
end;
{ Appends N samples, read from the raw pointer Samples, to the circular
  buffer by forwarding them to SherpaOnnxCircularBufferPush.

  Samples: pointer to the first of N consecutive cfloat values.
  N:       number of samples to append.

  The call is a no-op when Samples is nil or N is not positive, so an invalid
  pointer/count pair is never handed to the C API. }
procedure TSherpaOnnxCircularBuffer.Push(Samples: pcfloat; N: Integer);
begin
  if (Samples = nil) or (N <= 0) then
    Exit;

  SherpaOnnxCircularBufferPush(Self.Handle, Samples, N);
end;
function TSherpaOnnxCircularBuffer.Get(StartIndex: Integer; N: Integer): TSherpaOnnxSamplesArray;
var
P: pcfloat;