Hello!
I’m just starting to learn the theory of digital audio and fmod.
My task is to record voice from a microphone for some seconds and transfer the recorded data to create an unity audioclip. I was able to implement a system that allows me to hear my microphone and read the raw data, but apparently I am reading it incorrectly - when I create an audio clip with this data, my audio speeds up several times. My starting point is https://fmod.com/docs/2.02/unity/examples-dsp-capture.html example.
FMODVoiceProcessor
using FMOD;
using FMODUnity;
using System;
using System.Runtime.InteropServices;
using UnityEngine;
public class FMODVoiceProcessor : MonoBehaviour
{
public event Action<float[]> FrameCaptured;
private int _recordDeviceId = 0;
private float[] _sampleBuffer;
private CREATESOUNDEXINFO _exinfo;
private ChannelGroup _masterCG;
private Channel _channel;
private Sound _sound;
private DSP _captureDSP;
private GCHandle _objHandle;
public void EnableRecording()
{
RESULT res = RuntimeManager.CoreSystem.getRecordNumDrivers(out int numofDrivers, out int numOfDriversConnected);
RuntimeManager.CoreSystem.getRecordDriverInfo(_recordDeviceId, out _, 0,
out Guid micGUID, out int _sampleRate, out SPEAKERMODE speakerMode, out int captureNumChannels, out DRIVER_STATE driverState);
// create sound where capture is recorded
_exinfo.cbsize = Marshal.SizeOf(typeof(CREATESOUNDEXINFO));
_exinfo.numchannels = captureNumChannels;
_exinfo.format = SOUND_FORMAT.PCM16;
_exinfo.defaultfrequency = _sampleRate;
_exinfo.length = (uint)_sampleRate * sizeof(short) * (uint)captureNumChannels;
RuntimeManager.CoreSystem.createSound(
_exinfo.userdata,
MODE.LOOP_NORMAL | MODE.OPENUSER,
ref _exinfo, out _sound);
RuntimeManager.CoreSystem.recordStart(_recordDeviceId, _sound, true);
// play sound on dedicated channel in master channel group
if (RuntimeManager.CoreSystem.getMasterChannelGroup(out _masterCG) != RESULT.OK)
UnityEngine.Debug.LogWarningFormat("FMOD: Unable to create a master channel group: masterCG");
RuntimeManager.CoreSystem.getMasterChannelGroup(out _masterCG);
RuntimeManager.CoreSystem.playSound(_sound, _masterCG, true, out _channel);
_channel.setPaused(true);
// Allocate a data buffer large enough for 8 channels, pin the memory to avoid garbage collection
RuntimeManager.CoreSystem.getDSPBufferSize(out uint bufferLength, out int numBuffers);
_sampleBuffer = new float[bufferLength * 8];
// Get a handle to this object to pass into the callback
_objHandle = GCHandle.Alloc(this);
if (_objHandle != null)
{
// Define a basic DSP that receives a callback each mix to capture audio
DSP_DESCRIPTION desc = new DSP_DESCRIPTION();
desc.numinputbuffers = 1;
desc.numoutputbuffers = 1;
desc.read = CaptureDSPReadCallback;
desc.userdata = GCHandle.ToIntPtr(_objHandle);
// Create an instance of the capture DSP and attach it to the master channel group to capture all audio
if (RuntimeManager.CoreSystem.createDSP(ref desc, out _captureDSP) == RESULT.OK)
{
if (_masterCG.addDSP(0, _captureDSP) != RESULT.OK)
{
UnityEngine.Debug.LogWarningFormat("FMOD: Unable to add mCaptureDSP to the master channel group");
}
}
else
{
UnityEngine.Debug.LogWarningFormat("FMOD: Unable to create a DSP: mCaptureDSP");
}
}
else
{
UnityEngine.Debug.LogWarningFormat("FMOD: Unable to create a GCHandle: mObjHandle");
}
}
[AOT.MonoPInvokeCallback(typeof(DSP_READ_CALLBACK))]
static RESULT CaptureDSPReadCallback(ref DSP_STATE dsp_state, IntPtr inbuffer, IntPtr outbuffer, uint length, int inchannels, ref int outchannels)
{
DSP_STATE_FUNCTIONS functions = (DSP_STATE_FUNCTIONS)Marshal.PtrToStructure(dsp_state.functions, typeof(DSP_STATE_FUNCTIONS));
functions.getuserdata(ref dsp_state, out IntPtr userData);
GCHandle objHandle = GCHandle.FromIntPtr(userData);
FMODMicrophoneRecorder obj = objHandle.Target as FMODMicrophoneRecorder;
// Copy the incoming buffer to process later
int lengthElements = (int)length * inchannels;
Marshal.Copy(inbuffer, obj._sampleBuffer, 0, lengthElements);
// Copy the inbuffer to the outbuffer so we can still hear it
Marshal.Copy(obj._sampleBuffer, 0, outbuffer, lengthElements);
//for some reason i have 6 in/out channles and and 4 of them are empty,
//more details here - https://qa.fmod.com/t/dsp-read-callback-show-6-in-out-channels-instead-of-1/21088,
//so I had to extract only the non - empty data, but this event send all data from obj._sampleBuffer
obj.FrameCaptured?.Invoke(obj._sampleBuffer);
return RESULT.OK;
}
private void Start()
{
EnableRecording();
}
}
MicrophoneRecorder receive data frame from FMODVoiceProcessor using an event FrameCapture
public class MicrophoneRecorder: MonoBehaviour
{
public FMODVoiceProcessor VoiceProcessor;
public AudioSource AudioSource;
public AudioClip AudioClip;
public float[] fbuffer;
public int RecorderBufferSize = 100000;
public int RecordedFloats = 0;
private void OnFrameCaptured(float[] capturedFrame)
{
if (RecordedFloats + capturedFrame.Length > fbuffer.Length)
{
StopCapture();
return;
}
Buffer.BlockCopy(capturedFrame, 0, fbuffer, RecordedFloats, capturedFrame.Length * 4);
RecordedFloats += capturedFrame.Length;
}
[ContextMenu("CreateSound")]
private void CreateSound()
{
AudioClip = AudioClip.Create("Test", fbuffer.Length, 1, 48000, false);
AudioClip.SetData(fbuffer, 0);
}
[ContextMenu("PlaySound")]
private void PlaySound()
{
AudioSource.clip = AudioClip;
AudioSource.Play();
}
[ContextMenu("InitializeFloatBuffer")]
private void InitializeFloatBuffer()
{
fbuffer = new float[RecorderBufferSize];
RecordedFloats = 0;
}
[ContextMenu("StartCapture")]
public void StartCapture()
{
VoiceProcessor.FrameCaptured+= OnFrameCaptured;
}
[ContextMenu("StopCapture")]
public void StopCapture()
{
VoiceProcessor.FrameCaptured-= OnFrameCaptured;
}
}
I would appreciate any help.
Thanks!