Vivox to Unity OnAudioFilterRead to FMOD programmer sound. Stutters/crackling

ArnoldMule · February 13, 2024, 8:15pm

I’m getting other player’s voice chat from Vivox using participant taps and pushing them into Unity Audio Sources, then getting the data with OnAudioFilterRead and pushing it to FMOD.
I managed to get this working by following this thread, but I’m out of my depth here.

The implementation is mostly there, but the voice has some stuttering and/or crackling to it, which seems to be somewhat inconsistent. I’ve managed to get it pretty low by experimenting with different buffer sizes in FMOD and Unity (since the audio also goes through Unity’s audio system), but the issue is still noticeable. The audio is crystal clear when coming out of the audio source, but not when it goes into and comes out of FMOD.

Also, I have the programmer instrument set as Async inside FMOD. Is there any setup that should be done in FMOD that I might be missing?
Here’s the script.

/// <summary>
/// Forwards the audio that reaches this component's <c>OnAudioFilterRead</c> to FMOD:
/// every filter callback copies the received samples into a new FMOD user sound and
/// plays it through a pooled instance of the VOIP event's programmer instrument.
/// </summary>
/// <remarks>
/// Each callback starts its own short one-shot event, so consecutive blocks are not
/// stitched together sample-accurately. Unity invokes <c>OnAudioFilterRead</c> on the
/// audio thread, which is why the instance pool is guarded by a lock.
/// </remarks>
public class AudioFilterReader : MonoBehaviour
{
    private const string VoipEventPath = "event:/char/otherPlayers/VOIP";

    // One delegate for the lifetime of the program: FMOD keeps calling it for every
    // event instance it was registered on, so it must never be garbage collected.
    private static readonly FMOD.Studio.EVENT_CALLBACK audioCallback = AudioEventCallback;

    // Serializes access to the pool between the audio thread and the main thread.
    private readonly object poolLock = new object();
    private readonly Queue<EventInstance> eventInstancePool = new Queue<EventInstance>();

    private int systemSamplerate;
    private Vector3 worldPos;
    VivoxParticipant participant;
    AudioOccluder myOccluder;

    /// <summary>Caches the output sample rate and pre-creates the event instances.</summary>
    private void Start()
    {
        systemSamplerate = AudioSettings.outputSampleRate;
        InitializeEventInstancePool(100);
    }

    /// <summary>Fills the pool with <paramref name="size"/> instances of the VOIP event.</summary>
    private void InitializeEventInstancePool(int size)
    {
        for (int i = 0; i < size; i++)
        {
            EventInstance instance = RuntimeManager.CreateInstance(VoipEventPath);
            instance.set3DAttributes(worldPos.To3DAttributes());
            ReturnEventInstanceToPool(instance);
        }
    }

    /// <summary>Takes an instance from the pool, creating a new one if the pool is empty.</summary>
    private EventInstance GetPooledEventInstance()
    {
        lock (poolLock)
        {
            if (eventInstancePool.Count > 0)
            {
                return eventInstancePool.Dequeue();
            }
        }

        UnityEngine.Debug.LogWarning("EventInstance pool is empty. Creating a new instance.");
        return RuntimeManager.CreateInstance(VoipEventPath);
    }

    /// <summary>Puts an instance back at the end of the pool.</summary>
    private void ReturnEventInstanceToPool(EventInstance instance)
    {
        lock (poolLock)
        {
            eventInstancePool.Enqueue(instance);
        }
    }

    /// <summary>
    /// Turns the received block of samples into an FMOD sound and starts a pooled
    /// event instance that plays it. Does nothing while no speech is detected.
    /// </summary>
    /// <param name="data">Interleaved samples handed over by Unity.</param>
    /// <param name="channels">Number of interleaved channels in <paramref name="data"/>.</param>
    private void OnAudioFilterRead(float[] data, int channels)
    {
        if (data == null || data.Length == 0)
            return;

        if (participant == null || !participant.SpeechDetected) return;

        // Build the sound first: an instance is only taken from the pool once there is
        // something to play, so a failure here cannot drain the pool.
        var result = ConvertArrayToFMODSound(data, channels, out var sound);
        if (result != RESULT.OK)
        {
            UnityEngine.Debug.LogError($"Error creating the sound {result}");
            return;
        }

        EventInstance programmerSound = GetPooledEventInstance();
        programmerSound.set3DAttributes(worldPos.To3DAttributes());

        // The raw sound handle travels as user data; no GCHandle has to be allocated
        // (and later freed) for every block.
        programmerSound.setUserData(sound.handle);
        programmerSound.setCallback(audioCallback);
        programmerSound.start();
        programmerSound.setParameterByName("Occlusion", myOccluder.GetCurrentOcclusion());
        ReturnEventInstanceToPool(programmerSound);
    }

    /// <summary>
    /// Creates an FMOD user sound sized for <paramref name="data"/> and copies the samples into it.
    /// </summary>
    /// <param name="data">Interleaved float samples to copy.</param>
    /// <param name="channels">Number of interleaved channels.</param>
    /// <param name="sound">The created sound; a default (handle-less) value on failure.</param>
    /// <returns>
    /// <c>RESULT.OK</c> on success, <c>RESULT.ERR_FORMAT</c> when the cached sample rate is 0,
    /// otherwise the first FMOD error encountered. On failure nothing is left allocated.
    /// </returns>
    private RESULT ConvertArrayToFMODSound(float[] data, int channels, out Sound sound)
    {
        sound = default(Sound);

        // Checked before anything is allocated, so there is nothing to clean up.
        if (systemSamplerate == 0)
        {
            UnityEngine.Debug.LogError("System sample rate is 0");
            return RESULT.ERR_FORMAT;
        }

        uint lenBytes = (uint)(data.Length * sizeof(float));
        CREATESOUNDEXINFO soundInfo = new CREATESOUNDEXINFO();
        soundInfo.cbsize = Marshal.SizeOf(typeof(FMOD.CREATESOUNDEXINFO));
        soundInfo.length = lenBytes;
        soundInfo.format = SOUND_FORMAT.PCMFLOAT;
        soundInfo.numchannels = channels;
        soundInfo.defaultfrequency = systemSamplerate;

        var res = RuntimeManager.CoreSystem.createSound("voip", MODE.OPENUSER, ref soundInfo, out sound);
        if (res != RESULT.OK)
        {
            // No sound was created, so there is no handle to release.
            UnityEngine.Debug.LogError($"Result is not valid. {res}");
            sound = default(Sound);
            return res;
        }

        res = sound.@lock(0, lenBytes, out IntPtr ptr1, out IntPtr ptr2, out uint len1, out uint len2);
        if (res == RESULT.OK)
        {
            // Sound::lock reports lengths in bytes; Marshal.Copy counts float elements.
            int firstCount = (int)(len1 / sizeof(float));
            Marshal.Copy(data, 0, ptr1, firstCount);
            if (len2 > 0)
            {
                Marshal.Copy(data, firstCount, ptr2, (int)(len2 / sizeof(float)));
            }
            res = sound.unlock(ptr1, ptr2, len1, len2);
        }

        if (res == RESULT.OK)
        {
            res = sound.setMode(MODE.OPENUSER);
        }

        if (res != RESULT.OK)
        {
            // The sound exists but is unusable: release it instead of leaking it.
            sound.release();
            sound = default(Sound);
        }

        return res;
    }

    /// <summary>Tracks the emitter position and toggles occlusion calculation with speech.</summary>
    private void Update()
    {
        worldPos = transform.position;

        if (participant == null) return;

        myOccluder.calculate = participant.SpeechDetected;
    }

    /// <summary>
    /// FMOD Studio event callback: hands the sound stored in the instance's user data to
    /// the programmer instrument and releases it once the instrument is done with it.
    /// </summary>
    [MonoPInvokeCallback(typeof(EVENT_CALLBACK))]
    static RESULT AudioEventCallback(EVENT_CALLBACK_TYPE type, IntPtr instancePtr, IntPtr parameterPtr)
    {
        switch (type)
        {
            case EVENT_CALLBACK_TYPE.CREATE_PROGRAMMER_SOUND:
                {
                    EventInstance instance = new EventInstance(instancePtr);
                    RESULT result = instance.getUserData(out IntPtr soundPtr);
                    if (result != RESULT.OK || soundPtr == IntPtr.Zero)
                    {
                        // Nothing was attached to this instance; leave the instrument silent.
                        break;
                    }

                    var parameter = (PROGRAMMER_SOUND_PROPERTIES)Marshal.PtrToStructure(parameterPtr, typeof(PROGRAMMER_SOUND_PROPERTIES));
                    parameter.sound = soundPtr;
                    parameter.subsoundIndex = -1;
                    Marshal.StructureToPtr(parameter, parameterPtr, false);
                    break;
                }
            case EVENT_CALLBACK_TYPE.DESTROY_PROGRAMMER_SOUND:
                {
                    // Release exactly the sound the instrument was given, exactly once.
                    var parameter = (PROGRAMMER_SOUND_PROPERTIES)Marshal.PtrToStructure(parameterPtr, typeof(PROGRAMMER_SOUND_PROPERTIES));
                    if (parameter.sound != IntPtr.Zero)
                    {
                        Sound sound = new Sound(parameter.sound);
                        sound.release();
                    }
                    break;
                }
        }
        return RESULT.OK;
    }

    /// <summary>Releases every pooled event instance.</summary>
    private void OnDestroy()
    {
        lock (poolLock)
        {
            foreach (var eventInstance in eventInstancePool)
            {
                eventInstance.release();
            }
            eventInstancePool.Clear();
        }
    }
}

Connor_FMOD · February 14, 2024, 3:06am

Hi,

Would it be possible to get a copy of your project or a stripped-down version displaying the issue uploaded to your profile? Please note you must register a project with us before uploading files.

We also have a Unity scripting example that captures the audio from a Unity video player and passes it to FMOD which may help solve the issue: Unity Integration | Scripting Examples - Video Playback

Hope this helps!

ArnoldMule · February 14, 2024, 4:19pm

Hello,

Thanks for the fast reply! I’ve made an empty Unity project with only an audio source, a loop and the filter reader script with an FMOD integration to showcase the issue. I hadn’t registered a project though so I couldn’t send it yet.

I did go over the video player example, but I couldn’t apply the little I understood to my use case, not having done any audio programming before

Connor_FMOD · February 15, 2024, 12:11am

Hi,

Awesome, you have now been approved and should be able to upload to your profile.

That’s all good, hopefully, I’ll be able to find some specific changes you can implement with your test project.

ArnoldMule · February 15, 2024, 6:00am

Great, I wanted to let you know I’ve just uploaded the test project.

Connor_FMOD · February 20, 2024, 4:43am

Hi,

Thank you for the project. Which scene should I be in to test the issue? I can see the AudioFilterReader script but I cannot find a scene that it is added to.

ArnoldMule · February 20, 2024, 6:36am

Hello,

There should only be one scene under Assets/Scenes the default SampleScene, which should have a Camera, Light and a GameObject named Audio.

Connor_FMOD · February 23, 2024, 4:52am

Hi,

Thanks for that, I was looking at the wrong Unity project.

Here are a couple of things to try:

I noticed that programmerSound.start() is being called in the OnAudioFilterRead(). I would suggest moving this out as it is restarting the same event multiple times.
Make the Programmer Instrument loop and add a Loop Region around the instrument:

image703×128 6.23 KB
The data from OnAudioFilterRead() is being passed straight into the sound. Rather try implementing an intermediate buffer like we do in the ScriptUsageVideoPlayback example. You would pass the data to the mBuffer with: mDataBuffer.AddRange(data), this could be done in the OnAudioFilterRead() function.

Intermediate Buffer

// Refill the part of the FMOD sound that the channel has already played with fresh
// samples taken from the front of the intermediate buffer.
if (mBuffer.Count > 0 && mChannel.hasHandle())
{
    mChannel.getPosition(out uint playCursor, FMOD.TIMEUNIT.PCMBYTES);

    // Bytes played since the last refill; when the cursor wrapped around the end of
    // the sound, add one full sound length to undo the unsigned wrap.
    uint consumedBytes = playCursor - mLastReadPosition;
    if (playCursor < mLastReadPosition)
    {
        consumedBytes += mExinfo.length;
    }

    if (consumedBytes > 0 && mBuffer.Count >= consumedBytes)
    {
        // Lock the stretch that was just played; it may come back as two regions when
        // it crosses the end of the sound.
        var lockResult = mSound.@lock(mLastReadPosition, consumedBytes, out IntPtr head, out IntPtr tail, out uint headBytes, out uint tailBytes);
        if (lockResult != FMOD.RESULT.OK) Debug.LogError(lockResult);

        // The sound format is float but Sound::lock reports byte lengths, so only
        // whole float values are copied across.
        int headSamples = (int)(headBytes / sizeof(float));
        int tailSamples = (int)(tailBytes / sizeof(float));
        int totalSamples = headSamples + tailSamples;

        // Move the oldest samples out of the intermediate buffer into a staging array.
        float[] staged = new float[totalSamples];
        mBuffer.CopyTo(0, staged, 0, staged.Length);
        mBuffer.RemoveRange(0, staged.Length);

        if (headBytes > 0)
        {
            Marshal.Copy(staged, 0, head, headSamples);
        }
        if (tailBytes > 0)
        {
            Marshal.Copy(staged, headSamples, tail, tailSamples);
        }

        var unlockResult = mSound.unlock(head, tail, headBytes, tailBytes);
        if (unlockResult != FMOD.RESULT.OK) Debug.LogError(unlockResult);

        mLastReadPosition = playCursor;
        mTotalSamplesRead += (uint)totalSamples;
    }
}

Implement some form of latency

Latency

// Drift compensation: nudge the playback rate so the amount of buffered audio stays
// close to the latency target. Behind the target -> play slightly slower so the
// buffer can refill; ahead of the target -> play slightly faster to catch up.
uint samplesWritten = (uint)buffer.Length;
mTotalSamplesWritten += samplesWritten;

// Remember the smallest non-empty write seen so far; the latency target is never
// allowed to be smaller than that.
bool isSmallestWrite = samplesWritten != 0 && (samplesWritten < mMinimumSamplesWritten);
if (isSmallestWrite)
{
    mMinimumSamplesWritten = samplesWritten;
    mAdjustedLatency = Math.Max(samplesWritten, mTargetLatency);
}

// Smooth the written-minus-read backlog so single blocks do not cause rate jumps.
int backlog = (int)mTotalSamplesWritten - (int)mTotalSamplesRead;
mActualLatency = (int)((0.93f * mActualLatency) + (0.03f * backlog));

int rateStep = (int)(mSampleRate * (DRIFT_CORRECTION_PERCENTAGE / 100.0f));
int lowerBound = (int)(mAdjustedLatency - mDriftThreshold);
int upperBound = (int)(mAdjustedLatency + mDriftThreshold);

int playbackRate = mSampleRate;
if (mActualLatency < lowerBound)
{
    playbackRate -= rateStep;
}
else if (mActualLatency > upperBound)
{
    playbackRate += rateStep;
}
mChannel.setFrequency(playbackRate);

To implement the latency you will need access to the Channel which can be retrieved with EventInstance::getChannelGroup:

Getting the channel

// Fetch the channel group the event instance plays through, then the first channel
// inside it. The out argument needs a variable name ("out Channel channel"); the
// channel is only queried when the group lookup succeeded.
RESULT result = programmerSound.getChannelGroup(out ChannelGroup master);
Channel channel = default(Channel);
if (result == RESULT.OK)
{
    result = master.getChannel(0, out channel);
}

Have a go implementing these changes and let me know if I can assist further.

TomassoAlbinoni · March 14, 2024, 9:51am

Hey @ArnoldMule , I’m working on something similar.
Did it work for you without stuttering etc., after implementing the stuff @Connor_FMOD wrote?

ArnoldMule · March 14, 2024, 10:28am

Hey there @TomassoAlbinoni

Yes, I managed to get 95% there. I implemented some kind of a buffer, but I was not able to implement latency. I find the most confusing thing was figuring out where, in what order and when to do each step.
What I did is write to a buffer inside the AudioFilterRead, and inside Update I can check whether the other player is currently sending speech through a VivoxParticipant reference.

 /// <summary>
 /// Per-frame driver: creates the sound when the participant starts speaking, keeps
 /// feeding and positioning it while they speak, and stops the event when they stop.
 /// </summary>
 private void Update()
 {
     worldPos = transform.position;

     if (!participant.SpeechDetected)
     {
         // Only react to the speaking -> silent transition.
         if (isTalking)
         {
             isTalking = false;
             myOccluder.calculate = false;
             currentEventInstance.stop(FMOD.Studio.STOP_MODE.ALLOWFADEOUT);
             lenBytes = 0; // I'm not sure this is necessary here, but I was trying stuff out
         }

         return;
     }

     // Silent -> speaking transition: set up the sound and its event instance once.
     if (!isTalking)
     {
         myOccluder.calculate = true;
         CreateSound();
         isTalking = true;
     }

     ProcessAudioBuffer();

     if (!currentEventInstance.isValid())
     {
         return;
     }

     currentEventInstance.set3DAttributes(worldPos.To3DAttributes());
     currentEventInstance.setParameterByName("Occlusion", myOccluder.GetCurrentOcclusion());
 }

There I only create the sound and create a single instance when the other player starts talking and then start processing the audio buffer. I was previously under the impression by reading the other forum post I linked above, that I should be creating multiple instances often/periodically, but this does not seem to be the case.

Now, as I said this works great 95% of the time. Sometimes when the other player starts speaking the voice gets garbled right away at the start, and they have to stop transmitting and start again for the voice to sound clear. This only happens now and then, so I suspect the way I handle my buffer is to blame. Now this part was written by ChatGPT, since I’d be lying if I said I understand how the buffer works or should work.

// Fixed-size circular store for incoming samples and the index of the next write.
private const int bufferSize = 100000; 
private float[] ringBuffer = new float[bufferSize];
private int writePos = 0;

/// <summary>
/// Appends the received samples to the ring buffer, wrapping to the start when the
/// end is reached, and remembers the channel count of the incoming audio.
/// </summary>
private void OnAudioFilterRead(float[] data, int channels)
{
    bool nothingToStore = data == null || data.Length == 0 || systemSamplerate == 0;
    if (nothingToStore)
    {
        return;
    }

    mChannels = channels;
    //if (!participant.SpeechDetected) return;

    // Oldest samples are overwritten once the write index wraps around.
    foreach (float sample in data)
    {
        ringBuffer[writePos] = sample;
        writePos = (writePos + 1) % bufferSize;
    }
}

And this is how I process it and create a sound:

/// <summary>
/// Creates the FMOD user sound that backs the voice stream (sized to hold the whole
/// ring buffer) and starts a pooled event instance whose callback receives it.
/// Logs and returns without starting an event when the sound cannot be created.
/// </summary>
private void CreateSound()
{
    // The sound only needs the ring buffer's size, so no copy of the buffer is made.
    lenBytes = (uint)(bufferSize * sizeof(float));
    CREATESOUNDEXINFO soundInfo = new CREATESOUNDEXINFO();
    soundInfo.cbsize = Marshal.SizeOf(typeof(FMOD.CREATESOUNDEXINFO));
    soundInfo.length = lenBytes;
    soundInfo.format = SOUND_FORMAT.PCMFLOAT;
    soundInfo.numchannels = mChannels;
    soundInfo.defaultfrequency = systemSamplerate;

    createSoundResult = RuntimeManager.CoreSystem.createSound("voip", MODE.OPENUSER, ref soundInfo, out mSound);
    if (createSoundResult != RESULT.OK)
    {
        // Starting the event without a valid sound would only produce callback errors.
        UnityEngine.Debug.LogError($"Error creating the sound {createSoundResult}");
        return;
    }

    currentEventInstance = GetPooledEventInstance();
    currentEventInstance.set3DAttributes(worldPos.To3DAttributes());

    // The callback reads the sound back out of the instance's user data.
    GCHandle soundHandle = GCHandle.Alloc(mSound, GCHandleType.Pinned);
    currentEventInstance.setUserData(GCHandle.ToIntPtr(soundHandle));

    // Create the delegate once and keep it: earlier pooled instances still hold the
    // previously registered delegate, which must not become collectable.
    if (audioCallback == null)
    {
        audioCallback = new FMOD.Studio.EVENT_CALLBACK(AudioEventCallback);
    }
    currentEventInstance.setCallback(audioCallback);
    currentEventInstance.start();

    ReturnEventInstanceToPool(currentEventInstance);
}

private void ProcessAudioBuffer()
{
    if (!participant.SpeechDetected) return;

    if (ringBuffer.Length == 0)
        return;

    float[] bufferCopy = new float[bufferSize];
    Array.Copy(ringBuffer, bufferCopy, bufferSize);

    var result = ConvertArrayToFMODSound(bufferCopy, AudioSettings.outputSampleRate);
    if (result != RESULT.OK)
    {
        UnityEngine.Debug.LogError($"Error creating the sound {result}");
        return;

I hope this is of help.
Also, maybe @Connor_FMOD might have some ideas for my current problem? To reiterate, sometimes when the sound is created it’s garbled right at the start and stays garbled if the person keeps talking. This does not happen every time, and when it does not, the person can keep talking for a long time and the audio stays clear. This led me to believe there is something wrong with creating the sound/reading the buffer.
I was unable to replicate this in a simplified project. When trying to read a normal audio clip from an audio source, it just sends short looping parts through. This does not happen when the audio source is receiving a stream from Vivox.

Connor_FMOD · March 18, 2024, 9:55pm

Hi,

Is it possible to get a multiplayer project that I can test? Or could I get the full script uploaded to your profile to test in a project that I can try to whip together?

TomassoAlbinoni · March 21, 2024, 3:08pm

Similarly to what Connor asks,
It’d be great if you could share the whole class!
So far it has been very helpful, but we’re still getting crackling and we’re wondering what we might have been doing wrong.

ArnoldMule · March 25, 2024, 2:08pm

Hi @Connor_FMOD

I’m uploading a stripped down project right now. The .zip also has a build so you can test it straight away.
The relevant scripts should be at the root of the __Scripts folder, namely the AudioFilterReaderThree. I’ve written you some instructions on the UI, hope it helps.

Connor_FMOD · April 9, 2024, 5:05am

Hi,

Unfortunately, I have not been able to find a solution. However, there is a task to improve this workflow and I have noted your interest. Once there are updates I will post them here. Thank you again for your patience and I apologize that I can not assist further.

dougmolina · April 10, 2024, 7:21pm

Hi everyone, I was having the same audio stuttering problem when sending the Vivox signal to FMOD with AudioTaps. I tried so many times with different approaches and was about to give up when I finally fixed it in our game. The catch was actually using what Connor said in a previous post about using an intermediate audio buffer and setting the proper CREATESOUNDEXINFO. I currently have Unity Audio settings with buffer size at Best Performance (which is 1024) and default settings.

I implemented Latency compensation from the Video playback example, because there’s a chance that the audio buffer writes data to the buffer and is not consumed, speeding up the playback rate so there’s no delay when reading and playing the buffer.

I also have the VOIP event in FMOD as described above, using Programmer Instrument with a Loop Region around the instrument.
The AudioManager class is just a wrapper for FMOD plugin RuntimeManager, so it just loads the bank and creates the EventInstance.

It’s important that it doesn’t add data to the buffer if the channel handle does not exist, so that writing and reading are not too far apart (even if Lag Compensation would solve this).

I have to clear the data array after processing the audio so it doesn’t get played by the Unity engine AudioEmitter.

Here’s the code.

/// <summary>
/// Streams the audio received in <c>OnAudioFilterRead</c> into a looping FMOD user sound
/// that is played inside the channel group of an FMOD Studio event, using an intermediate
/// sample buffer and playback-rate drift compensation to keep writer and reader in step.
/// </summary>
public class VivoxToFmodConverter : MonoBehaviour
	{
		private const int LatencyMS = 50;
		private const int DriftMS = 1;
		private const float DriftCorrectionPercentage = 0.5f;

		private AudioModel _audioModel;

		private int _systemSampleRate;
		private EventInstance _eventInstance;
		private EVENT_CALLBACK _audioCallback;

		private CREATESOUNDEXINFO _soundInfo;
		private Sound _sound;
		private Channel _channel;

		// Samples received from Unity that have not been copied into the FMOD sound yet.
		private readonly List<float> _audioBuffer = new();
		private uint _bufferSamplesWritten;
		private uint _bufferReadPosition;
		private uint _driftThreshold;
		private uint _targetLatency;
		private uint _adjustedLatency;
		private int _actualLatency;
		private uint _totalSamplesWritten;
		private uint _totalSamplesRead;
		private uint _minimumSamplesWritten = uint.MaxValue;

		private bool _isSpeaking;

		// True once the pinned handle to the sound has been stored as event user data.
		private bool _userDataAssigned;

		public AudioInstance AudioInstance { private set; get; }

		/// <summary>
		/// Stores the audio model, creates the event instance (after loading its bank if
		/// necessary) and derives the latency targets, in samples, from the output sample rate.
		/// </summary>
		public void Setup(AudioModel audioModel)
		{
			_audioModel = audioModel;
			_systemSampleRate = AudioSettings.outputSampleRate;

			if (!AudioBankLoader.HasBankLoaded(_audioModel.Bank))
			{
				AudioBankLoader.LoadBank(_audioModel.Bank, true, CreateInstance);
			}
			else
			{
				CreateInstance();
			}

			_driftThreshold = (uint)(_systemSampleRate * DriftMS) / 1000;
			_targetLatency = (uint)(_systemSampleRate * LatencyMS) / 1000;
			_adjustedLatency = _targetLatency;
			_actualLatency = (int)_targetLatency;
		}

		/// <summary>
		/// FMOD Studio event callback. Ignores every notification until user data has been
		/// attached to the instance; afterwards it hands the stored sound to the programmer
		/// instrument, releases the instrument's sound when it is destroyed, and frees the
		/// pinned handle when the event itself is destroyed.
		/// </summary>
		[MonoPInvokeCallback(typeof(EVENT_CALLBACK))]
		private static RESULT AudioEventCallback(EVENT_CALLBACK_TYPE type, IntPtr instancePtr, IntPtr parameterPtr)
		{
			var instance = new EventInstance(instancePtr);
			instance.getUserData(out IntPtr soundPtr);

			if (soundPtr == IntPtr.Zero) return RESULT.OK;

			var soundHandle = GCHandle.FromIntPtr(soundPtr);

			switch (type)
			{
				case EVENT_CALLBACK_TYPE.CREATE_PROGRAMMER_SOUND:
				{
					var sound = (Sound)soundHandle.Target;
					var parameter = (PROGRAMMER_SOUND_PROPERTIES)Marshal.PtrToStructure(parameterPtr,
						typeof(PROGRAMMER_SOUND_PROPERTIES));
					parameter.sound = sound.handle;
					parameter.subsoundIndex = -1;
					Marshal.StructureToPtr(parameter, parameterPtr, false);
					break;
				}
				case EVENT_CALLBACK_TYPE.DESTROY_PROGRAMMER_SOUND:
				{
					// Release only the sound the instrument actually holds, and only once;
					// releasing the user-data sound as well would free the same handle twice.
					var parameter = (PROGRAMMER_SOUND_PROPERTIES)Marshal.PtrToStructure(parameterPtr,
						typeof(PROGRAMMER_SOUND_PROPERTIES));
					if (parameter.sound != IntPtr.Zero)
					{
						var instrumentSound = new Sound(parameter.sound);
						instrumentSound.release();
					}

					break;
				}
				case EVENT_CALLBACK_TYPE.DESTROYED:
				{
					soundHandle.Free();
					break;
				}
			}

			return RESULT.OK;
		}

		/// <summary>
		/// Creates the audio instance, registers the event callback on its FMOD event
		/// instance, starts the event and attaches it to this object's transform.
		/// </summary>
		private void CreateInstance()
		{
			AudioInstance = AudioManager.CreateAudioInstance(_audioModel);

			if (!AudioManager.TryGetEventInstance(AudioInstance.ID, out EventInstance eventInstance))
			{
				LogUtility.LogError("AudioInstance for VivoxParticipant has not being created:" + AudioInstance.ID,
					LogTag.Audio);
				return;
			}

			_eventInstance = eventInstance;
			// Held in a field so the delegate stays alive for as long as FMOD may call it.
			_audioCallback = AudioEventCallback;
			_eventInstance.setCallback(_audioCallback);

			_eventInstance.start();
			AudioManager.AttachInstanceToGameObject(AudioInstance.ID, transform);
		}

		/// <summary>
		/// Updates the smoothed written-minus-read backlog and nudges the channel's playback
		/// rate down when the backlog is below the target and up when it is above it.
		/// </summary>
		/// <param name="samplesWritten">Number of samples just appended to the buffer.</param>
		private void UpdateBufferLatency(uint samplesWritten)
		{
			_totalSamplesWritten += samplesWritten;

			if (samplesWritten != 0 && samplesWritten < _minimumSamplesWritten)
			{
				_minimumSamplesWritten = samplesWritten;
				_adjustedLatency = Math.Max(samplesWritten, _targetLatency);
			}

			int latency = (int)_totalSamplesWritten - (int)_totalSamplesRead;
			// NOTE(review): the two weights add up to 0.96 rather than 1, so this average
			// settles below the real backlog - confirm that this is intended.
			_actualLatency = (int)(0.93f * _actualLatency + 0.03f * latency);

			if (!_channel.hasHandle()) return;

			int playbackRate = _systemSampleRate;
			if (_actualLatency < (int)(_adjustedLatency - _driftThreshold))
			{
				playbackRate = _systemSampleRate - (int)(_systemSampleRate * (DriftCorrectionPercentage / 100.0f));
			}
			else if (_actualLatency > (int)(_adjustedLatency + _driftThreshold))
			{
				playbackRate = _systemSampleRate + (int)(_systemSampleRate * (DriftCorrectionPercentage / 100.0f));
			}

			_channel.setFrequency(playbackRate);
		}

		/// <summary>
		/// Queues the received samples (only once the FMOD channel exists), detects whether
		/// the block contains any non-zero sample, feeds FMOD, and finally zeroes
		/// <paramref name="data"/> so Unity itself outputs silence for this source.
		/// </summary>
		private void OnAudioFilterRead(float[] data, int channels)
		{
			if (_channel.hasHandle())
			{
				_audioBuffer.AddRange(data);
				UpdateBufferLatency((uint)data.Length);
			}

			_isSpeaking = false;
			foreach (float value in data)
			{
				if (value == 0) continue;

				_isSpeaking = true;
				break;
			}

			ProcessAudio(channels);

			Array.Clear(data, 0, data.Length);
		}

		/// <summary>
		/// Creates the looping user sound and starts it inside the event's channel group.
		/// On any failure nothing is kept, so the next non-silent block simply retries.
		/// </summary>
		private void StartStreamingSound(int channels)
		{
			RESULT result = _eventInstance.getChannelGroup(out ChannelGroup channelGroup);
			if (result != RESULT.OK)
			{
				// Without the event's channel group the sound would not play inside the event.
				LogUtility.LogError(result.ToString(), LogTag.Audio);
				return;
			}

			_soundInfo.cbsize = Marshal.SizeOf(typeof(CREATESOUNDEXINFO));
			_soundInfo.numchannels = channels;
			_soundInfo.defaultfrequency = _systemSampleRate;
			_soundInfo.length = _targetLatency * (uint)channels * sizeof(float);
			_soundInfo.format = SOUND_FORMAT.PCMFLOAT;

			result = RuntimeManager.CoreSystem.createSound("voip", MODE.LOOP_NORMAL | MODE.OPENUSER, ref _soundInfo,
				out _sound);
			if (result != RESULT.OK)
			{
				LogUtility.LogError(result.ToString(), LogTag.Audio);
				_sound = default;
				return;
			}

			result = RuntimeManager.CoreSystem.playSound(_sound, channelGroup, false, out _channel);
			if (result != RESULT.OK)
			{
				// Do not keep a sound that is not playing; a retry would otherwise leak it.
				LogUtility.LogError(result.ToString(), LogTag.Audio);
				_sound.release();
				_sound = default;
				_channel = default;
			}
		}

		/// <summary>
		/// Starts the streaming sound on the first non-silent block; afterwards overwrites
		/// the part of the sound the channel has already played with queued samples.
		/// </summary>
		private void ProcessAudio(int channels)
		{
			if (!_channel.hasHandle())
			{
				if (_isSpeaking)
				{
					StartStreamingSound(channels);
				}

				return;
			}

			if (_audioBuffer.Count == 0) return;

			_channel.getPosition(out uint readPosition, TIMEUNIT.PCMBYTES);

			// Bytes played since the last refill; a position at or before the previous one
			// is treated as a wrap around the end of the sound.
			uint bytesRead = readPosition - _bufferReadPosition;
			if (readPosition <= _bufferReadPosition)
			{
				bytesRead += _soundInfo.length;
			}

			if (bytesRead <= 0 || _audioBuffer.Count < bytesRead) return;

			RESULT res = _sound.@lock(_bufferReadPosition, bytesRead, out IntPtr ptr1, out IntPtr ptr2, out uint len1,
				out uint len2);
			if (res != RESULT.OK)
			{
				LogUtility.LogError(res.ToString(), LogTag.Audio);
			}

			// Though soundInfo.format is float, data retrieved from Sound::lock is in bytes,
			// so we only copy (len1+len2)/sizeof(float) full float values across
			int sampleLen1 = (int)(len1 / sizeof(float));
			int sampleLen2 = (int)(len2 / sizeof(float));
			int samplesRead = sampleLen1 + sampleLen2;
			float[] tmpBuffer = new float[samplesRead];

			_audioBuffer.CopyTo(0, tmpBuffer, 0, tmpBuffer.Length);
			_audioBuffer.RemoveRange(0, tmpBuffer.Length);

			if (len1 > 0)
			{
				Marshal.Copy(tmpBuffer, 0, ptr1, sampleLen1);
			}
			if (len2 > 0)
			{
				Marshal.Copy(tmpBuffer, sampleLen1, ptr2, sampleLen2);
			}

			res = _sound.unlock(ptr1, ptr2, len1, len2);
			if (res != RESULT.OK)
			{
				LogUtility.LogError(res.ToString(), LogTag.Audio);
			}

			_bufferReadPosition = readPosition;
			_totalSamplesRead += (uint)samplesRead;

			// Attach the sound to the event exactly once. Allocating a new pinned handle on
			// every audio callback would leak all but the last one, because the callback
			// only frees the handle that is current when the event is destroyed.
			if (!_userDataAssigned)
			{
				var soundHandle = GCHandle.Alloc(_sound, GCHandleType.Pinned);
				_eventInstance.setUserData(GCHandle.ToIntPtr(soundHandle));
				_userDataAssigned = true;
			}
		}

		/// <summary>Releases the streaming sound if one was created.</summary>
		private void OnDestroy()
		{
			if (_sound.hasHandle())
			{
				_sound.release();
			}
		}
	}

ArnoldMule · April 11, 2024, 11:41am

Hi @dougmolina
I quickly tested your script and it does seem to work better than mine. Big thanks for your contribution!
Here’s to more immersive proximity chats, cheers!

edvinas108 · February 13, 2025, 7:37pm

Working perfectly here as well! Kudos

Topic		Replies	Views
Using FMOD with voice chat utilizing OnAudioFilterRead() Unity	21	3291	December 12, 2021
Sandy noise while streaming audio/voice Unity unity , csharp	2	33	July 23, 2025
Trying to play a Voice Data stream using a ByteArray in Unreal Engine 5 Unreal Engine cpp , ue5	13	1005	April 12, 2024
Fmod events Voicecomms netcoding Unity unity , csharp	1	17	June 5, 2025
Seeking micro stuttering with FFMpeg FMOD Engine cpp	9	651	January 30, 2023

Vivox to Unity OnAudioFilterRead to FMOD programmer sound. Stutters/crackling

Related topics