So I had this problem for a while. I am streaming voice audio and I constantly receiving some kind of sandy noise glitches.
My setup. I am receiving updates from voice chat for example each ~10ms. I am writing that data to the ring buffer. I then read from FMOD using ReadPCMDataCallback ~20ms (default callback rate). Everything is good, but I keep hearing that “sandy” artefacts. Maybe you know why they are happening.
This is my stripped test case with removed fast forward and other stuff. I am calling ReceivedAudioCallback every second providing 1 second worth of samples. Just constant noise. And I still keep hearing those sandy
artefacts. My guess it is some kind of thread hanging in between PCM callbacks?
Attaching audio as well, maybe it will indicate something too?
/// <summary>
/// Bridges Discord voice data to FMOD audio playback.
/// </summary>
public class DiscordToFMOD
{
// channel/sample rate defaults
private const int DEFAULT_CHANNEL_COUNT = 2;
private const int DEFAULT_SAMPLE_RATE = 48000;
private const int PCM16_SIZE = sizeof(short);
// Ring buffer and FMOD buffer configuration
// This should give enough for both delay and jitter and update rate differences
private const int SOUND_BUFFER_MS = 10000;//It was 2000 before
private const int MAX_VOICE_DELAY_MS = 100;
private const int DECODE_BUFFER_MS = 20;
// This is the size of both fmod ring buffer (probably) and our own ring buffer used for writing from discord
// We will create a buffer which will loop around with playing sound. We will write at X and read it with a bit of delay.
private const int SOUND_BUFFER_SIZE = DEFAULT_SAMPLE_RATE * DEFAULT_CHANNEL_COUNT * PCM16_SIZE / 1000 * SOUND_BUFFER_MS;
// Instead of looping with % SOUND_BUFFER_SIZE we will be doing cheaper & soundBufferMask operation, but for that we need power of two buffer
private static int SoundBufferSizeRounded = SOUND_BUFFER_SIZE;
private static int SoundBufferMask = SOUND_BUFFER_SIZE - 1;
private const int MAX_VOICE_DELAY = DEFAULT_SAMPLE_RATE * DEFAULT_CHANNEL_COUNT * PCM16_SIZE / 1000 * MAX_VOICE_DELAY_MS;
private const int DECODE_BUFFER_SIZE = DEFAULT_SAMPLE_RATE * DECODE_BUFFER_MS / 1000;
private ConcurrentDictionary<ulong, AudioUser> _audioUsers = new ConcurrentDictionary<ulong, AudioUser>();
private readonly NBG.Core.Logger Logger;
public DiscordToFMOD(NBG.Core.Logger Logger)
{
//Main thread
//NBG.Core.Log.LogInfo("DiscordToFMOD thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);
this.Logger = Logger;
//This is ring buffer size in FMOD.
//Read everything about it before changing.
//https://www.fmod.com/docs/2.00/api/core-api-system.html#system_setdspbuffersize
//RuntimeManager.CoreSystem.setDSPBufferSize(CALLBACK_RATE, 4);
//We are looing ring buffer with & operation instead of % as it is cheaper, but for that we need power of two buffer size
int frameSize = DEFAULT_CHANNEL_COUNT * PCM16_SIZE;
SoundBufferSizeRounded = math.ceilpow2(SOUND_BUFFER_SIZE);
SoundBufferSizeRounded = (SoundBufferSizeRounded / frameSize) * frameSize;
SoundBufferMask = SoundBufferSizeRounded - 1;
}
public void Destroy()
{
//Main thread
//NBG.Core.Log.LogInfo("Destroy thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);
Dispose();
}
public void Dispose()
{
//Can be main thread; Can be some random discord thread
//NBG.Core.Log.LogInfo("Dispose thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);
foreach (var audioUser in _audioUsers.Values)
{
if (audioUser != null)
{
audioUser.Dispose();
}
}
_audioUsers.Clear();
}
/// <summary>
/// Called when a remote participant is added or removed.
/// Triggered on each remote participant.
/// So if you will join existing room it will trigger for each already present member
/// It will trigger one per new client joined as well.
/// Not including yourself
/// </summary>
/// <param name="userId"></param>
/// <param name="added"></param>
public void OnRemoteParticipantStateChanged(ulong userId, bool added)
{
//Main thread
//NBG.Core.Log.LogInfo("OnParticipantChanged thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);
if (added)
{
if (!_audioUsers.ContainsKey(userId))
{
var audioUser = new AudioUser();
var success = _audioUsers.TryAdd(userId, audioUser);
}
}
else
{
if (_audioUsers.TryRemove(userId, out var audioUser))
{
if (audioUser != null)
{
audioUser.Dispose();
audioUser = null;
}
}
}
}
/// <summary>
/// Receives PCM audio from Discord and writes it into the user's buffer.
/// This is not constant stream but it will be received from user only then user voice is active
/// </summary>
/// <param name="userId"></param>
/// <param name="data"></param>
/// <param name="samplesPerChannel"></param>
/// <param name="sampleRate"></param>
/// <param name="channels"></param>
public void ReceivedAudioCallback(
ulong userId,
IntPtr data,
ulong samplesPerChannel, // for example: 480/960, etc..
int sampleRate, // default: 48000
ulong channels) // default: 2
{
//Some random discord thread
//NBG.Core.Log.LogInfo("ReceivedAudioCallback thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);
if (data == IntPtr.Zero)
{
NBG.Core.Asserts.IsTrue(false, "Discord returned null buffer.");
return;
}
// We are assuming for now that discord will always return same sampleRate and channelCount
// In reality sometimes we are receiving different, but for now just ignoring as we are not setting
if (sampleRate != DEFAULT_SAMPLE_RATE || channels != DEFAULT_CHANNEL_COUNT)
{
NBG.Core.Log.LogWarningFormat("Discord non default sampleRate:{0} channelCount:{1}", sampleRate, channels);
return;
}
// Dont do anything if audioUser is not present
if (!_audioUsers.TryGetValue(userId, out var audioUser) || audioUser == null || audioUser.AudioBuffer == null)
{
return;
}
//We will write everything in two batches. End of buffer and start of buffer if looping is happening
int frameSize = DEFAULT_CHANNEL_COUNT * PCM16_SIZE;
//Discord should always return aligned bytes so no need to correct
int byteCount = (int)(samplesPerChannel * channels * PCM16_SIZE);
unsafe
{
int writePos = audioUser.WritePosition;
NBG.Core.Asserts.IsTrue(writePos % frameSize == 0);
int firstPart = Math.Min(byteCount, SoundBufferSizeRounded - writePos);
int secondPart = byteCount - firstPart;
fixed (byte* bufferPtr = audioUser.AudioBuffer)
{
byte* dstPtr = bufferPtr + writePos;
byte* srcPtr = (byte*)data;
Buffer.MemoryCopy(srcPtr, dstPtr, firstPart, firstPart);
if (secondPart > 0)
{
Buffer.MemoryCopy(srcPtr + firstPart, bufferPtr, secondPart, secondPart);
}
}
}
// How much bytes are still not read
// This is the only atomic way of doing it
Interlocked.Add(ref audioUser._availableBytes, byteCount);
// Pointer where buffer is at for writing
audioUser.WritePosition = (audioUser.WritePosition + byteCount) & SoundBufferMask;
}
/// <summary>
/// Per-user FMOD audio bridge.
/// </summary>
private class AudioUser
{
private static readonly ConcurrentDictionary<IntPtr, AudioUser> _eventToUserMap = new();
private static readonly ConcurrentDictionary<IntPtr, AudioUser> _soundToUserMap = new();
// This is looping audio buffer for each user discord/fmod link
// We are writing from discord thread and reading from fmod thread and applying that we read to fmod buffer
public byte[] AudioBuffer;
// Discord writes at this buffer byte position
// Only used by discord thread
public int WritePosition;
// FMOD reads at this buffer position
// Only used by FMOD thread
public int ReadPosition;
//How much bytes are already written written by discord, but not read by FMOD
internal int _availableBytes;
public int AvailableBytes
{
get => Interlocked.CompareExchange(ref _availableBytes, 0, 0);
set => Interlocked.Exchange(ref _availableBytes, value);
}
private EventInstance FMODEventInstance;
private Sound FMODCustomSound;
private EVENT_CALLBACK FMODEventCallback;
private GCHandle FMODEventHandle;
private SOUND_PCMREAD_CALLBACK PCMReadCallback;
private GCHandle PCMReadHandle;
private byte[] CustomSoundDataBuffer;
private GCHandle CustomSoundDataBufferHandle;
public AudioUser()
{
//Main thread
//NBG.Core.Log.LogInfo("AudioUser thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);
AudioBuffer = ArrayPool<byte>.Shared.Rent(SoundBufferSizeRounded);
Array.Clear(AudioBuffer, 0, SoundBufferSizeRounded);
// Setup FMOD event callback (keep handle alive for GC)
FMODEventCallback = new EVENT_CALLBACK(VoiceFMODEventCallback);
FMODEventHandle = GCHandle.Alloc(FMODEventCallback);
// Settings callbacks on FMOD side for playing sound
var discordConfig = DiscordConfig.GetOrCreate();
FMODEventInstance = RuntimeManager.CreateInstance(discordConfig.AudioForVoiP);
if (!FMODEventInstance.isValid())
{
throw new Exception("Discord FMOD event is not valid!");
}
_eventToUserMap[FMODEventInstance.handle] = this;
FMODEventInstance.setCallback(FMODEventCallback, EVENT_CALLBACK_TYPE.CREATE_PROGRAMMER_SOUND);
//Start sound. it will loop infinitely
FMODEventInstance.start();
}
public void Dispose()
{
//Main thread
//NBG.Core.Log.LogInfo("Dispose thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);
if (AudioBuffer != null)
{
ArrayPool<byte>.Shared.Return(AudioBuffer);
AudioBuffer = null;
}
if (FMODEventHandle.IsAllocated)
{
FMODEventHandle.Free();
}
if (FMODEventInstance.isValid())
{
_eventToUserMap.TryRemove(FMODEventInstance.handle, out _);
FMODEventInstance.stop(FMOD.Studio.STOP_MODE.IMMEDIATE);
FMODEventInstance.release();
}
if (FMODCustomSound.hasHandle())
{
FMODCustomSound.release();
}
if (PCMReadHandle.IsAllocated)
{
PCMReadHandle.Free();
}
if (CustomSoundDataBufferHandle.IsAllocated)
{
CustomSoundDataBufferHandle.Free();
_soundToUserMap.TryRemove(FMODCustomSound.handle, out _);
}
if (CustomSoundDataBuffer != null)
{
ArrayPool<byte>.Shared.Return(CustomSoundDataBuffer);
CustomSoundDataBuffer = null;
}
}
[AOT.MonoPInvokeCallback(typeof(EVENT_CALLBACK))]
private static FMOD.RESULT VoiceFMODEventCallback(EVENT_CALLBACK_TYPE type, IntPtr _event, IntPtr parameterPtr)
{
//Some random FMOD thread
//NBG.Core.Log.LogInfo("VoiceFMODEventCallback thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);
// Recover the instance
if (!_eventToUserMap.TryGetValue(_event, out var self))
{
return RESULT.OK;
}
return self?.HandleEventCallback(type, _event, parameterPtr) ?? FMOD.RESULT.ERR_INVALID_PARAM;
}
private FMOD.RESULT HandleEventCallback(EVENT_CALLBACK_TYPE type, IntPtr _event, IntPtr parameterPtr)
{
if (type == EVENT_CALLBACK_TYPE.CREATE_PROGRAMMER_SOUND)
{
var parameters = (PROGRAMMER_SOUND_PROPERTIES)Marshal.PtrToStructure(parameterPtr, typeof(PROGRAMMER_SOUND_PROPERTIES));
//Create those callbacks only once
if (!FMODCustomSound.hasHandle())
{
CREATESOUNDEXINFO exInfo = new()
{
cbsize = Marshal.SizeOf(typeof(CREATESOUNDEXINFO)),
numchannels = DEFAULT_CHANNEL_COUNT,
defaultfrequency = DEFAULT_SAMPLE_RATE,
format = SOUND_FORMAT.PCM16,
length = (uint)SoundBufferSizeRounded,
decodebuffersize = DECODE_BUFFER_SIZE,//Will fire each X size (960 == 20ms at 48kHz)
};
//Create event and assign handle to prevent GC
PCMReadCallback = new SOUND_PCMREAD_CALLBACK(ReadPCMDataCallback);
PCMReadHandle = GCHandle.Alloc(PCMReadCallback);
exInfo.pcmreadcallback = PCMReadCallback;
//Custom data buffer provided for actual sound inside FMOD.
CustomSoundDataBuffer = ArrayPool<byte>.Shared.Rent(SoundBufferSizeRounded);
CustomSoundDataBufferHandle = GCHandle.Alloc(CustomSoundDataBuffer, GCHandleType.Pinned);
var result = RuntimeManager.CoreSystem.createSound(
CustomSoundDataBufferHandle.AddrOfPinnedObject(),
MODE.OPENUSER | MODE.LOOP_NORMAL | MODE.CREATESTREAM,
ref exInfo,
out FMODCustomSound
);
_soundToUserMap[FMODCustomSound.handle] = this;
if (result != RESULT.OK)
{
NBG.Core.Log.LogError("FMOD sound creation failed: " + result);
}
}
parameters.sound = FMODCustomSound.handle;
Marshal.StructureToPtr(parameters, parameterPtr, false);
}
return FMOD.RESULT.OK;
}
[AOT.MonoPInvokeCallback(typeof(SOUND_PCMREAD_CALLBACK))]
private static RESULT ReadPCMDataCallback(IntPtr soundPtr, IntPtr data, uint dataLength)
{
//Some random FMOD thread
//NBG.Core.Log.LogInfo("ReadPCMDataCallback thread: " + System.Threading.Thread.CurrentThread.ManagedThreadId);
var sound = new FMOD.Sound(soundPtr);
if (!_soundToUserMap.TryGetValue(sound.handle, out var self) || self == null)
{
return RESULT.OK;
}
return self.ReadPCMDataInternal(data, dataLength);
}
private RESULT ReadPCMDataInternal(IntPtr data, uint dataLength)
{
if (dataLength == 0 || data == IntPtr.Zero)
{
NBG.Core.Log.LogWarningFormat("ReadPCMDataInternal, but couldnt proceed. AvailableBytes:{0}. dataLength:{1} dataZero:{2}", AvailableBytes, dataLength, (data == IntPtr.Zero));
return RESULT.OK;
}
// Check how much bytes were written by discord
int available = AvailableBytes;
int frameSize = DEFAULT_CHANNEL_COUNT * PCM16_SIZE;
// If too much delay, skip ahead
/*int skip = available - MAX_VOICE_DELAY;
skip = (skip / frameSize) * frameSize; // round down to nearest frame
if (skip > 0)
{
NBG.Core.Log.LogWarning("Too big delay, forcefully jumping to correct delays, but might cause some artifacts.");
ReadPosition = (ReadPosition + skip) & SoundBufferMask;
NBG.Core.Asserts.IsTrue(ReadPosition % frameSize == 0);
Interlocked.Add(ref _availableBytes, -skip);
available -= skip;
}*/
int readNow = (int)Math.Min(available, dataLength);
readNow = (readNow / frameSize) * frameSize;
NBG.Core.Asserts.IsTrue(readNow % frameSize == 0);
unsafe
{
fixed (byte* bufferPtr = AudioBuffer)
{
byte* srcPtr = bufferPtr + ReadPosition;
byte* dstPtr = (byte*)data;
int firstPart = Math.Min(readNow, SoundBufferSizeRounded - ReadPosition);
int secondPart = readNow - firstPart;
Buffer.MemoryCopy(srcPtr, dstPtr, firstPart, firstPart);
if (secondPart > 0)
{
Buffer.MemoryCopy(bufferPtr, dstPtr + firstPart, secondPart, secondPart);
}
// Fill remaining buffer with zeros (silence)
/*if (readNow < dataLength)
{
for (int i = readNow; i < dataLength; i++)
{
dstPtr[i] = 0;
}
}*/
}
}
// Update read position of discord buffer
ReadPosition = (ReadPosition + readNow) & SoundBufferMask;
NBG.Core.Asserts.IsTrue(ReadPosition % frameSize == 0);
// Reduce available bytes by the amount we are reading this time
Interlocked.Add(ref _availableBytes, -readNow);
return RESULT.OK;
}
}
}