Hello!
I’m just starting to learn the theory of digital audio and fmod.
My task is to record voice from a microphone for some seconds and transfer the recorded data to create an unity audioclip. I was able to implement a system that allows me to hear my microphone and read the raw data, but apparently I am reading it incorrectly - when I create an audio clip with this data, my audio speeds up several times. My starting point is https://fmod.com/docs/2.02/unity/examples-dsp-capture.html example.
FMODVoiceProcessor
using FMOD;
using FMODUnity;
using System;
using System.Runtime.InteropServices;
using UnityEngine;
public class FMODVoiceProcessor : MonoBehaviour
{
    public event Action<float[]> FrameCaptured;
    private int _recordDeviceId = 0;
    private float[] _sampleBuffer;
    private CREATESOUNDEXINFO _exinfo;
    private ChannelGroup _masterCG;
    private Channel _channel;
    private Sound _sound;
    private DSP _captureDSP;
    private GCHandle _objHandle;
    public void EnableRecording()
    {
        RESULT res = RuntimeManager.CoreSystem.getRecordNumDrivers(out int numofDrivers, out int numOfDriversConnected);
        RuntimeManager.CoreSystem.getRecordDriverInfo(_recordDeviceId, out _, 0,
            out Guid micGUID, out int _sampleRate, out SPEAKERMODE speakerMode, out int captureNumChannels, out DRIVER_STATE driverState);
        // create sound where capture is recorded
        _exinfo.cbsize = Marshal.SizeOf(typeof(CREATESOUNDEXINFO));
        _exinfo.numchannels = captureNumChannels;
        _exinfo.format = SOUND_FORMAT.PCM16;
        _exinfo.defaultfrequency = _sampleRate;
        _exinfo.length = (uint)_sampleRate * sizeof(short) * (uint)captureNumChannels;
        RuntimeManager.CoreSystem.createSound(
            _exinfo.userdata,
            MODE.LOOP_NORMAL | MODE.OPENUSER,
            ref _exinfo, out _sound);
        RuntimeManager.CoreSystem.recordStart(_recordDeviceId, _sound, true);
        // play sound on dedicated channel in master channel group
        if (RuntimeManager.CoreSystem.getMasterChannelGroup(out _masterCG) != RESULT.OK)
            UnityEngine.Debug.LogWarningFormat("FMOD: Unable to create a master channel group: masterCG");
        RuntimeManager.CoreSystem.getMasterChannelGroup(out _masterCG);
        RuntimeManager.CoreSystem.playSound(_sound, _masterCG, true, out _channel);
        _channel.setPaused(true);
        // Allocate a data buffer large enough for 8 channels, pin the memory to avoid garbage collection
        RuntimeManager.CoreSystem.getDSPBufferSize(out uint bufferLength, out int numBuffers);
        _sampleBuffer = new float[bufferLength * 8];
        // Get a handle to this object to pass into the callback
        _objHandle = GCHandle.Alloc(this);
        if (_objHandle != null)
        {
            // Define a basic DSP that receives a callback each mix to capture audio
            DSP_DESCRIPTION desc = new DSP_DESCRIPTION();
            desc.numinputbuffers = 1;
            desc.numoutputbuffers = 1;
            desc.read = CaptureDSPReadCallback;
            desc.userdata = GCHandle.ToIntPtr(_objHandle);
            // Create an instance of the capture DSP and attach it to the master channel group to capture all audio            
            if (RuntimeManager.CoreSystem.createDSP(ref desc, out _captureDSP) == RESULT.OK)
            {
                if (_masterCG.addDSP(0, _captureDSP) != RESULT.OK)
                {
                    UnityEngine.Debug.LogWarningFormat("FMOD: Unable to add mCaptureDSP to the master channel group");
                }
            }
            else
            {
                UnityEngine.Debug.LogWarningFormat("FMOD: Unable to create a DSP: mCaptureDSP");
            }
        }
        else
        {
            UnityEngine.Debug.LogWarningFormat("FMOD: Unable to create a GCHandle: mObjHandle");
        }
    }
    [AOT.MonoPInvokeCallback(typeof(DSP_READ_CALLBACK))]
    static RESULT CaptureDSPReadCallback(ref DSP_STATE dsp_state, IntPtr inbuffer, IntPtr outbuffer, uint length, int inchannels, ref int outchannels)
    {
        DSP_STATE_FUNCTIONS functions = (DSP_STATE_FUNCTIONS)Marshal.PtrToStructure(dsp_state.functions, typeof(DSP_STATE_FUNCTIONS));
        functions.getuserdata(ref dsp_state, out IntPtr userData);
        GCHandle objHandle = GCHandle.FromIntPtr(userData);
        FMODMicrophoneRecorder obj = objHandle.Target as FMODMicrophoneRecorder;
        // Copy the incoming buffer to process later
        int lengthElements = (int)length * inchannels;
        Marshal.Copy(inbuffer, obj._sampleBuffer, 0, lengthElements);
        // Copy the inbuffer to the outbuffer so we can still hear it
        Marshal.Copy(obj._sampleBuffer, 0, outbuffer, lengthElements);
        //for some reason i have 6 in/out channles and and 4 of them are empty,
        //more details here - https://qa.fmod.com/t/dsp-read-callback-show-6-in-out-channels-instead-of-1/21088,
        //so I had to extract only the non - empty data, but this event send all data from obj._sampleBuffer
        obj.FrameCaptured?.Invoke(obj._sampleBuffer);
        return RESULT.OK;
    }
    private void Start()
    {
        EnableRecording();
    }
}
MicrophoneRecorder receive data frame from FMODVoiceProcessor using an event FrameCapture
public class MicrophoneRecorder: MonoBehaviour
{
    public FMODVoiceProcessor VoiceProcessor;
    public AudioSource AudioSource;
    public AudioClip AudioClip;
    public float[] fbuffer;
    public int RecorderBufferSize = 100000;
    public int RecordedFloats = 0;
    private void OnFrameCaptured(float[] capturedFrame)
    {
        if (RecordedFloats + capturedFrame.Length > fbuffer.Length)
        {
            StopCapture();
            return;
        }
        Buffer.BlockCopy(capturedFrame, 0, fbuffer, RecordedFloats, capturedFrame.Length * 4);
        RecordedFloats += capturedFrame.Length;
    }
    [ContextMenu("CreateSound")]
    private void CreateSound()
    {
        AudioClip = AudioClip.Create("Test", fbuffer.Length, 1, 48000, false);
        AudioClip.SetData(fbuffer, 0);
    }
    [ContextMenu("PlaySound")]
    private void PlaySound()
    {
        AudioSource.clip = AudioClip;
        AudioSource.Play();
    }
    [ContextMenu("InitializeFloatBuffer")]
    private void InitializeFloatBuffer()
    {
        fbuffer = new float[RecorderBufferSize];
        RecordedFloats = 0;
    }
    [ContextMenu("StartCapture")]
    public void StartCapture()
    {
        VoiceProcessor.FrameCaptured+= OnFrameCaptured;
    }
    [ContextMenu("StopCapture")]
    public void StopCapture()
    {
        VoiceProcessor.FrameCaptured-= OnFrameCaptured;
    }
}
I would appreciate any help.
Thanks!