Get Pitch on Microphones

Hello,

I’m simply trying to “merge” two of the FMOD for Unity scripting examples: the microphone recording example and the spectrum (GetSpectrum) example.

I have this so far :

using System;
using System.Runtime.InteropServices;
using UnityEngine;

/// <summary>
/// Records from the FMOD record driver selected by <see cref="id"/>, plays the
/// recording back with a small latency, and analyses the playback channel with
/// an FFT DSP to expose its spectrum and dominant frequency.
/// </summary>
public class ScriptUsageRecordMicrophone : MonoBehaviour
{
    private FMOD.System fmodSystem;
    private FMOD.DSP mFFT;
    private float[] mFFTSpectrum;
    const int WindowSize = 1024;

    /// <summary>Index of the FMOD record driver (microphone) to record from.</summary>
    public int id;

    /// <summary>
    /// Last value read from the FFT DSP's DOMINANT_FREQ parameter; stays 0 until
    /// a value has been read successfully.
    /// </summary>
    public float DominantFrequency { get; private set; }

    private const uint LATENCY_MS = 5;
    private const uint DRIFT_MS = 1;

    private uint samplesRecorded, samplesPlayed = 0;
    private int nativeRate, nativeChannels = 0;
    private uint recSoundLength = 0;
    uint lastPlayPos = 0;
    uint lastRecordPos = 0;
    private uint driftThreshold = 0;
    private uint desiredLatency = 0;
    private uint adjustLatency = 0;
    private int actualLatency = 0;

    // Smallest non-zero record delta seen so far. It must persist across frames and
    // start at the maximum value, otherwise the "smaller than" test can never succeed.
    private uint minRecordDelta = uint.MaxValue;

    // True once recordStart succeeded, so OnDestroy knows whether to call recordStop.
    private bool isRecording = false;

    private FMOD.CREATESOUNDEXINFO exInfo = new FMOD.CREATESOUNDEXINFO();

    private FMOD.Sound recSound;
    private FMOD.Channel channel;

    // Start is called before the first frame update
    void Start()
    {
        fmodSystem = FMODUnity.RuntimeManager.CoreSystem;

        /*
            Query the selected record driver once. Looping until the driver reports a
            particular state can never terminate when the driver does not match,
            because the same driver is queried on every iteration.
        */
        FMOD.RESULT result = fmodSystem.getRecordDriverInfo(id, out string driverName, 30, out _, out nativeRate, out _, out nativeChannels, out FMOD.DRIVER_STATE driverState);
        if (result != FMOD.RESULT.OK || nativeRate <= 0 || nativeChannels <= 0)
        {
            UnityEngine.Debug.LogError($"FMOD: Unable to query record driver {id} ({result})");
            enabled = false;
            return;
        }
        UnityEngine.Debug.Log(driverName);

        if ((driverState & FMOD.DRIVER_STATE.CONNECTED) == 0)
        {
            UnityEngine.Debug.LogError($"FMOD: Record driver {id} is not connected");
            enabled = false;
            return;
        }

        /*
            Determine latency in samples.
        */
        driftThreshold = (uint)(nativeRate * DRIFT_MS) / 1000;
        desiredLatency = (uint)(nativeRate * LATENCY_MS) / 1000;
        adjustLatency = desiredLatency;
        actualLatency = (int)desiredLatency;

        /*
            Create user sound to record into, then start recording.
        */
        exInfo.cbsize = Marshal.SizeOf(typeof(FMOD.CREATESOUNDEXINFO));
        exInfo.numchannels = nativeChannels;
        exInfo.format = FMOD.SOUND_FORMAT.PCM16;
        exInfo.defaultfrequency = nativeRate;
        exInfo.length = (uint)(nativeRate * sizeof(short) * nativeChannels); // one second of PCM16 audio

        result = fmodSystem.createSound("", FMOD.MODE.LOOP_NORMAL | FMOD.MODE.OPENUSER, ref exInfo, out recSound);
        if (result != FMOD.RESULT.OK)
        {
            UnityEngine.Debug.LogError($"FMOD: Unable to create the record sound ({result})");
            enabled = false;
            return;
        }

        result = fmodSystem.recordStart(id, recSound, true);
        if (result != FMOD.RESULT.OK)
        {
            UnityEngine.Debug.LogError($"FMOD: Unable to start recording on driver {id} ({result})");
            enabled = false;
            return;
        }
        isRecording = true;

        recSound.getLength(out recSoundLength, FMOD.TIMEUNIT.PCM);
    }

    // Update is called once per frame
    void Update()
    {
        if (!isRecording)
        {
            return;
        }

        /*
            Determine how much has been recorded since we last checked
        */
        fmodSystem.getRecordPosition(id, out uint recordPos);

        // The record buffer loops, so account for wrap-around.
        uint recordDelta = (recordPos >= lastRecordPos) ? (recordPos - lastRecordPos) : (recordPos + recSoundLength - lastRecordPos);
        lastRecordPos = recordPos;
        samplesRecorded += recordDelta;

        if (recordDelta != 0 && (recordDelta < minRecordDelta))
        {
            minRecordDelta = recordDelta; // Smallest driver granularity seen so far
            adjustLatency = (recordDelta <= desiredLatency) ? desiredLatency : recordDelta; // Adjust our latency if driver granularity is high
        }

        /*
            Delay playback until our desired latency is reached.
        */
        if (!channel.hasHandle() && samplesRecorded >= adjustLatency)
        {
            StartPlaybackWithFFT();
        }

        /*
            Determine how much has been played since we last checked.
        */
        if (channel.hasHandle())
        {
            CompensateDrift();
            ReadSpectrum();
        }
    }

    // Starts playback of the recorded sound and attaches the FFT DSP to the playback channel.
    private void StartPlaybackWithFFT()
    {
        fmodSystem.getMasterChannelGroup(out FMOD.ChannelGroup mCG);
        if (fmodSystem.playSound(recSound, mCG, false, out channel) != FMOD.RESULT.OK)
        {
            UnityEngine.Debug.LogError("FMOD: Unable to play the recorded sound");
            return;
        }

        //FFT DSP
        if (fmodSystem.createDSPByType(FMOD.DSP_TYPE.FFT, out mFFT) == FMOD.RESULT.OK)
        {
            mFFT.setParameterInt((int)FMOD.DSP_FFT.WINDOWTYPE, (int)FMOD.DSP_FFT_WINDOW.HANNING);
            mFFT.setParameterInt((int)FMOD.DSP_FFT.WINDOWSIZE, WindowSize * 2);
            FMODUnity.RuntimeManager.StudioSystem.flushCommands();

            if (channel.addDSP(FMOD.CHANNELCONTROL_DSP_INDEX.HEAD, mFFT) != FMOD.RESULT.OK)
            {
                UnityEngine.Debug.LogError("FMOD: Unable to add mFFT to the playback channel");

                // Do not keep a DSP around that is not part of any DSP chain.
                mFFT.release();
                mFFT.clearHandle();
            }
        }
    }

    // Tracks the playback position and nudges the playback rate by 2% to keep the
    // distance between the record and playback cursors close to the target latency.
    private void CompensateDrift()
    {
        channel.getPosition(out uint playPos, FMOD.TIMEUNIT.PCM);

        // The sound loops, so account for wrap-around.
        uint playDelta = (playPos >= lastPlayPos) ? (playPos - lastPlayPos) : (playPos + recSoundLength - lastPlayPos);
        lastPlayPos = playPos;
        samplesPlayed += playDelta;

        // Compensate for any drift.
        int latency = (int)(samplesRecorded - samplesPlayed);
        actualLatency = (int)((0.97f * actualLatency) + (0.03f * latency));

        int playbackRate = nativeRate;
        if (actualLatency < (int)(adjustLatency - driftThreshold))
        {
            // Playback position is catching up to the record position, slow playback down by 2%
            playbackRate = nativeRate - (nativeRate / 50);
        }
        else if (actualLatency > (int)(adjustLatency + driftThreshold))
        {
            // Playback is falling behind the record position, speed playback up by 2%
            playbackRate = nativeRate + (nativeRate / 50);
        }

        channel.setFrequency((float)playbackRate);
    }

    // Copies the first channel's spectrum into mFFTSpectrum and refreshes DominantFrequency.
    private void ReadSpectrum()
    {
        if (!mFFT.hasHandle())
        {
            return;
        }

        if (mFFT.getParameterData((int)FMOD.DSP_FFT.SPECTRUMDATA, out IntPtr unmanagedData, out uint _) == FMOD.RESULT.OK)
        {
            FMOD.DSP_PARAMETER_FFT fftData = (FMOD.DSP_PARAMETER_FFT)Marshal.PtrToStructure(unmanagedData, typeof(FMOD.DSP_PARAMETER_FFT));

            if (fftData.numchannels > 0 && fftData.length > 0)
            {
                // Allocate the spectrum buffer once (and again only if the bin count changes).
                if (mFFTSpectrum == null || mFFTSpectrum.Length != fftData.length)
                {
                    mFFTSpectrum = new float[fftData.length];
                }
                fftData.getSpectrum(0, ref mFFTSpectrum);
            }
        }

        // Use the named enum member rather than its numeric index.
        if (mFFT.getParameterFloat((int)FMOD.DSP_FFT.DOMINANT_FREQ, out float dominantFrequency) == FMOD.RESULT.OK)
        {
            DominantFrequency = dominantFrequency;
        }
    }

    // Releases everything this component created: playback channel, FFT DSP,
    // the active recording and the record sound.
    private void OnDestroy()
    {
        if (channel.hasHandle())
        {
            if (mFFT.hasHandle())
            {
                channel.removeDSP(mFFT);
            }
            channel.stop();
        }

        if (mFFT.hasHandle())
        {
            mFFT.release();
            mFFT.clearHandle();
        }

        if (isRecording)
        {
            fmodSystem.recordStop(id);
            isRecording = false;
        }

        if (recSound.hasHandle())
        {
            recSound.release();
            recSound.clearHandle();
        }
    }
}

I can hear the recording, the DSP is added, and one channel is detected before the spectrum is read, but the values are either 0 or on the order of 1e-8, i.e. practically 0.
I know very little about this, so it may simply be that the settings of the two parts don’t match.

Could you help me, maybe just even by pointing some documentation ?

Thanks a lot and have a nice day !

Hi,

Thank you for sharing the code.

The problem is that fftData.getSpectrum has been deprecated, as outlined in the documentation here: FMOD Engine | White Papers - System::getSpectrum and System::getWaveData removed.

So to retrieve the pitch I used:

// Read the dominant frequency (pitch) computed by the FFT DSP.
if (mFFT.hasHandle())
{
	// Address the parameter through the enum member instead of its numeric index (3),
	// so the call documents itself and cannot silently point at another parameter.
	if (mFFT.getParameterFloat((int)FMOD.DSP_FFT.DOMINANT_FREQ, out float pitch) == FMOD.RESULT.OK)
	{
		Debug.Log($"Pitch {pitch}");
	}
}

To find the correct index I checked: FMOD Engine | Core API Reference - FMOD_DSP_FFT:

// Parameter indices of the FFT DSP, as quoted from the FMOD Core API reference.
// No explicit values are given, so members are numbered 0, 1, 2, 3 in declaration order.
enum DSP_FFT
{
    WINDOWSIZE,    // 0 - set with setParameterInt in the script above
    WINDOWTYPE,    // 1 - set with setParameterInt in the script above
    SPECTRUMDATA,  // 2 - read with getParameterData in the script above
    DOMINANT_FREQ // 3 - the index passed to getParameterFloat to read the pitch
}

Hope this helps!

Thanks, I have something which looks like a pitch now !
Btw I’m on the team which would use (int)DSP_FFT.DOMINANT_FREQ instead of 3.

Anyway, big thanks I can go back working on this karaoke thing !

1 Like

I like your team much more haha, thank you for sharing!