| /* |
| * Copyright (C) 2017 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /** |
| * Tools for measuring latency and for detecting glitches. |
| * These classes are pure math and can be used with any audio system. |
| */ |
| |
| #ifndef ANALYZER_LATENCY_ANALYZER_H |
| #define ANALYZER_LATENCY_ANALYZER_H |
| |
| #include <algorithm> |
| #include <assert.h> |
| #include <cctype> |
| #include <iomanip> |
| #include <iostream> |
| #include <math.h> |
| #include <memory> |
| #include <sstream> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <unistd.h> |
| #include <vector> |
| |
| #include "PeakDetector.h" |
| #include "PseudoRandom.h" |
| #include "RandomPulseGenerator.h" |
| |
// Fallback logging macros, used when the code is not built inside Android.
// If ALOGD is not already defined, the ALOG* names are mapped onto LOGD/LOGE/LOGW.
// NOTE(review): LOGD/LOGE/LOGW are not defined in this header; presumably the
// including code provides them - confirm.
#ifndef ALOGD
#define ALOGD LOGD
#define ALOGE LOGE
#define ALOGW LOGW
#endif

// Prefix prepended to the "key = value" lines of the text report that
// PulseLatencyAnalyzer::analyze() builds.
#define LOOPBACK_RESULT_TAG "RESULT: "
| |
// Sample rate assumed until setSampleRate() is called on a recording or processor.
static constexpr int32_t kDefaultSampleRate{48000};
// Conversion factor between seconds and milliseconds; by definition.
static constexpr int32_t kMillisPerSecond{1000};
// Longest latency the analyzers leave room for; arbitrary and generous.
static constexpr int32_t kMaxLatencyMillis{1000};
| |
/**
 * Result of a latency measurement.
 */
struct LatencyReport {
    // Offset, in frames, of the strongest correlation peak within the recording.
    int32_t latencyInFrames = 0;
    // Magnitude of the correlation at that peak.
    double correlation = 0.0;

    /** Restore both fields to their initial "nothing measured" values. */
    void reset() {
        latencyInFrames = 0;
        correlation = 0.0;
    }
};
| |
/**
 * Compute a normalized cross-correlation of two equally sized windows.
 *
 * The result is 2 * sum(a[i] * b[i]) / sum(a[i]^2 + b[i]^2).
 * If the combined energy of both windows is below 1.0e-9 the result is 0.0.
 *
 * @param a first window, read for windowSize samples
 * @param b second window, read for windowSize samples
 * @param windowSize number of samples to correlate
 * @return value between -1.0 and 1.0
 */
static float calculateNormalizedCorrelation(const float *a,
                                            const float *b,
                                            int windowSize) {
    float productSum = 0.0;
    float energySum = 0.0;

    // Accumulate the cross products and the total energy of both windows.
    for (int n = 0; n < windowSize; n++) {
        const float left = a[n];
        const float right = b[n];
        productSum += left * right;
        energySum += ((left * left) + (right * right));
    }

    // Not enough energy to form a meaningful ratio.
    if (!(energySum >= 1.0e-9)) {
        return 0.0f;
    }
    return 2.0 * productSum / energySum;
}
| |
/**
 * Calculate the root mean square of a block of samples.
 *
 * @param data samples to measure; only read, never modified
 * @param numSamples number of samples in data
 * @return RMS of the samples, or 0.0 when numSamples is not positive
 */
static double calculateRootMeanSquare(const float *data, int32_t numSamples) {
    // An empty window would otherwise divide zero by zero and yield NaN.
    if (numSamples <= 0) {
        return 0.0;
    }
    double sumOfSquares = 0.0;
    for (int32_t i = 0; i < numSamples; i++) {
        const double sample = data[i];
        sumOfSquares += sample * sample;
    }
    return sqrt(sumOfSquares / numSamples);
}
| |
| /** |
| * Monophonic recording with processing. |
| * Samples are stored as floats internally. |
| */ |
| class AudioRecording |
| { |
| public: |
| |
| void allocate(int maxFrames) { |
| mData = std::make_unique<float[]>(maxFrames); |
| mMaxFrames = maxFrames; |
| mFrameCounter = 0; |
| } |
| |
| // Write SHORT data from the first channel. |
| int32_t write(const int16_t *inputData, int32_t inputChannelCount, int32_t numFrames) { |
| // stop at end of buffer |
| if ((mFrameCounter + numFrames) > mMaxFrames) { |
| numFrames = mMaxFrames - mFrameCounter; |
| } |
| for (int i = 0; i < numFrames; i++) { |
| mData[mFrameCounter++] = inputData[i * inputChannelCount] * (1.0f / 32768); |
| } |
| return numFrames; |
| } |
| |
| // Write FLOAT data from the first channel. |
| int32_t write(const float *inputData, int32_t inputChannelCount, int32_t numFrames) { |
| // stop at end of buffer |
| if ((mFrameCounter + numFrames) > mMaxFrames) { |
| numFrames = mMaxFrames - mFrameCounter; |
| } |
| for (int i = 0; i < numFrames; i++) { |
| mData[mFrameCounter++] = inputData[i * inputChannelCount]; |
| } |
| return numFrames; |
| } |
| |
| // Write single FLOAT value. |
| int32_t write(float sample) { |
| // stop at end of buffer |
| if (mFrameCounter < mMaxFrames) { |
| mData[mFrameCounter++] = sample; |
| return 1; |
| } |
| return 0; |
| } |
| |
| void clear() { |
| mFrameCounter = 0; |
| } |
| |
| int32_t size() const { |
| return mFrameCounter; |
| } |
| |
| bool isFull() const { |
| return mFrameCounter >= mMaxFrames; |
| } |
| |
| float *getData() const { |
| return mData.get(); |
| } |
| |
| void setSampleRate(int32_t sampleRate) { |
| mSampleRate = sampleRate; |
| } |
| |
| int32_t getSampleRate() const { |
| return mSampleRate; |
| } |
| |
| /** |
| * Square the samples so they are all positive and so the peaks are emphasized. |
| */ |
| void square() { |
| float *x = mData.get(); |
| for (int i = 0; i < mFrameCounter; i++) { |
| x[i] *= x[i]; |
| } |
| } |
| |
| // Envelope follower that rides over the peak values. |
| void detectPeaks(float decay) { |
| float level = 0.0f; |
| float *x = mData.get(); |
| for (int i = 0; i < mFrameCounter; i++) { |
| level *= decay; // exponential decay |
| float input = fabs(x[i]); |
| // never fall below the input signal |
| if (input > level) { |
| level = input; |
| } |
| x[i] = level; // write result back into the array |
| } |
| } |
| |
| /** |
| * Amplify a signal so that the peak matches the specified target. |
| * |
| * @param target final max value |
| * @return gain applied to signal |
| */ |
| float normalize(float target) { |
| float maxValue = 1.0e-9f; |
| for (int i = 0; i < mFrameCounter; i++) { |
| maxValue = std::max(maxValue, abs(mData[i])); |
| } |
| float gain = target / maxValue; |
| for (int i = 0; i < mFrameCounter; i++) { |
| mData[i] *= gain; |
| } |
| return gain; |
| } |
| |
| private: |
| std::unique_ptr<float[]> mData; |
| int32_t mFrameCounter = 0; |
| int32_t mMaxFrames = 0; |
| int32_t mSampleRate = kDefaultSampleRate; // common default |
| }; |
| |
| static int measureLatencyFromPulse(AudioRecording &recorded, |
| AudioRecording &pulse, |
| LatencyReport *report) { |
| |
| report->reset(); |
| |
| int numCorrelations = recorded.size() - pulse.size(); |
| if (numCorrelations < 10) { |
| ALOGE("%s() recording too small = %d frames\n", __func__, recorded.size()); |
| return -1; |
| } |
| std::unique_ptr<float[]> correlations= std::make_unique<float[]>(numCorrelations); |
| |
| // Correlate pulse against the recorded data. |
| for (int i = 0; i < numCorrelations; i++) { |
| float correlation = calculateNormalizedCorrelation(&recorded.getData()[i], |
| &pulse.getData()[0], |
| pulse.size()); |
| correlations[i] = correlation; |
| } |
| |
| // Find highest peak in correlation array. |
| float peakCorrelation = 0.0; |
| int peakIndex = -1; |
| for (int i = 0; i < numCorrelations; i++) { |
| float value = abs(correlations[i]); |
| if (value > peakCorrelation) { |
| peakCorrelation = value; |
| peakIndex = i; |
| } |
| } |
| if (peakIndex < 0) { |
| ALOGE("%s() no signal for correlation\n", __func__); |
| return -2; |
| } |
| #if 0 |
| // Dump correlation data for charting. |
| else { |
| const int margin = 50; |
| int startIndex = std::max(0, peakIndex - margin); |
| int endIndex = std::min(numCorrelations - 1, peakIndex + margin); |
| for (int index = startIndex; index < endIndex; index++) { |
| ALOGD("Correlation, %d, %f", index, correlations[index]); |
| } |
| } |
| #endif |
| |
| report->latencyInFrames = peakIndex; |
| report->correlation = peakCorrelation; |
| |
| return 0; |
| } |
| |
| // ==================================================================================== |
| class LoopbackProcessor { |
| public: |
| virtual ~LoopbackProcessor() = default; |
| |
| enum result_code { |
| RESULT_OK = 0, |
| ERROR_NOISY = -99, |
| ERROR_VOLUME_TOO_LOW, |
| ERROR_VOLUME_TOO_HIGH, |
| ERROR_CONFIDENCE, |
| ERROR_INVALID_STATE, |
| ERROR_GLITCHES, |
| ERROR_NO_LOCK |
| }; |
| |
| virtual void prepareToTest() { |
| reset(); |
| } |
| |
| virtual void reset() { |
| mResult = 0; |
| mResetCount++; |
| } |
| |
| virtual result_code processInputFrame(const float *frameData, int channelCount) = 0; |
| virtual result_code processOutputFrame(float *frameData, int channelCount) = 0; |
| |
| void process(const float *inputData, int inputChannelCount, int numInputFrames, |
| float *outputData, int outputChannelCount, int numOutputFrames) { |
| int numBoth = std::min(numInputFrames, numOutputFrames); |
| // Process one frame at a time. |
| for (int i = 0; i < numBoth; i++) { |
| processInputFrame(inputData, inputChannelCount); |
| inputData += inputChannelCount; |
| processOutputFrame(outputData, outputChannelCount); |
| outputData += outputChannelCount; |
| } |
| // If there is more input than output. |
| for (int i = numBoth; i < numInputFrames; i++) { |
| processInputFrame(inputData, inputChannelCount); |
| inputData += inputChannelCount; |
| } |
| // If there is more output than input. |
| for (int i = numBoth; i < numOutputFrames; i++) { |
| processOutputFrame(outputData, outputChannelCount); |
| outputData += outputChannelCount; |
| } |
| } |
| |
| virtual std::string analyze() = 0; |
| |
| virtual void printStatus() {}; |
| |
| int32_t getResult() { |
| return mResult; |
| } |
| |
| void setResult(int32_t result) { |
| mResult = result; |
| } |
| |
| virtual bool isDone() { |
| return false; |
| } |
| |
| virtual int save(const char *fileName) { |
| (void) fileName; |
| return -1; |
| } |
| |
| virtual int load(const char *fileName) { |
| (void) fileName; |
| return -1; |
| } |
| |
| virtual void setSampleRate(int32_t sampleRate) { |
| mSampleRate = sampleRate; |
| } |
| |
| int32_t getSampleRate() const { |
| return mSampleRate; |
| } |
| |
| int32_t getResetCount() const { |
| return mResetCount; |
| } |
| |
| /** Called when not enough input frames could be read after synchronization. |
| */ |
| virtual void onInsufficientRead() { |
| reset(); |
| } |
| |
| protected: |
| int32_t mResetCount = 0; |
| |
| private: |
| int32_t mSampleRate = kDefaultSampleRate; |
| int32_t mResult = 0; |
| }; |
| |
| class LatencyAnalyzer : public LoopbackProcessor { |
| public: |
| |
| LatencyAnalyzer() : LoopbackProcessor() {} |
| virtual ~LatencyAnalyzer() = default; |
| |
| /** |
| * Call this after the constructor because it calls other virtual methods. |
| */ |
| virtual void setup() = 0; |
| |
| virtual int32_t getProgress() const = 0; |
| |
| virtual int getState() const = 0; |
| |
| // @return latency in frames |
| virtual int32_t getMeasuredLatency() const = 0; |
| |
| /** |
| * This is an overall confidence in the latency result based on correlation, SNR, etc. |
| * @return probability value between 0.0 and 1.0 |
| */ |
| double getMeasuredConfidence() const { |
| // Limit the ratio and prevent divide-by-zero. |
| double noiseSignalRatio = getSignalRMS() <= getBackgroundRMS() |
| ? 1.0 : getBackgroundRMS() / getSignalRMS(); |
| // Prevent high background noise and low signals from generating false matches. |
| double adjustedConfidence = getMeasuredCorrelation() - noiseSignalRatio; |
| return std::max(0.0, adjustedConfidence); |
| } |
| |
| /** |
| * Cross correlation value for the noise pulse against |
| * the corresponding position in the normalized recording. |
| * |
| * @return value between -1.0 and 1.0 |
| */ |
| virtual double getMeasuredCorrelation() const = 0; |
| |
| virtual double getBackgroundRMS() const = 0; |
| |
| virtual double getSignalRMS() const = 0; |
| |
| virtual bool hasEnoughData() const = 0; |
| }; |
| |
| // ==================================================================================== |
| /** |
| * Measure latency given a loopback stream data. |
| * Use an encoded bit train as the sound source because it |
| * has an unambiguous correlation value. |
| * Uses a state machine to cycle through various stages. |
| * |
| */ |
| class PulseLatencyAnalyzer : public LatencyAnalyzer { |
| public: |
| |
| void setup() override { |
| int32_t pulseLength = calculatePulseLength(); |
| int32_t maxLatencyFrames = getSampleRate() * kMaxLatencyMillis / kMillisPerSecond; |
| mFramesToRecord = pulseLength + maxLatencyFrames; |
| mAudioRecording.allocate(mFramesToRecord); |
| mAudioRecording.setSampleRate(getSampleRate()); |
| } |
| |
| int getState() const override { |
| return mState; |
| } |
| |
| void setSampleRate(int32_t sampleRate) override { |
| LoopbackProcessor::setSampleRate(sampleRate); |
| mAudioRecording.setSampleRate(sampleRate); |
| } |
| |
| void reset() override { |
| LoopbackProcessor::reset(); |
| mState = STATE_MEASURE_BACKGROUND; |
| mDownCounter = (int32_t) (getSampleRate() * kBackgroundMeasurementLengthSeconds); |
| mLoopCounter = 0; |
| |
| mPulseCursor = 0; |
| mBackgroundSumSquare = 0.0f; |
| mBackgroundSumCount = 0; |
| mBackgroundRMS = 0.0f; |
| mSignalRMS = 0.0f; |
| |
| generatePulseRecording(calculatePulseLength()); |
| mAudioRecording.clear(); |
| mLatencyReport.reset(); |
| } |
| |
| bool hasEnoughData() const override { |
| return mAudioRecording.isFull(); |
| } |
| |
| bool isDone() override { |
| return mState == STATE_DONE; |
| } |
| |
| int32_t getProgress() const override { |
| return mAudioRecording.size(); |
| } |
| |
| std::string analyze() override { |
| std::stringstream report; |
| report << "PulseLatencyAnalyzer ---------------\n"; |
| report << LOOPBACK_RESULT_TAG "test.state = " |
| << std::setw(8) << mState << "\n"; |
| report << LOOPBACK_RESULT_TAG "test.state.name = " |
| << convertStateToText(mState) << "\n"; |
| report << LOOPBACK_RESULT_TAG "background.rms = " |
| << std::setw(8) << mBackgroundRMS << "\n"; |
| |
| int32_t newResult = RESULT_OK; |
| if (mState != STATE_GOT_DATA) { |
| report << "WARNING - Bad state. Check volume on device.\n"; |
| // setResult(ERROR_INVALID_STATE); |
| } else { |
| float gain = mAudioRecording.normalize(1.0f); |
| measureLatency(); |
| |
| // Calculate signalRMS even if it is bogus. |
| // Also it may be used in the confidence calculation below. |
| mSignalRMS = calculateRootMeanSquare( |
| &mAudioRecording.getData()[mLatencyReport.latencyInFrames], mPulse.size()) |
| / gain; |
| if (getMeasuredConfidence() < getMinimumConfidence()) { |
| report << " ERROR - confidence too low!"; |
| newResult = ERROR_CONFIDENCE; |
| } |
| |
| double latencyMillis = kMillisPerSecond * (double) mLatencyReport.latencyInFrames |
| / getSampleRate(); |
| report << LOOPBACK_RESULT_TAG "latency.frames = " << std::setw(8) |
| << mLatencyReport.latencyInFrames << "\n"; |
| report << LOOPBACK_RESULT_TAG "latency.msec = " << std::setw(8) |
| << latencyMillis << "\n"; |
| report << LOOPBACK_RESULT_TAG "latency.confidence = " << std::setw(8) |
| << getMeasuredConfidence() << "\n"; |
| report << LOOPBACK_RESULT_TAG "latency.correlation = " << std::setw(8) |
| << getMeasuredCorrelation() << "\n"; |
| } |
| mState = STATE_DONE; |
| if (getResult() == RESULT_OK) { |
| setResult(newResult); |
| } |
| |
| return report.str(); |
| } |
| |
| int32_t getMeasuredLatency() const override { |
| return mLatencyReport.latencyInFrames; |
| } |
| |
| double getMeasuredCorrelation() const override { |
| return mLatencyReport.correlation; |
| } |
| |
| double getBackgroundRMS() const override { |
| return mBackgroundRMS; |
| } |
| |
| double getSignalRMS() const override { |
| return mSignalRMS; |
| } |
| |
| bool isRecordingComplete() { |
| return mState == STATE_GOT_DATA; |
| } |
| |
| void printStatus() override { |
| ALOGD("latency: st = %d = %s", mState, convertStateToText(mState)); |
| } |
| |
| result_code processInputFrame(const float *frameData, int /* channelCount */) override { |
| echo_state nextState = mState; |
| mLoopCounter++; |
| float input = frameData[0]; |
| |
| switch (mState) { |
| case STATE_MEASURE_BACKGROUND: |
| // Measure background RMS on channel 0 |
| mBackgroundSumSquare += static_cast<double>(input) * input; |
| mBackgroundSumCount++; |
| mDownCounter--; |
| if (mDownCounter <= 0) { |
| mBackgroundRMS = sqrtf(mBackgroundSumSquare / mBackgroundSumCount); |
| nextState = STATE_IN_PULSE; |
| mPulseCursor = 0; |
| } |
| break; |
| |
| case STATE_IN_PULSE: |
| // Record input until the mAudioRecording is full. |
| mAudioRecording.write(input); |
| if (hasEnoughData()) { |
| nextState = STATE_GOT_DATA; |
| } |
| break; |
| |
| case STATE_GOT_DATA: |
| case STATE_DONE: |
| default: |
| break; |
| } |
| |
| mState = nextState; |
| return RESULT_OK; |
| } |
| |
| result_code processOutputFrame(float *frameData, int channelCount) override { |
| switch (mState) { |
| case STATE_IN_PULSE: |
| if (mPulseCursor < mPulse.size()) { |
| float pulseSample = mPulse.getData()[mPulseCursor++]; |
| for (int i = 0; i < channelCount; i++) { |
| frameData[i] = pulseSample; |
| } |
| } else { |
| for (int i = 0; i < channelCount; i++) { |
| frameData[i] = 0; |
| } |
| } |
| break; |
| |
| case STATE_MEASURE_BACKGROUND: |
| case STATE_GOT_DATA: |
| case STATE_DONE: |
| default: |
| for (int i = 0; i < channelCount; i++) { |
| frameData[i] = 0.0f; // silence |
| } |
| break; |
| } |
| |
| return RESULT_OK; |
| } |
| |
| protected: |
| |
| virtual int32_t calculatePulseLength() const = 0; |
| |
| virtual void generatePulseRecording(int32_t pulseLength) = 0; |
| |
| virtual void measureLatency() = 0; |
| |
| virtual double getMinimumConfidence() const { |
| return 0.5; |
| } |
| |
| AudioRecording mPulse; |
| AudioRecording mAudioRecording; // contains only the input after starting the pulse |
| LatencyReport mLatencyReport; |
| |
| static constexpr int32_t kPulseLengthMillis = 500; |
| float mPulseAmplitude = 0.5f; |
| double mBackgroundRMS = 0.0; |
| double mSignalRMS = 0.0; |
| |
| private: |
| |
| enum echo_state { |
| STATE_MEASURE_BACKGROUND, |
| STATE_IN_PULSE, |
| STATE_GOT_DATA, // must match RoundTripLatencyActivity.java |
| STATE_DONE, |
| }; |
| |
| const char *convertStateToText(echo_state state) { |
| switch (state) { |
| case STATE_MEASURE_BACKGROUND: |
| return "INIT"; |
| case STATE_IN_PULSE: |
| return "PULSE"; |
| case STATE_GOT_DATA: |
| return "GOT_DATA"; |
| case STATE_DONE: |
| return "DONE"; |
| } |
| return "UNKNOWN"; |
| } |
| |
| int32_t mDownCounter = 500; |
| int32_t mLoopCounter = 0; |
| echo_state mState = STATE_MEASURE_BACKGROUND; |
| |
| static constexpr double kBackgroundMeasurementLengthSeconds = 0.5; |
| |
| int32_t mPulseCursor = 0; |
| |
| double mBackgroundSumSquare = 0.0; |
| int32_t mBackgroundSumCount = 0; |
| int32_t mFramesToRecord = 0; |
| |
| }; |
| |
| /** |
| * This algorithm uses a series of random bits encoded using the |
| * Manchester encoder. It works well for wired loopback but not very well for |
| * through the air loopback. |
| */ |
| class EncodedRandomLatencyAnalyzer : public PulseLatencyAnalyzer { |
| |
| protected: |
| |
| int32_t calculatePulseLength() const override { |
| // Calculate integer number of bits. |
| int32_t numPulseBits = getSampleRate() * kPulseLengthMillis |
| / (kFramesPerEncodedBit * kMillisPerSecond); |
| return numPulseBits * kFramesPerEncodedBit; |
| } |
| |
| void generatePulseRecording(int32_t pulseLength) override { |
| mPulse.allocate(pulseLength); |
| RandomPulseGenerator pulser(kFramesPerEncodedBit); |
| for (int i = 0; i < pulseLength; i++) { |
| mPulse.write(pulser.nextFloat() * mPulseAmplitude); |
| } |
| } |
| |
| double getMinimumConfidence() const override { |
| return 0.2; |
| } |
| |
| void measureLatency() override { |
| measureLatencyFromPulse(mAudioRecording, |
| mPulse, |
| &mLatencyReport); |
| } |
| |
| private: |
| static constexpr int32_t kFramesPerEncodedBit = 8; // multiple of 2 |
| }; |
| |
| /** |
| * This algorithm uses White Noise sent in a short burst pattern. |
| * The original signal and the recorded signal are then run through |
| * an envelope follower to convert the fine detail into more of |
| * a rectangular block before the correlation phase. |
| */ |
| class WhiteNoiseLatencyAnalyzer : public PulseLatencyAnalyzer { |
| |
| protected: |
| |
| int32_t calculatePulseLength() const override { |
| return getSampleRate() * kPulseLengthMillis / kMillisPerSecond; |
| } |
| |
| void generatePulseRecording(int32_t pulseLength) override { |
| mPulse.allocate(pulseLength); |
| // Turn the noise on and off to sharpen the correlation peak. |
| // Use more zeros than ones so that the correlation will be less than 0.5 even when there |
| // is a strong background noise. |
| int8_t pattern[] = {1, 0, 0, |
| 1, 1, 0, 0, 0, |
| 1, 1, 1, 0, 0, 0, 0, |
| 1, 1, 1, 1, 0, 0, 0, 0, 0 |
| }; |
| PseudoRandom random; |
| const int32_t numSections = sizeof(pattern); |
| const int32_t framesPerSection = pulseLength / numSections; |
| for (int section = 0; section < numSections; section++) { |
| if (pattern[section]) { |
| for (int i = 0; i < framesPerSection; i++) { |
| mPulse.write((float) (random.nextRandomDouble() * mPulseAmplitude)); |
| } |
| } else { |
| for (int i = 0; i < framesPerSection; i++) { |
| mPulse.write(0.0f); |
| } |
| } |
| } |
| // Write any remaining frames. |
| int32_t framesWritten = framesPerSection * numSections; |
| for (int i = framesWritten; i < pulseLength; i++) { |
| mPulse.write(0.0f); |
| } |
| } |
| |
| void measureLatency() override { |
| // Smooth out the noise so we see rectangular blocks. |
| // This improves immunity against phase cancellation and distortion. |
| static constexpr float decay = 0.99f; // just under 1.0, lower numbers decay faster |
| mAudioRecording.detectPeaks(decay); |
| mPulse.detectPeaks(decay); |
| measureLatencyFromPulse(mAudioRecording, |
| mPulse, |
| &mLatencyReport); |
| } |
| |
| }; |
| |
| #endif // ANALYZER_LATENCY_ANALYZER_H |