blob: e5067916b5bd8ffe95076af6af098a5926eda729 [file] [log] [blame]
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Tools for measuring latency and for detecting glitches.
* These classes are pure math and can be used with any audio system.
*/
#ifndef ANALYZER_LATENCY_ANALYZER_H
#define ANALYZER_LATENCY_ANALYZER_H
#include <algorithm>
#include <assert.h>
#include <cctype>
#include <iomanip>
#include <iostream>
#include <math.h>
#include <memory>
#include <sstream>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <vector>
#include "PeakDetector.h"
#include "PseudoRandom.h"
#include "RandomPulseGenerator.h"
// This is used when the code is in Oboe.
#ifndef ALOGD
#define ALOGD printf
#define ALOGE printf
#define ALOGW printf
#endif
#define LOOPBACK_RESULT_TAG "RESULT: "
static constexpr int32_t kDefaultSampleRate = 48000;
static constexpr int32_t kMillisPerSecond = 1000;
static constexpr int32_t kMaxLatencyMillis = 700; // arbitrary and generous
static constexpr double kMinimumConfidence = 0.2;
struct LatencyReport {
int32_t latencyInFrames = 0.0;
double confidence = 0.0;
void reset() {
latencyInFrames = 0;
confidence = 0.0;
}
};
// Calculate a normalized cross correlation.
static double calculateNormalizedCorrelation(const float *a,
const float *b,
int windowSize) {
double correlation = 0.0;
double sumProducts = 0.0;
double sumSquares = 0.0;
// Correlate a against b.
for (int i = 0; i < windowSize; i++) {
float s1 = a[i];
float s2 = b[i];
// Use a normalized cross-correlation.
sumProducts += s1 * s2;
sumSquares += ((s1 * s1) + (s2 * s2));
}
if (sumSquares >= 1.0e-9) {
correlation = 2.0 * sumProducts / sumSquares;
}
return correlation;
}
static double calculateRootMeanSquare(float *data, int32_t numSamples) {
double sum = 0.0;
for (int32_t i = 0; i < numSamples; i++) {
float sample = data[i];
sum += sample * sample;
}
return sqrt(sum / numSamples);
}
/**
* Monophonic recording with processing.
*/
class AudioRecording
{
public:
void allocate(int maxFrames) {
mData = std::make_unique<float[]>(maxFrames);
mMaxFrames = maxFrames;
}
// Write SHORT data from the first channel.
int32_t write(int16_t *inputData, int32_t inputChannelCount, int32_t numFrames) {
// stop at end of buffer
if ((mFrameCounter + numFrames) > mMaxFrames) {
numFrames = mMaxFrames - mFrameCounter;
}
for (int i = 0; i < numFrames; i++) {
mData[mFrameCounter++] = inputData[i * inputChannelCount] * (1.0f / 32768);
}
return numFrames;
}
// Write FLOAT data from the first channel.
int32_t write(float *inputData, int32_t inputChannelCount, int32_t numFrames) {
// stop at end of buffer
if ((mFrameCounter + numFrames) > mMaxFrames) {
numFrames = mMaxFrames - mFrameCounter;
}
for (int i = 0; i < numFrames; i++) {
mData[mFrameCounter++] = inputData[i * inputChannelCount];
}
return numFrames;
}
// Write FLOAT data from the first channel.
int32_t write(float sample) {
// stop at end of buffer
if (mFrameCounter < mMaxFrames) {
mData[mFrameCounter++] = sample;
return 1;
}
return 0;
}
void clear() {
mFrameCounter = 0;
}
int32_t size() const {
return mFrameCounter;
}
bool isFull() const {
return mFrameCounter >= mMaxFrames;
}
float *getData() const {
return mData.get();
}
void setSampleRate(int32_t sampleRate) {
mSampleRate = sampleRate;
}
int32_t getSampleRate() const {
return mSampleRate;
}
/**
* Square the samples so they are all positive and so the peaks are emphasized.
*/
void square() {
float *x = mData.get();
for (int i = 0; i < mFrameCounter; i++) {
x[i] *= x[i];
}
}
/**
* Amplify a signal so that the peak matches the specified target.
*
* @param target final max value
* @return gain applied to signal
*/
float normalize(float target) {
float maxValue = 1.0e-9f;
for (int i = 0; i < mFrameCounter; i++) {
maxValue = std::max(maxValue, abs(mData[i]));
}
float gain = target / maxValue;
for (int i = 0; i < mFrameCounter; i++) {
mData[i] *= gain;
}
return gain;
}
private:
std::unique_ptr<float[]> mData;
int32_t mFrameCounter = 0;
int32_t mMaxFrames = 0;
int32_t mSampleRate = kDefaultSampleRate; // common default
};
static int measureLatencyFromPulse(AudioRecording &recorded,
AudioRecording &pulse,
LatencyReport *report) {
report->latencyInFrames = 0;
report->confidence = 0.0;
int numCorrelations = recorded.size() - pulse.size();
if (numCorrelations < 10) {
ALOGE("%s() recording too small = %d frames\n", __func__, recorded.size());
return -1;
}
std::unique_ptr<float[]> correlations= std::make_unique<float[]>(numCorrelations);
// Correlate pulse against the recorded data.
for (int i = 0; i < numCorrelations; i++) {
float correlation = (float) calculateNormalizedCorrelation(&recorded.getData()[i],
&pulse.getData()[0],
pulse.size());
correlations[i] = correlation;
}
// Find highest peak in correlation array.
float peakCorrelation = 0.0;
int peakIndex = -1;
for (int i = 0; i < numCorrelations; i++) {
float value = abs(correlations[i]);
if (value > peakCorrelation) {
peakCorrelation = value;
peakIndex = i;
}
}
if (peakIndex < 0) {
ALOGE("%s() no signal for correlation\n", __func__);
return -2;
}
report->latencyInFrames = peakIndex;
report->confidence = peakCorrelation;
return 0;
}
// ====================================================================================
class LoopbackProcessor {
public:
virtual ~LoopbackProcessor() = default;
enum result_code {
RESULT_OK = 0,
ERROR_NOISY = -99,
ERROR_VOLUME_TOO_LOW,
ERROR_VOLUME_TOO_HIGH,
ERROR_CONFIDENCE,
ERROR_INVALID_STATE,
ERROR_GLITCHES,
ERROR_NO_LOCK
};
virtual void prepareToTest() {
reset();
}
virtual void reset() {
mResult = 0;
mResetCount++;
}
virtual result_code processInputFrame(float *frameData, int channelCount) = 0;
virtual result_code processOutputFrame(float *frameData, int channelCount) = 0;
void process(float *inputData, int inputChannelCount, int numInputFrames,
float *outputData, int outputChannelCount, int numOutputFrames) {
int numBoth = std::min(numInputFrames, numOutputFrames);
// Process one frame at a time.
for (int i = 0; i < numBoth; i++) {
processInputFrame(inputData, inputChannelCount);
inputData += inputChannelCount;
processOutputFrame(outputData, outputChannelCount);
outputData += outputChannelCount;
}
// If there is more input than output.
for (int i = numBoth; i < numInputFrames; i++) {
processInputFrame(inputData, inputChannelCount);
inputData += inputChannelCount;
}
// If there is more output than input.
for (int i = numBoth; i < numOutputFrames; i++) {
processOutputFrame(outputData, outputChannelCount);
outputData += outputChannelCount;
}
}
virtual std::string analyze() = 0;
virtual void printStatus() {};
int32_t getResult() {
return mResult;
}
void setResult(int32_t result) {
mResult = result;
}
virtual bool isDone() {
return false;
}
virtual int save(const char *fileName) {
(void) fileName;
return -1;
}
virtual int load(const char *fileName) {
(void) fileName;
return -1;
}
virtual void setSampleRate(int32_t sampleRate) {
mSampleRate = sampleRate;
}
int32_t getSampleRate() const {
return mSampleRate;
}
int32_t getResetCount() const {
return mResetCount;
}
/** Called when not enough input frames could be read after synchronization.
*/
virtual void onInsufficientRead() {
reset();
}
protected:
int32_t mResetCount = 0;
private:
int32_t mSampleRate = kDefaultSampleRate;
int32_t mResult = 0;
};
class LatencyAnalyzer : public LoopbackProcessor {
public:
LatencyAnalyzer() : LoopbackProcessor() {}
virtual ~LatencyAnalyzer() = default;
virtual int32_t getProgress() const = 0;
virtual int getState() = 0;
// @return latency in frames
virtual int32_t getMeasuredLatency() = 0;
virtual double getMeasuredConfidence() = 0;
virtual double getBackgroundRMS() = 0;
virtual double getSignalRMS() = 0;
};
// ====================================================================================
/**
* Measure latency given a loopback stream data.
* Use an encoded bit train as the sound source because it
* has an unambiguous correlation value.
* Uses a state machine to cycle through various stages.
*
*/
class PulseLatencyAnalyzer : public LatencyAnalyzer {
public:
PulseLatencyAnalyzer() : LatencyAnalyzer() {
int32_t maxLatencyFrames = getSampleRate() * kMaxLatencyMillis / kMillisPerSecond;
int32_t numPulseBits = getSampleRate() * kPulseLengthMillis
/ (kFramesPerEncodedBit * kMillisPerSecond);
int32_t pulseLength = numPulseBits * kFramesPerEncodedBit;
mFramesToRecord = pulseLength + maxLatencyFrames;
mAudioRecording.allocate(mFramesToRecord);
mAudioRecording.setSampleRate(getSampleRate());
generateRandomPulse(pulseLength);
}
void generateRandomPulse(int32_t pulseLength) {
mPulse.allocate(pulseLength);
RandomPulseGenerator pulser(kFramesPerEncodedBit);
for (int i = 0; i < pulseLength; i++) {
mPulse.write(pulser.nextFloat());
}
}
int getState() override {
return mState;
}
void setSampleRate(int32_t sampleRate) override {
LoopbackProcessor::setSampleRate(sampleRate);
mAudioRecording.setSampleRate(sampleRate);
}
void reset() override {
LoopbackProcessor::reset();
mDownCounter = getSampleRate() / 2;
mLoopCounter = 0;
mPulseCursor = 0;
mBackgroundSumSquare = 0.0f;
mBackgroundSumCount = 0;
mBackgroundRMS = 0.0f;
mSignalRMS = 0.0f;
mState = STATE_MEASURE_BACKGROUND;
mAudioRecording.clear();
mLatencyReport.reset();
}
bool hasEnoughData() {
return mAudioRecording.isFull();
}
bool isDone() override {
return mState == STATE_DONE;
}
int32_t getProgress() const override {
return mAudioRecording.size();
}
std::string analyze() override {
std::stringstream report;
report << "PulseLatencyAnalyzer ---------------\n";
report << LOOPBACK_RESULT_TAG "test.state = "
<< std::setw(8) << mState << "\n";
report << LOOPBACK_RESULT_TAG "test.state.name = "
<< convertStateToText(mState) << "\n";
report << LOOPBACK_RESULT_TAG "background.rms = "
<< std::setw(8) << mBackgroundRMS << "\n";
int32_t newResult = RESULT_OK;
if (mState != STATE_GOT_DATA) {
report << "WARNING - Bad state. Check volume on device.\n";
// setResult(ERROR_INVALID_STATE);
} else {
float gain = mAudioRecording.normalize(1.0f);
measureLatencyFromPulse(mAudioRecording,
mPulse,
&mLatencyReport);
if (mLatencyReport.confidence < kMinimumConfidence) {
report << " ERROR - confidence too low!";
newResult = ERROR_CONFIDENCE;
} else {
mSignalRMS = calculateRootMeanSquare(
&mAudioRecording.getData()[mLatencyReport.latencyInFrames], mPulse.size())
/ gain;
}
double latencyMillis = kMillisPerSecond * (double) mLatencyReport.latencyInFrames
/ getSampleRate();
report << LOOPBACK_RESULT_TAG "latency.frames = " << std::setw(8)
<< mLatencyReport.latencyInFrames << "\n";
report << LOOPBACK_RESULT_TAG "latency.msec = " << std::setw(8)
<< latencyMillis << "\n";
report << LOOPBACK_RESULT_TAG "latency.confidence = " << std::setw(8)
<< mLatencyReport.confidence << "\n";
}
mState = STATE_DONE;
if (getResult() == RESULT_OK) {
setResult(newResult);
}
return report.str();
}
int32_t getMeasuredLatency() override {
return mLatencyReport.latencyInFrames;
}
double getMeasuredConfidence() override {
return mLatencyReport.confidence;
}
double getBackgroundRMS() override {
return mBackgroundRMS;
}
double getSignalRMS() override {
return mSignalRMS;
}
void printStatus() override {
ALOGD("st = %d", mState);
}
result_code processInputFrame(float *frameData, int channelCount) override {
echo_state nextState = mState;
mLoopCounter++;
switch (mState) {
case STATE_MEASURE_BACKGROUND:
// Measure background RMS on channel 0
mBackgroundSumSquare += frameData[0] * frameData[0];
mBackgroundSumCount++;
mDownCounter--;
if (mDownCounter <= 0) {
mBackgroundRMS = sqrtf(mBackgroundSumSquare / mBackgroundSumCount);
nextState = STATE_IN_PULSE;
mPulseCursor = 0;
}
break;
case STATE_IN_PULSE:
// Record input until the mAudioRecording is full.
mAudioRecording.write(frameData, channelCount, 1);
if (hasEnoughData()) {
nextState = STATE_GOT_DATA;
}
break;
case STATE_GOT_DATA:
case STATE_DONE:
default:
break;
}
mState = nextState;
return RESULT_OK;
}
result_code processOutputFrame(float *frameData, int channelCount) override {
switch (mState) {
case STATE_IN_PULSE:
if (mPulseCursor < mPulse.size()) {
float pulseSample = mPulse.getData()[mPulseCursor++];
for (int i = 0; i < channelCount; i++) {
frameData[i] = pulseSample;
}
} else {
for (int i = 0; i < channelCount; i++) {
frameData[i] = 0;
}
}
break;
case STATE_MEASURE_BACKGROUND:
case STATE_GOT_DATA:
case STATE_DONE:
default:
for (int i = 0; i < channelCount; i++) {
frameData[i] = 0.0f; // silence
}
break;
}
return RESULT_OK;
}
private:
enum echo_state {
STATE_MEASURE_BACKGROUND,
STATE_IN_PULSE,
STATE_GOT_DATA, // must match RoundTripLatencyActivity.java
STATE_DONE,
};
const char *convertStateToText(echo_state state) {
switch (state) {
case STATE_MEASURE_BACKGROUND:
return "INIT";
case STATE_IN_PULSE:
return "PULSE";
case STATE_GOT_DATA:
return "GOT_DATA";
case STATE_DONE:
return "DONE";
}
return "UNKNOWN";
}
int32_t mDownCounter = 500;
int32_t mLoopCounter = 0;
echo_state mState = STATE_MEASURE_BACKGROUND;
static constexpr int32_t kFramesPerEncodedBit = 8; // multiple of 2
static constexpr int32_t kPulseLengthMillis = 500;
AudioRecording mPulse;
int32_t mPulseCursor = 0;
double mBackgroundSumSquare = 0.0;
int32_t mBackgroundSumCount = 0;
double mBackgroundRMS = 0.0;
double mSignalRMS = 0.0;
int32_t mFramesToRecord = 0;
AudioRecording mAudioRecording; // contains only the input after starting the pulse
LatencyReport mLatencyReport;
};
#endif // ANALYZER_LATENCY_ANALYZER_H