Add interface to propagate audio capture timestamp to the renderer.
BUG=3111
R=andrew@webrtc.org, turaj@webrtc.org, xians@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/12239004
git-svn-id: http://webrtc.googlecode.com/svn/trunk@6189 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/talk/app/webrtc/test/fakeaudiocapturemodule.cc b/talk/app/webrtc/test/fakeaudiocapturemodule.cc
index 3b36163..72d39c9 100644
--- a/talk/app/webrtc/test/fakeaudiocapturemodule.cc
+++ b/talk/app/webrtc/test/fakeaudiocapturemodule.cc
@@ -728,11 +728,22 @@
}
ResetRecBuffer();
uint32_t nSamplesOut = 0;
+#ifdef USE_WEBRTC_DEV_BRANCH
+ uint32_t rtp_timestamp = 0;
+ int64_t ntp_time_ms = 0;
+ if (audio_callback_->NeedMorePlayData(kNumberSamples, kNumberBytesPerSample,
+ kNumberOfChannels, kSamplesPerSecond,
+ rec_buffer_, nSamplesOut,
+ &rtp_timestamp, &ntp_time_ms) != 0) {
+ ASSERT(false);
+ }
+#else
if (audio_callback_->NeedMorePlayData(kNumberSamples, kNumberBytesPerSample,
kNumberOfChannels, kSamplesPerSecond,
rec_buffer_, nSamplesOut) != 0) {
ASSERT(false);
}
+#endif
ASSERT(nSamplesOut == kNumberSamples);
}
// The SetBuffer() function ensures that after decoding, the audio buffer
diff --git a/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc b/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc
index 5738955..ea92f7b 100644
--- a/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc
+++ b/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc
@@ -84,13 +84,23 @@
const uint8_t nChannels,
const uint32_t samplesPerSec,
void* audioSamples,
+#ifdef USE_WEBRTC_DEV_BRANCH
+ uint32_t& nSamplesOut,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms) {
+#else
uint32_t& nSamplesOut) {
+#endif
++pull_iterations_;
const uint32_t audio_buffer_size = nSamples * nBytesPerSample;
const uint32_t bytes_out = RecordedDataReceived() ?
CopyFromRecBuffer(audioSamples, audio_buffer_size):
GenerateZeroBuffer(audioSamples, audio_buffer_size);
nSamplesOut = bytes_out / nBytesPerSample;
+#ifdef USE_WEBRTC_DEV_BRANCH
+ *rtp_timestamp = 0;
+ *ntp_time_ms = 0;
+#endif
return 0;
}
diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
index 7a6a5d0..613491a 100644
--- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
+++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc
@@ -473,6 +473,12 @@
SetAudioFrameActivityAndType(vad_enabled_, type, audio_frame);
previous_audio_activity_ = audio_frame->vad_activity_;
call_stats_.DecodedByNetEq(audio_frame->speech_type_);
+
+ // Computes the RTP timestamp of the first sample in |audio_frame| from
+ // |PlayoutTimestamp|, which is the timestamp of the last sample of
+ // |audio_frame|.
+ audio_frame->timestamp_ =
+ PlayoutTimestamp() - audio_frame->samples_per_channel_;
return 0;
}
diff --git a/webrtc/modules/audio_device/audio_device_buffer.cc b/webrtc/modules/audio_device/audio_device_buffer.cc
index db5cc32..ed1bf20 100644
--- a/webrtc/modules/audio_device/audio_device_buffer.cc
+++ b/webrtc/modules/audio_device/audio_device_buffer.cc
@@ -548,13 +548,16 @@
if (_ptrCbAudioTransport)
{
uint32_t res(0);
-
+ uint32_t rtp_timestamp = 0;
+ int64_t ntp_time_ms = 0;
res = _ptrCbAudioTransport->NeedMorePlayData(_playSamples,
playBytesPerSample,
playChannels,
playSampleRate,
&_playBuffer[0],
- nSamplesOut);
+ nSamplesOut,
+ &rtp_timestamp,
+ &ntp_time_ms);
if (res != 0)
{
WEBRTC_TRACE(kTraceError, kTraceAudioDevice, _id, "NeedMorePlayData() failed");
diff --git a/webrtc/modules/audio_device/include/audio_device_defines.h b/webrtc/modules/audio_device/include/audio_device_defines.h
index 0704ea8..f65e3a8 100644
--- a/webrtc/modules/audio_device/include/audio_device_defines.h
+++ b/webrtc/modules/audio_device/include/audio_device_defines.h
@@ -63,14 +63,16 @@
const int32_t clockDrift,
const uint32_t currentMicLevel,
const bool keyPressed,
- uint32_t& newMicLevel) = 0;
+ uint32_t& newMicLevel) = 0;
virtual int32_t NeedMorePlayData(const uint32_t nSamples,
const uint8_t nBytesPerSample,
const uint8_t nChannels,
const uint32_t samplesPerSec,
void* audioSamples,
- uint32_t& nSamplesOut) = 0;
+ uint32_t& nSamplesOut,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms) = 0;
// Method to pass captured data directly and unmixed to network channels.
// |channel_ids| contains a list of VoE channels which are the
@@ -125,7 +127,9 @@
// channel.
virtual void PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels, int number_of_frames,
- void* audio_data) {}
+ void* audio_data,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms) {}
protected:
virtual ~AudioTransport() {}
diff --git a/webrtc/modules/audio_device/test/audio_device_test_api.cc b/webrtc/modules/audio_device/test/audio_device_test_api.cc
index 2749e83..b10accb 100644
--- a/webrtc/modules/audio_device/test/audio_device_test_api.cc
+++ b/webrtc/modules/audio_device/test/audio_device_test_api.cc
@@ -116,7 +116,9 @@
const uint8_t nChannels,
const uint32_t sampleRate,
void* audioSamples,
- uint32_t& nSamplesOut) {
+ uint32_t& nSamplesOut,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms) {
play_count_++;
if (play_count_ % 100 == 0) {
if (nChannels == 1) {
@@ -149,7 +151,9 @@
virtual void PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels, int number_of_frames,
- void* audio_data) {}
+ void* audio_data,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms) {}
private:
uint32_t rec_count_;
uint32_t play_count_;
diff --git a/webrtc/modules/audio_device/test/func_test_manager.cc b/webrtc/modules/audio_device/test/func_test_manager.cc
index 9f80282..a51ebfb 100644
--- a/webrtc/modules/audio_device/test/func_test_manager.cc
+++ b/webrtc/modules/audio_device/test/func_test_manager.cc
@@ -292,7 +292,9 @@
const uint8_t nChannels,
const uint32_t samplesPerSec,
void* audioSamples,
- uint32_t& nSamplesOut)
+ uint32_t& nSamplesOut,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms)
{
if (_fullDuplex)
{
@@ -551,7 +553,9 @@
void AudioTransportImpl::PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels,
int number_of_frames,
- void* audio_data) {}
+ void* audio_data,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms) {}
FuncTestManager::FuncTestManager() :
_processThread(NULL),
diff --git a/webrtc/modules/audio_device/test/func_test_manager.h b/webrtc/modules/audio_device/test/func_test_manager.h
index bd32f62..1a1c2a5 100644
--- a/webrtc/modules/audio_device/test/func_test_manager.h
+++ b/webrtc/modules/audio_device/test/func_test_manager.h
@@ -118,7 +118,9 @@
const uint8_t nChannels,
const uint32_t samplesPerSec,
void* audioSamples,
- uint32_t& nSamplesOut);
+ uint32_t& nSamplesOut,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms);
virtual int OnDataAvailable(const int voe_channels[],
int number_of_voe_channels,
@@ -138,7 +140,9 @@
virtual void PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels, int number_of_frames,
- void* audio_data);
+ void* audio_data,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms);
AudioTransportImpl(AudioDeviceModule* audioDevice);
~AudioTransportImpl();
diff --git a/webrtc/modules/interface/module_common_types.h b/webrtc/modules/interface/module_common_types.h
index d336ccf..f9ba592 100644
--- a/webrtc/modules/interface/module_common_types.h
+++ b/webrtc/modules/interface/module_common_types.h
@@ -684,7 +684,10 @@
AudioFrame& operator-=(const AudioFrame& rhs);
int id_;
+ // RTP timestamp of the first sample in the AudioFrame.
uint32_t timestamp_;
+ // NTP time of the estimated capture time in local timebase in milliseconds.
+ int64_t ntp_time_ms_;
int16_t data_[kMaxDataSizeSamples];
int samples_per_channel_;
int sample_rate_hz_;
@@ -705,6 +708,7 @@
inline AudioFrame::AudioFrame()
: id_(-1),
timestamp_(0),
+ ntp_time_ms_(0),
data_(),
samples_per_channel_(0),
sample_rate_hz_(0),
diff --git a/webrtc/test/fake_audio_device.cc b/webrtc/test/fake_audio_device.cc
index a6fe165..d3421eb 100644
--- a/webrtc/test/fake_audio_device.cc
+++ b/webrtc/test/fake_audio_device.cc
@@ -121,13 +121,17 @@
samples_needed = std::min(kFrequencyHz / time_since_last_playout_ms,
kBufferSizeBytes / 2);
uint32_t samples_out = 0;
+ uint32_t rtp_timestamp = 0;
+ int64_t ntp_time_ms = 0;
EXPECT_EQ(0,
audio_callback_->NeedMorePlayData(samples_needed,
2,
1,
kFrequencyHz,
playout_buffer_,
- samples_out));
+ samples_out,
+ &rtp_timestamp,
+ &ntp_time_ms));
}
}
tick_->Wait(WEBRTC_EVENT_INFINITE);
diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc
index f919c3d..365d4ca 100644
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@@ -664,6 +664,25 @@
// Measure audio level (0-9)
_outputAudioLevel.ComputeLevel(audioFrame);
+ // TODO(wu): Calculate capture NTP time based on RTP timestamp and RTCP SR.
+ audioFrame.ntp_time_ms_ = 0;
+
+ if (!first_frame_arrived_) {
+ first_frame_arrived_ = true;
+ capture_start_rtp_time_stamp_ = audioFrame.timestamp_;
+ } else {
+ // |ntp_time_ms_| won't be valid until at least 2 RTCP SRs are received.
+ if (audioFrame.ntp_time_ms_ > 0) {
+ // Compute |capture_start_ntp_time_ms_| so that
+ // |capture_start_ntp_time_ms_| + |elapsed_time_ms| == |ntp_time_ms_|
+ CriticalSectionScoped lock(ts_stats_lock_.get());
+ uint32_t elapsed_time_ms =
+ (audioFrame.timestamp_ - capture_start_rtp_time_stamp_) /
+ (audioFrame.sample_rate_hz_ / 1000);
+ capture_start_ntp_time_ms_ = audioFrame.ntp_time_ms_ - elapsed_time_ms;
+ }
+ }
+
return 0;
}
@@ -836,6 +855,10 @@
playout_delay_ms_(0),
_numberOfDiscardedPackets(0),
send_sequence_number_(0),
+ ts_stats_lock_(CriticalSectionWrapper::CreateCriticalSection()),
+ first_frame_arrived_(false),
+ capture_start_rtp_time_stamp_(0),
+ capture_start_ntp_time_ms_(-1),
_engineStatisticsPtr(NULL),
_outputMixerPtr(NULL),
_transmitMixerPtr(NULL),
@@ -3371,7 +3394,7 @@
int
Channel::GetRTPStatistics(CallStatistics& stats)
{
- // --- Part one of the final structure (four values)
+ // --- RtcpStatistics
// The jitter statistics is updated for each received RTP packet and is
// based on received packets.
@@ -3398,7 +3421,7 @@
stats.fractionLost, stats.cumulativeLost, stats.extendedMax,
stats.jitterSamples);
- // --- Part two of the final structure (one value)
+ // --- RTT
uint16_t RTT(0);
RTCPMethod method = _rtpRtcpModule->RTCP();
@@ -3441,7 +3464,7 @@
VoEId(_instanceId, _channelId),
"GetRTPStatistics() => rttMs=%d", stats.rttMs);
- // --- Part three of the final structure (four values)
+ // --- Data counters
uint32_t bytesSent(0);
uint32_t packetsSent(0);
@@ -3473,6 +3496,11 @@
stats.bytesSent, stats.packetsSent, stats.bytesReceived,
stats.packetsReceived);
+ // --- Timestamps
+ {
+ CriticalSectionScoped lock(ts_stats_lock_.get());
+ stats.capture_start_ntp_time_ms_ = capture_start_ntp_time_ms_;
+ }
return 0;
}
diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h
index ed03519..7b40ed2 100644
--- a/webrtc/voice_engine/channel.h
+++ b/webrtc/voice_engine/channel.h
@@ -540,6 +540,15 @@
uint16_t send_sequence_number_;
uint8_t restored_packet_[kVoiceEngineMaxIpPacketSizeBytes];
+ scoped_ptr<CriticalSectionWrapper> ts_stats_lock_;
+
+ bool first_frame_arrived_;
+ // The rtp timestamp of the first played out audio frame.
+ uint32_t capture_start_rtp_time_stamp_;
+ // The capture ntp time (in local timebase) of the first played out audio
+ // frame.
+ int64_t capture_start_ntp_time_ms_;
+
// uses
Statistics* _engineStatisticsPtr;
OutputMixer* _outputMixerPtr;
diff --git a/webrtc/voice_engine/include/voe_rtp_rtcp.h b/webrtc/voice_engine/include/voe_rtp_rtcp.h
index f3a6313..2fb09cc 100644
--- a/webrtc/voice_engine/include/voe_rtp_rtcp.h
+++ b/webrtc/voice_engine/include/voe_rtp_rtcp.h
@@ -86,6 +86,9 @@
int packetsSent;
int bytesReceived;
int packetsReceived;
+ // The capture ntp time (in local timebase) of the first played out audio
+ // frame.
+ int64_t capture_start_ntp_time_ms_;
};
// See section 6.4.1 in http://www.ietf.org/rfc/rfc3550.txt for details.
diff --git a/webrtc/voice_engine/voe_base_impl.cc b/webrtc/voice_engine/voe_base_impl.cc
index 1b4b867..cfedd40 100644
--- a/webrtc/voice_engine/voe_base_impl.cc
+++ b/webrtc/voice_engine/voe_base_impl.cc
@@ -148,7 +148,9 @@
uint8_t nChannels,
uint32_t samplesPerSec,
void* audioSamples,
- uint32_t& nSamplesOut)
+ uint32_t& nSamplesOut,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms)
{
WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_shared->instance_id(), -1),
"VoEBaseImpl::NeedMorePlayData(nSamples=%u, "
@@ -157,7 +159,8 @@
GetPlayoutData(static_cast<int>(samplesPerSec),
static_cast<int>(nChannels),
- static_cast<int>(nSamples), true, audioSamples);
+ static_cast<int>(nSamples), true, audioSamples,
+ rtp_timestamp, ntp_time_ms);
nSamplesOut = _audioFrame.samples_per_channel_;
@@ -233,12 +236,14 @@
void VoEBaseImpl::PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels, int number_of_frames,
- void* audio_data) {
+ void* audio_data,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms) {
assert(bits_per_sample == 16);
assert(number_of_frames == static_cast<int>(sample_rate / 100));
GetPlayoutData(sample_rate, number_of_channels, number_of_frames, false,
- audio_data);
+ audio_data, rtp_timestamp, ntp_time_ms);
}
int VoEBaseImpl::RegisterVoiceEngineObserver(VoiceEngineObserver& observer)
@@ -1081,7 +1086,9 @@
void VoEBaseImpl::GetPlayoutData(int sample_rate, int number_of_channels,
int number_of_frames, bool feed_data_to_apm,
- void* audio_data) {
+ void* audio_data,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms) {
assert(_shared->output_mixer() != NULL);
// TODO(andrew): if the device is running in mono, we should tell the mixer
@@ -1102,6 +1109,9 @@
// Deliver audio (PCM) samples to the ADM
memcpy(audio_data, _audioFrame.data_,
sizeof(int16_t) * number_of_frames * number_of_channels);
+
+ *rtp_timestamp = _audioFrame.timestamp_;
+ *ntp_time_ms = _audioFrame.ntp_time_ms_;
}
} // namespace webrtc
diff --git a/webrtc/voice_engine/voe_base_impl.h b/webrtc/voice_engine/voe_base_impl.h
index 96dc225..fbcb4dd 100644
--- a/webrtc/voice_engine/voe_base_impl.h
+++ b/webrtc/voice_engine/voe_base_impl.h
@@ -79,7 +79,9 @@
uint8_t nChannels,
uint32_t samplesPerSec,
void* audioSamples,
- uint32_t& nSamplesOut);
+ uint32_t& nSamplesOut,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms);
virtual int OnDataAvailable(const int voe_channels[],
int number_of_voe_channels,
@@ -102,7 +104,9 @@
virtual void PullRenderData(int bits_per_sample, int sample_rate,
int number_of_channels, int number_of_frames,
- void* audio_data);
+ void* audio_data,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms);
// AudioDeviceObserver
virtual void OnErrorIsReported(ErrorCode error);
@@ -138,7 +142,9 @@
void GetPlayoutData(int sample_rate, int number_of_channels,
int number_of_frames, bool feed_data_to_apm,
- void* audio_data);
+ void* audio_data,
+ uint32_t* rtp_timestamp,
+ int64_t* ntp_time_ms);
int32_t AddBuildInfo(char* str) const;
int32_t AddVoEVersion(char* str) const;