src/jni/rtp/AudioGroup.cpp - platform/frameworks/opt/net/voip - Git at Google

 /*
  * Copyright (C) 2010 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include <stdio.h>
 #include <stdint.h>
 #include <string.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <sys/epoll.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <time.h>
 #include <arpa/inet.h>
 #include <netinet/in.h>

 // #define LOG_NDEBUG 0
 #define LOG_TAG "AudioGroup"
 #include <cutils/atomic.h>
 #include <cutils/properties.h>
 #include <utils/Log.h>
 #include <utils/Errors.h>
 #include <utils/RefBase.h>
 #include <utils/threads.h>
 #include <utils/SystemClock.h>
 #include <media/AudioRecord.h>
 #include <media/AudioTrack.h>
 #include <media/AudioEffect.h>
 #include <system/audio_effects/effect_aec.h>
 #include <system/audio.h>

 #include <nativehelper/ScopedUtfChars.h>

 #include "jni.h"
 #include <nativehelper/JNIHelp.h>

 #include "AudioCodec.h"
 #include "EchoSuppressor.h"

 extern int parse(JNIEnv *env, jstring jAddress, int port, sockaddr_storage *ss);

 namespace {

 using namespace android;

 int gRandom = -1;

 // We use a circular array to implement jitter buffer. The simplest way is doing
 // a modulo operation on the index while accessing the array. However modulo can
 // be expensive on some platforms, such as ARM. Thus we round up the size of the
 // array to the nearest power of 2 and then use bitwise-and instead of modulo.
 // Currently we make it 2048ms long and assume packet interval is 50ms or less.
 // The first 100ms is the place where samples get mixed. The rest is the real
 // jitter buffer. For a stream at 8000Hz it takes 32 kilobytes. These numbers
 // are chosen by experiments and each of them can be adjusted as needed.

 // Originally a stream does not send packets when it is receive-only or there is
 // nothing to mix. However, this causes some problems with certain firewalls and
 // proxies. A firewall might remove a port mapping when there is no outgoing
 // packet for a preiod of time, and a proxy might wait for incoming packets from
 // both sides before start forwarding. To solve these problems, we send out a
 // silence packet on the stream for every second. It should be good enough to
 // keep the stream alive with relatively low resources.

 // Other notes:
 // + We use elapsedRealtime() to get the time. Since we use 32bit variables
 //   instead of 64bit ones, comparison must be done by subtraction.
 // + Sampling rate must be multiple of 1000Hz, and packet length must be in
 //   milliseconds. No floating points.
 // + If we cannot get enough CPU, we drop samples and simulate packet loss.
 // + Resampling is not done yet, so streams in one group must use the same rate.
 //   For the first release only 8000Hz is supported.

 #define BUFFER_SIZE     2048
 #define HISTORY_SIZE    100
 #define MEASURE_BASE    100
 #define MEASURE_PERIOD  5000
 #define DTMF_PERIOD     200

 class AudioStream
 {
 public:
     AudioStream();
     ~AudioStream();
     bool set(int mode, int socket, sockaddr_storage *remote,
         AudioCodec *codec, int sampleRate, int sampleCount,
         int codecType, int dtmfType);

     void sendDtmf(int event);
     bool mix(int32_t *output, int head, int tail, int sampleRate);
     void encode(int tick, AudioStream *chain);
     void decode(int tick);

 private:
     enum {
         NORMAL = 0,
         SEND_ONLY = 1,
         RECEIVE_ONLY = 2,
         LAST_MODE = 2,
     };

     int mMode;
     int mSocket;
     sockaddr_storage mRemote;
     AudioCodec *mCodec;
     uint32_t mCodecMagic;
     uint32_t mDtmfMagic;
     bool mFixRemote;

     int mTick;
     int mSampleRate;
     int mSampleCount;
     int mInterval;
     int mKeepAlive;

     int16_t *mBuffer;
     int mBufferMask;
     int mBufferHead;
     int mBufferTail;
     int mLatencyTimer;
     int mLatencyScore;

     uint16_t mSequence;
     uint32_t mTimestamp;
     uint32_t mSsrc;

     int mDtmfEvent;
     int mDtmfStart;

     AudioStream *mNext;

     friend class AudioGroup;
 };

 AudioStream::AudioStream()
 {
     mSocket = -1;
     mCodec = NULL;
     mBuffer = NULL;
     mNext = NULL;
 }

 AudioStream::~AudioStream()
 {
     close(mSocket);
     delete mCodec;
     delete [] mBuffer;
     ALOGD("stream[%d] is dead", mSocket);
 }

 bool AudioStream::set(int mode, int socket, sockaddr_storage *remote,
     AudioCodec *codec, int sampleRate, int sampleCount,
     int codecType, int dtmfType)
 {
     if (mode < 0 || mode > LAST_MODE) {
         return false;
     }
     mMode = mode;

     mCodecMagic = (0x8000 | codecType) << 16;
     mDtmfMagic = (dtmfType == -1) ? 0 : (0x8000 | dtmfType) << 16;

     mTick = elapsedRealtime();
     mSampleRate = sampleRate / 1000;
     mSampleCount = sampleCount;
     mInterval = mSampleCount / mSampleRate;

     // Allocate jitter buffer.
     for (mBufferMask = 8; mBufferMask < mSampleRate; mBufferMask <<= 1);
     mBufferMask *= BUFFER_SIZE;
     mBuffer = new int16_t[mBufferMask];
     --mBufferMask;
     mBufferHead = 0;
     mBufferTail = 0;
     mLatencyTimer = 0;
     mLatencyScore = 0;

     // Initialize random bits.
     read(gRandom, &mSequence, sizeof(mSequence));
     read(gRandom, &mTimestamp, sizeof(mTimestamp));
     read(gRandom, &mSsrc, sizeof(mSsrc));

     mDtmfEvent = -1;
     mDtmfStart = 0;

     // Only take over these things when succeeded.
     mSocket = socket;
     if (codec) {
         mRemote = *remote;
         mCodec = codec;

         // Here we should never get an private address, but some buggy proxy
         // servers do give us one. To solve this, we replace the address when
         // the first time we successfully decode an incoming packet.
         mFixRemote = false;
         if (remote->ss_family == AF_INET) {
             unsigned char *address =
                 (unsigned char *)&((sockaddr_in *)remote)->sin_addr;
             if (address[0] == 10 ||
                 (address[0] == 172 && (address[1] >> 4) == 1) ||
                 (address[0] == 192 && address[1] == 168)) {
                 mFixRemote = true;
             }
         }
     }

     ALOGD("stream[%d] is configured as %s %dkHz %dms mode %d", mSocket,
         (codec ? codec->name : "RAW"), mSampleRate, mInterval, mMode);
     return true;
 }

 void AudioStream::sendDtmf(int event)
 {
     if (mDtmfMagic != 0) {
         mDtmfEvent = event << 24;
         mDtmfStart = mTimestamp + mSampleCount;
     }
 }

 bool AudioStream::mix(int32_t *output, int head, int tail, int sampleRate)
 {
     if (mMode == SEND_ONLY) {
         return false;
     }

     if (head - mBufferHead < 0) {
         head = mBufferHead;
     }
     if (tail - mBufferTail > 0) {
         tail = mBufferTail;
     }
     if (tail - head <= 0) {
         return false;
     }

     head *= mSampleRate;
     tail *= mSampleRate;

     if (sampleRate == mSampleRate) {
         for (int i = head; i - tail < 0; ++i) {
             output[i - head] += mBuffer[i & mBufferMask];
         }
     } else {
         // TODO: implement resampling.
         return false;
     }
     return true;
 }

 void AudioStream::encode(int tick, AudioStream *chain)
 {
     if (tick - mTick >= mInterval) {
         // We just missed the train. Pretend that packets in between are lost.
         int skipped = (tick - mTick) / mInterval;
         mTick += skipped * mInterval;
         mSequence += skipped;
         mTimestamp += skipped * mSampleCount;
         ALOGV("stream[%d] skips %d packets", mSocket, skipped);
     }

     tick = mTick;
     mTick += mInterval;
     ++mSequence;
     mTimestamp += mSampleCount;

     // If there is an ongoing DTMF event, send it now.
     if (mMode != RECEIVE_ONLY && mDtmfEvent != -1) {
         int duration = mTimestamp - mDtmfStart;
         // Make sure duration is reasonable.
         if (duration >= 0 && duration < mSampleRate * DTMF_PERIOD) {
             duration += mSampleCount;
             int32_t buffer[4] = {
                 static_cast<int32_t>(htonl(mDtmfMagic | mSequence)),
                 static_cast<int32_t>(htonl(mDtmfStart)),
                 static_cast<int32_t>(mSsrc),
                 static_cast<int32_t>(htonl(mDtmfEvent | duration)),
             };
             if (duration >= mSampleRate * DTMF_PERIOD) {
                 buffer[3] |= htonl(1 << 23);
                 mDtmfEvent = -1;
             }
             sendto(mSocket, buffer, sizeof(buffer), MSG_DONTWAIT,
                 (sockaddr *)&mRemote, sizeof(mRemote));
             return;
         }
         mDtmfEvent = -1;
     }

     int32_t buffer[mSampleCount + 3];
     bool data = false;
     if (mMode != RECEIVE_ONLY) {
         // Mix all other streams.
         memset(buffer, 0, sizeof(buffer));
         while (chain) {
             if (chain != this) {
                 data |= chain->mix(buffer, tick - mInterval, tick, mSampleRate);
             }
             chain = chain->mNext;
         }
     }

     int16_t samples[mSampleCount];
     if (data) {
         // Saturate into 16 bits.
         for (int i = 0; i < mSampleCount; ++i) {
             int32_t sample = buffer[i];
             if (sample < -32768) {
                 sample = -32768;
             }
             if (sample > 32767) {
                 sample = 32767;
             }
             samples[i] = sample;
         }
     } else {
         if ((mTick ^ mKeepAlive) >> 10 == 0) {
             return;
         }
         mKeepAlive = mTick;
         memset(samples, 0, sizeof(samples));

         if (mMode != RECEIVE_ONLY) {
             ALOGV("stream[%d] no data", mSocket);
         }
     }

     if (!mCodec) {
         // Special case for device stream.
         send(mSocket, samples, sizeof(samples), MSG_DONTWAIT);
         return;
     }

     // Cook the packet and send it out.
     buffer[0] = htonl(mCodecMagic | mSequence);
     buffer[1] = htonl(mTimestamp);
     buffer[2] = mSsrc;
     int length = mCodec->encode(&buffer[3], samples);
     if (length <= 0) {
         ALOGV("stream[%d] encoder error", mSocket);
         return;
     }
     sendto(mSocket, buffer, length + 12, MSG_DONTWAIT, (sockaddr *)&mRemote,
         sizeof(mRemote));
 }

 void AudioStream::decode(int tick)
 {
     char c;
     if (mMode == SEND_ONLY) {
         recv(mSocket, &c, 1, MSG_DONTWAIT);
         return;
     }

     // Make sure mBufferHead and mBufferTail are reasonable.
     if ((unsigned int)(tick + BUFFER_SIZE - mBufferHead) > BUFFER_SIZE * 2) {
         mBufferHead = tick - HISTORY_SIZE;
         mBufferTail = mBufferHead;
     }

     if (tick - mBufferHead > HISTORY_SIZE) {
         // Throw away outdated samples.
         mBufferHead = tick - HISTORY_SIZE;
         if (mBufferTail - mBufferHead < 0) {
             mBufferTail = mBufferHead;
         }
     }

     // Adjust the jitter buffer if the latency keeps larger than the threshold
     // in the measurement period.
     int score = mBufferTail - tick - MEASURE_BASE;
     if (mLatencyScore > score || mLatencyScore <= 0) {
         mLatencyScore = score;
         mLatencyTimer = tick;
     } else if (tick - mLatencyTimer >= MEASURE_PERIOD) {
         ALOGV("stream[%d] reduces latency of %dms", mSocket, mLatencyScore);
         mBufferTail -= mLatencyScore;
         mLatencyScore = -1;
     }

     int count = (BUFFER_SIZE - (mBufferTail - mBufferHead)) * mSampleRate;
     if (count < mSampleCount) {
         // Buffer overflow. Drop the packet.
         ALOGV("stream[%d] buffer overflow", mSocket);
         recv(mSocket, &c, 1, MSG_DONTWAIT);
         return;
     }

     // Receive the packet and decode it.
     int16_t samples[count];
     if (!mCodec) {
         // Special case for device stream.
         count = recv(mSocket, samples, sizeof(samples),
             MSG_TRUNC | MSG_DONTWAIT) >> 1;
     } else {
         __attribute__((aligned(4))) uint8_t buffer[2048];
         sockaddr_storage remote;
         socklen_t addrlen = sizeof(remote);

         int bufferSize = sizeof(buffer);
         int length = recvfrom(mSocket, buffer, bufferSize,
             MSG_TRUNC | MSG_DONTWAIT, (sockaddr *)&remote, &addrlen);

         // Do we need to check SSRC, sequence, and timestamp? They are not
         // reliable but at least they can be used to identify duplicates?
         if (length < 12 || length > bufferSize ||
             (ntohl(*(uint32_t *)buffer) & 0xC07F0000) != mCodecMagic) {
             ALOGV("stream[%d] malformed packet", mSocket);
             return;
         }
         int offset = 12 + ((buffer[0] & 0x0F) << 2);
         if (offset+2 >= bufferSize) {
             ALOGV("invalid buffer offset: %d", offset+2);
             return;
         }
         if ((buffer[0] & 0x10) != 0) {
             offset += 4 + (ntohs(*(uint16_t *)&buffer[offset + 2]) << 2);
         }
         if (offset >= bufferSize) {
             ALOGV("invalid buffer offset: %d", offset);
             return;
         }
         if ((buffer[0] & 0x20) != 0) {
             length -= buffer[length - 1];
         }
         length -= offset;
         if (length >= 0) {
             length = mCodec->decode(samples, count, &buffer[offset], length);
         }
         if (length > 0 && mFixRemote) {
             mRemote = remote;
             mFixRemote = false;
         }
         count = length;
     }
     if (count <= 0) {
         ALOGV("stream[%d] decoder error", mSocket);
         return;
     }

     if (tick - mBufferTail > 0) {
         // Buffer underrun. Reset the jitter buffer.
         ALOGV("stream[%d] buffer underrun", mSocket);
         if (mBufferTail - mBufferHead <= 0) {
             mBufferHead = tick + mInterval;
             mBufferTail = mBufferHead;
         } else {
             int tail = (tick + mInterval) * mSampleRate;
             for (int i = mBufferTail * mSampleRate; i - tail < 0; ++i) {
                 mBuffer[i & mBufferMask] = 0;
             }
             mBufferTail = tick + mInterval;
         }
     }

     // Append to the jitter buffer.
     int tail = mBufferTail * mSampleRate;
     for (int i = 0; i < count; ++i) {
         mBuffer[tail & mBufferMask] = samples[i];
         ++tail;
     }
     mBufferTail += mInterval;
 }

 //------------------------------------------------------------------------------

 class AudioGroup
 {
 public:
     explicit AudioGroup(const String16 &opPackageName);
     ~AudioGroup();
     bool set(int sampleRate, int sampleCount);

     bool setMode(int mode);
     bool sendDtmf(int event);
     bool add(AudioStream *stream);
     bool remove(AudioStream *stream);
     bool platformHasAec() { return mPlatformHasAec; }

 private:
     enum {
         ON_HOLD = 0,
         MUTED = 1,
         NORMAL = 2,
         ECHO_SUPPRESSION = 3,
         LAST_MODE = 3,
     };

     bool checkPlatformAec();

     AudioStream *mChain;
     int mEventQueue;
     volatile int mDtmfEvent;

     String16 mOpPackageName;

     int mMode;
     int mSampleRate;
     size_t mSampleCount;
     int mDeviceSocket;
     bool mPlatformHasAec;

     class NetworkThread : public Thread
     {
     public:
         explicit NetworkThread(AudioGroup *group) : Thread(false), mGroup(group) {}

         bool start()
         {
             if (run("Network", ANDROID_PRIORITY_AUDIO) != NO_ERROR) {
                 ALOGE("cannot start network thread");
                 return false;
             }
             return true;
         }

     private:
         AudioGroup *mGroup;
         bool threadLoop();
     };
     sp<NetworkThread> mNetworkThread;

     class DeviceThread : public Thread
     {
     public:
         explicit DeviceThread(AudioGroup *group) : Thread(false), mGroup(group) {}

         bool start()
         {
             if (run("Device", ANDROID_PRIORITY_AUDIO) != NO_ERROR) {
                 ALOGE("cannot start device thread");
                 return false;
             }
             return true;
         }

     private:
         AudioGroup *mGroup;
         bool threadLoop();
     };
     sp<DeviceThread> mDeviceThread;
 };

 AudioGroup::AudioGroup(const String16 &opPackageName)
 {
     mOpPackageName = opPackageName;
     mMode = ON_HOLD;
     mChain = NULL;
     mEventQueue = -1;
     mDtmfEvent = -1;
     mDeviceSocket = -1;
     mNetworkThread = new NetworkThread(this);
     mDeviceThread = new DeviceThread(this);
     mPlatformHasAec = checkPlatformAec();
 }

 AudioGroup::~AudioGroup()
 {
     mNetworkThread->requestExitAndWait();
     mDeviceThread->requestExitAndWait();
     close(mEventQueue);
     close(mDeviceSocket);
     while (mChain) {
         AudioStream *next = mChain->mNext;
         delete mChain;
         mChain = next;
     }
     ALOGD("group[%d] is dead", mDeviceSocket);
 }

 bool AudioGroup::set(int sampleRate, int sampleCount)
 {
     mEventQueue = epoll_create1(EPOLL_CLOEXEC);
     if (mEventQueue == -1) {
         ALOGE("epoll_create1: %s", strerror(errno));
         return false;
     }

     mSampleRate = sampleRate;
     mSampleCount = sampleCount;

     // Create device socket.
     int pair[2];
     if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair)) {
         ALOGE("socketpair: %s", strerror(errno));
         return false;
     }
     mDeviceSocket = pair[0];

     // Create device stream.
     mChain = new AudioStream;
     if (!mChain->set(AudioStream::NORMAL, pair[1], NULL, NULL,
         sampleRate, sampleCount, -1, -1)) {
         close(pair[1]);
         ALOGE("cannot initialize device stream");
         return false;
     }

     // Give device socket a reasonable timeout.
     timeval tv;
     tv.tv_sec = 0;
     tv.tv_usec = 1000 * sampleCount / sampleRate * 500;
     if (setsockopt(pair[0], SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) {
         ALOGE("setsockopt: %s", strerror(errno));
         return false;
     }

     // Add device stream into event queue.
     epoll_event event;
     event.events = EPOLLIN;
     event.data.ptr = mChain;
     if (epoll_ctl(mEventQueue, EPOLL_CTL_ADD, pair[1], &event)) {
         ALOGE("epoll_ctl: %s", strerror(errno));
         return false;
     }

     // Anything else?
     ALOGD("stream[%d] joins group[%d]", pair[1], pair[0]);
     return true;
 }

 bool AudioGroup::setMode(int mode)
 {
     if (mode < 0 || mode > LAST_MODE) {
         return false;
     }
     // FIXME: temporary code to overcome echo and mic gain issues on herring and tuna boards.
     // Must be modified/removed when the root cause of the issue is fixed in the hardware or
     // driver
     char value[PROPERTY_VALUE_MAX];
     property_get("ro.product.board", value, "");
     if (mode == NORMAL &&
             (!strcmp(value, "herring") || !strcmp(value, "tuna"))) {
         mode = ECHO_SUPPRESSION;
     }
     if (mMode == mode) {
         return true;
     }

     mDeviceThread->requestExitAndWait();
     ALOGD("group[%d] switches from mode %d to %d", mDeviceSocket, mMode, mode);
     mMode = mode;
     return (mode == ON_HOLD) || mDeviceThread->start();
 }

 bool AudioGroup::sendDtmf(int event)
 {
     if (event < 0 || event > 15) {
         return false;
     }

     // DTMF is rarely used, so we try to make it as lightweight as possible.
     // Using volatile might be dodgy, but using a pipe or pthread primitives
     // or stop-set-restart threads seems too heavy. Will investigate later.
     timespec ts;
     ts.tv_sec = 0;
     ts.tv_nsec = 100000000;
     for (int i = 0; mDtmfEvent != -1 && i < 20; ++i) {
         nanosleep(&ts, NULL);
     }
     if (mDtmfEvent != -1) {
         return false;
     }
     mDtmfEvent = event;
     nanosleep(&ts, NULL);
     return true;
 }

 bool AudioGroup::add(AudioStream *stream)
 {
     mNetworkThread->requestExitAndWait();

     epoll_event event;
     event.events = EPOLLIN;
     event.data.ptr = stream;
     if (epoll_ctl(mEventQueue, EPOLL_CTL_ADD, stream->mSocket, &event)) {
         ALOGE("epoll_ctl: %s", strerror(errno));
         return false;
     }

     stream->mNext = mChain->mNext;
     mChain->mNext = stream;
     if (!mNetworkThread->start()) {
         // Only take over the stream when succeeded.
         mChain->mNext = stream->mNext;
         return false;
     }

     ALOGD("stream[%d] joins group[%d]", stream->mSocket, mDeviceSocket);
     return true;
 }

 bool AudioGroup::remove(AudioStream *stream)
 {
     mNetworkThread->requestExitAndWait();

     for (AudioStream *chain = mChain; chain->mNext; chain = chain->mNext) {
         if (chain->mNext == stream) {
             if (epoll_ctl(mEventQueue, EPOLL_CTL_DEL, stream->mSocket, NULL)) {
                 ALOGE("epoll_ctl: %s", strerror(errno));
                 return false;
             }
             chain->mNext = stream->mNext;
             ALOGD("stream[%d] leaves group[%d]", stream->mSocket, mDeviceSocket);
             delete stream;
             break;
         }
     }

     // Do not start network thread if there is only one stream.
     if (!mChain->mNext || !mNetworkThread->start()) {
         return false;
     }
     return true;
 }

 bool AudioGroup::NetworkThread::threadLoop()
 {
     AudioStream *chain = mGroup->mChain;
     int tick = elapsedRealtime();
     int deadline = tick + 10;
     int count = 0;

     for (AudioStream *stream = chain; stream; stream = stream->mNext) {
         if (tick - stream->mTick >= 0) {
             stream->encode(tick, chain);
         }
         if (deadline - stream->mTick > 0) {
             deadline = stream->mTick;
         }
         ++count;
     }

     int event = mGroup->mDtmfEvent;
     if (event != -1) {
         for (AudioStream *stream = chain; stream; stream = stream->mNext) {
             stream->sendDtmf(event);
         }
         mGroup->mDtmfEvent = -1;
     }

     deadline -= tick;
     if (deadline < 1) {
         deadline = 1;
     }

     epoll_event events[count];
     count = epoll_wait(mGroup->mEventQueue, events, count, deadline);
     if (count == -1) {
         ALOGE("epoll_wait: %s", strerror(errno));
         return false;
     }
     for (int i = 0; i < count; ++i) {
         ((AudioStream *)events[i].data.ptr)->decode(tick);
     }

     return true;
 }

 bool AudioGroup::checkPlatformAec()
 {
     effect_descriptor_t fxDesc;
     uint32_t numFx;

     if (AudioEffect::queryNumberEffects(&numFx) != NO_ERROR) {
         return false;
     }
     for (uint32_t i = 0; i < numFx; i++) {
         if (AudioEffect::queryEffect(i, &fxDesc) != NO_ERROR) {
             continue;
         }
         if (memcmp(&fxDesc.type, FX_IID_AEC, sizeof(effect_uuid_t)) == 0) {
             return true;
         }
     }
     return false;
 }

 bool AudioGroup::DeviceThread::threadLoop()
 {
     int mode = mGroup->mMode;
     int sampleRate = mGroup->mSampleRate;
     size_t sampleCount = mGroup->mSampleCount;
     int deviceSocket = mGroup->mDeviceSocket;

     // Find out the frame count for AudioTrack and AudioRecord.
     size_t output = 0;
     size_t input = 0;
     if (AudioTrack::getMinFrameCount(&output, AUDIO_STREAM_VOICE_CALL,
         sampleRate) != NO_ERROR || output <= 0 ||
         AudioRecord::getMinFrameCount(&input, sampleRate,
         AUDIO_FORMAT_PCM_16_BIT, AUDIO_CHANNEL_IN_MONO) != NO_ERROR || input <= 0) {
         ALOGE("cannot compute frame count");
         return false;
     }
     ALOGD("reported frame count: output %zu, input %zu", output, input);

     if (output < sampleCount * 2) {
         output = sampleCount * 2;
     }
     if (input < sampleCount * 2) {
         input = sampleCount * 2;
     }
     ALOGD("adjusted frame count: output %zu, input %zu", output, input);

     // Initialize AudioTrack and AudioRecord.
     sp<AudioTrack> track = new AudioTrack();
     sp<AudioRecord> record = new AudioRecord(mGroup->mOpPackageName);
     // Set caller name so it can be logged in destructor.
     // MediaMetricsConstants.h: AMEDIAMETRICS_PROP_CALLERNAME_VALUE_RTP
     track->setCallerName("rtp");
     record->setCallerName("rtp");
     if (track->set(AUDIO_STREAM_VOICE_CALL, sampleRate, AUDIO_FORMAT_PCM_16_BIT,
                 AUDIO_CHANNEL_OUT_MONO, output, AUDIO_OUTPUT_FLAG_NONE, NULL /*callback_t*/,
                 NULL /*user*/, 0 /*notificationFrames*/, 0 /*sharedBuffer*/,
                 false /*threadCanCallJava*/, AUDIO_SESSION_ALLOCATE,
                 AudioTrack::TRANSFER_OBTAIN) != NO_ERROR ||
             record->set(AUDIO_SOURCE_VOICE_COMMUNICATION, sampleRate, AUDIO_FORMAT_PCM_16_BIT,
                 AUDIO_CHANNEL_IN_MONO, input, NULL /*callback_t*/, NULL /*user*/,
                 0 /*notificationFrames*/, false /*threadCanCallJava*/, AUDIO_SESSION_ALLOCATE,
                 AudioRecord::TRANSFER_OBTAIN) != NO_ERROR) {
         ALOGE("cannot initialize audio device");
         return false;
     }
     ALOGD("latency: output %d, input %d", track->latency(), record->latency());

     // Give device socket a reasonable buffer size.
     setsockopt(deviceSocket, SOL_SOCKET, SO_RCVBUF, &output, sizeof(output));
     setsockopt(deviceSocket, SOL_SOCKET, SO_SNDBUF, &output, sizeof(output));

     // Drain device socket.
     char c;
     while (recv(deviceSocket, &c, 1, MSG_DONTWAIT) == 1);

     // check if platform supports echo cancellation and do not active local echo suppression in
     // this case
     EchoSuppressor *echo = NULL;
     sp<AudioEffect> aec;
     if (mode == ECHO_SUPPRESSION) {
         if (mGroup->platformHasAec()) {
             aec = new AudioEffect(mGroup->mOpPackageName);
             aec->set(FX_IID_AEC,
                      NULL,
                      0,
                      0,
                      0,
                      record->getSessionId(),
                      AUDIO_IO_HANDLE_NONE); // record sessionId is sufficient.
             status_t status = aec->initCheck();
             if (status == NO_ERROR || status == ALREADY_EXISTS) {
                 aec->setEnabled(true);
             } else {
                 aec.clear();
             }
         }
         // Create local echo suppressor if platform AEC cannot be used.
         if (aec == 0) {
              echo = new EchoSuppressor(sampleCount,
                                        (track->latency() + record->latency()) * sampleRate / 1000);
         }
     }
     // Start AudioRecord before AudioTrack. This prevents AudioTrack from being
     // disabled due to buffer underrun while waiting for AudioRecord.
     if (mode != MUTED) {
         record->start();
         int16_t one;
         // FIXME this may not work any more
         record->read(&one, sizeof(one));
     }
     track->start();

     while (!exitPending()) {
         int16_t output[sampleCount];
         if (recv(deviceSocket, output, sizeof(output), 0) <= 0) {
             memset(output, 0, sizeof(output));
         }

         int16_t input[sampleCount];
         int toWrite = sampleCount;
         int toRead = (mode == MUTED) ? 0 : sampleCount;
         int chances = 100;

         while (--chances > 0 && (toWrite > 0 || toRead > 0)) {
             if (toWrite > 0) {
                 AudioTrack::Buffer buffer;
                 buffer.frameCount = toWrite;

                 status_t status = track->obtainBuffer(&buffer, 1);
                 if (status == NO_ERROR) {
                     int offset = sampleCount - toWrite;
                     memcpy(buffer.i8, &output[offset], buffer.size);
                     toWrite -= buffer.frameCount;
                     track->releaseBuffer(&buffer);
                 } else if (status != TIMED_OUT && status != WOULD_BLOCK) {
                     ALOGE("cannot write to AudioTrack");
                     goto exit;
                 }
             }

             if (toRead > 0) {
                 AudioRecord::Buffer buffer;
                 buffer.frameCount = toRead;

                 status_t status = record->obtainBuffer(&buffer, 1);
                 if (status == NO_ERROR) {
                     int offset = sampleCount - toRead;
                     memcpy(&input[offset], buffer.i8, buffer.size);
                     toRead -= buffer.frameCount;
                     record->releaseBuffer(&buffer);
                 } else if (status != TIMED_OUT && status != WOULD_BLOCK) {
                     ALOGE("cannot read from AudioRecord");
                     goto exit;
                 }
             }
         }

         if (chances <= 0) {
             ALOGW("device loop timeout");
             while (recv(deviceSocket, &c, 1, MSG_DONTWAIT) == 1);
         }

         if (mode != MUTED) {
             if (echo != NULL) {
                 ALOGV("echo->run()");
                 echo->run(output, input);
             }
             send(deviceSocket, input, sizeof(input), MSG_DONTWAIT);
         }
     }

 exit:
     delete echo;
     return true;
 }

 //------------------------------------------------------------------------------

 static jfieldID gNative;
 static jfieldID gMode;

 jlong add(JNIEnv *env, jobject thiz, jint mode,
     jint socket, jstring jRemoteAddress, jint remotePort,
     jstring jCodecSpec, jint dtmfType, jstring opPackageNameStr)
 {
     AudioCodec *codec = NULL;
     AudioStream *stream = NULL;
     AudioGroup *group = NULL;

     // Sanity check.
     sockaddr_storage remote;
     if (parse(env, jRemoteAddress, remotePort, &remote) < 0) {
         // Exception already thrown.
         return 0;
     }
     if (!jCodecSpec) {
         jniThrowNullPointerException(env, "codecSpec");
         return 0;
     }
     const char *codecSpec = env->GetStringUTFChars(jCodecSpec, NULL);
     if (!codecSpec) {
         // Exception already thrown.
         return 0;
     }
     socket = dup(socket);
     if (socket == -1) {
         jniThrowException(env, "java/lang/IllegalStateException",
             "cannot get stream socket");
         return 0;
     }

     ScopedUtfChars opPackageName(env, opPackageNameStr);

     // Create audio codec.
     int codecType = -1;
     char codecName[16];
     int sampleRate = -1;
     sscanf(codecSpec, "%d %15[^/]%*c%d", &codecType, codecName, &sampleRate);
     codec = newAudioCodec(codecName);
     int sampleCount = (codec ? codec->set(sampleRate, codecSpec) : -1);
     env->ReleaseStringUTFChars(jCodecSpec, codecSpec);
     if (sampleCount <= 0) {
         jniThrowException(env, "java/lang/IllegalStateException",
             "cannot initialize audio codec");
         goto error;
     }

     // Create audio stream.
     stream = new AudioStream;
     if (!stream->set(mode, socket, &remote, codec, sampleRate, sampleCount,
         codecType, dtmfType)) {
         jniThrowException(env, "java/lang/IllegalStateException",
             "cannot initialize audio stream");
         goto error;
     }
     socket = -1;
     codec = NULL;

     // Create audio group.
     group = (AudioGroup *)env->GetLongField(thiz, gNative);
     if (!group) {
         int mode = env->GetIntField(thiz, gMode);
         group = new AudioGroup(String16(opPackageName.c_str()));
         if (!group->set(8000, 256) || !group->setMode(mode)) {
             jniThrowException(env, "java/lang/IllegalStateException",
                 "cannot initialize audio group");
             goto error;
         }
     }

     // Add audio stream into audio group.
     if (!group->add(stream)) {
         jniThrowException(env, "java/lang/IllegalStateException",
             "cannot add audio stream");
         goto error;
     }

     // Succeed.
     env->SetLongField(thiz, gNative, (jlong)group);
     return (jlong)stream;

 error:
     delete group;
     delete stream;
     delete codec;
     close(socket);
     env->SetLongField(thiz, gNative, 0);
     return 0;
 }

 void remove(JNIEnv *env, jobject thiz, jlong stream)
 {
     AudioGroup *group = (AudioGroup *)env->GetLongField(thiz, gNative);
     if (group) {
         if (!stream || !group->remove((AudioStream *)stream)) {
             delete group;
             env->SetLongField(thiz, gNative, 0);
         }
     }
 }

 void setMode(JNIEnv *env, jobject thiz, jint mode)
 {
     AudioGroup *group = (AudioGroup *)env->GetLongField(thiz, gNative);
     if (group && !group->setMode(mode)) {
         jniThrowException(env, "java/lang/IllegalArgumentException", NULL);
     }
 }

 void sendDtmf(JNIEnv *env, jobject thiz, jint event)
 {
     AudioGroup *group = (AudioGroup *)env->GetLongField(thiz, gNative);
     if (group && !group->sendDtmf(event)) {
         jniThrowException(env, "java/lang/IllegalArgumentException", NULL);
     }
 }

 JNINativeMethod gMethods[] = {
     {"nativeAdd", "(IILjava/lang/String;ILjava/lang/String;ILjava/lang/String;)J", (void *)add},
     {"nativeRemove", "(J)V", (void *)remove},
     {"nativeSetMode", "(I)V", (void *)setMode},
     {"nativeSendDtmf", "(I)V", (void *)sendDtmf},
 };

 } // namespace

 int registerAudioGroup(JNIEnv *env)
 {
     gRandom = open("/dev/urandom", O_RDONLY);
     if (gRandom == -1) {
         ALOGE("urandom: %s", strerror(errno));
         return -1;
     }

     jclass clazz;
     if ((clazz = env->FindClass("android/net/rtp/AudioGroup")) == NULL ||
         (gNative = env->GetFieldID(clazz, "mNative", "J")) == NULL ||
         (gMode = env->GetFieldID(clazz, "mMode", "I")) == NULL ||
         env->RegisterNatives(clazz, gMethods, NELEM(gMethods)) < 0) {
         ALOGE("JNI registration failed");
         return -1;
     }
     return 0;
 }