diff --git a/Android.bp b/Android.bp
index b62d197..4214d85 100644
--- a/Android.bp
+++ b/Android.bp
@@ -12,8 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-cc_library_shared {
-    name: "libhwbinder",
+subdirs = [
+    "vts/performance",
+]
+
+cc_defaults {
+    name: "libhwbinder_defaults",
     shared_libs: [
         "libbase",
         "liblog",
@@ -55,5 +59,49 @@
         },
     },
 
-    cflags: ["-Werror"],
+    cflags: [
+        "-Wall",
+        "-Werror",
+    ],
+}
+
+cc_library {
+    name: "libhwbinder",
+    defaults: [
+        "libhwbinder_defaults",
+        "hwbinder_pgo",
+        "hwbinder_lto"
+    ],
+}
+
+// Explicitly provide a variant without LTO and PGO, to work around the issue
+// that we can't detect non-LTO users of the module in Android.mk.
+// http://b/77320844
+cc_library {
+    name: "libhwbinder_noltopgo",
+    defaults: [
+        "libhwbinder_defaults",
+    ],
+}
+
+// Provide the pgo property to build hwbinder with PGO.
+cc_defaults {
+    name: "hwbinder_pgo",
+    pgo: {
+        instrumentation: true,
+        profile_file: "hwbinder/hwbinder.profdata",
+        benchmarks: ["hwbinder"],
+        enable_profile_use: true,
+    },
+}
+// Provide the lto property to build hwbinder with LTO.
+cc_defaults {
+    name: "hwbinder_lto",
+    target: {
+        android: {
+            lto: {
+                thin: true,
+            },
+        },
+    },
 }
diff --git a/Binder.cpp b/Binder.cpp
index b6a58ff..147c7ca 100644
--- a/Binder.cpp
+++ b/Binder.cpp
@@ -219,11 +219,8 @@
 BpHwRefBase::BpHwRefBase(const sp<IBinder>& o)
     : mRemote(o.get()), mRefs(NULL), mState(0)
 {
-    extendObjectLifetime(OBJECT_LIFETIME_WEAK);
-
     if (mRemote) {
         mRemote->incStrong(this);           // Removed on first IncStrong().
-        mRefs = mRemote->createWeak(this);  // Held for our entire lifetime.
     }
 }
 
@@ -233,7 +230,6 @@
         if (!(mState.load(std::memory_order_relaxed)&kRemoteAcquired)) {
             mRemote->decStrong(this);
         }
-        mRefs->decWeak(this);
     }
 }
 
@@ -251,7 +247,7 @@
 
 bool BpHwRefBase::onIncStrongAttempted(uint32_t /*flags*/, const void* /*id*/)
 {
-    return mRemote ? mRefs->attemptIncStrong(this) : false;
+    return false;
 }
 
 // ---------------------------------------------------------------------------
diff --git a/BpHwBinder.cpp b/BpHwBinder.cpp
index ed7ed2a..90b3c6f 100644
--- a/BpHwBinder.cpp
+++ b/BpHwBinder.cpp
@@ -96,7 +96,7 @@
     ALOGV("Creating BpHwBinder %p handle %d\n", this, mHandle);
 
     extendObjectLifetime(OBJECT_LIFETIME_WEAK);
-    IPCThreadState::self()->incWeakHandle(handle);
+    IPCThreadState::self()->incWeakHandle(handle, this);
 }
 
 status_t BpHwBinder::transact(
@@ -282,7 +282,7 @@
 {
     ALOGV("onFirstRef BpHwBinder %p handle %d\n", this, mHandle);
     IPCThreadState* ipc = IPCThreadState::self();
-    if (ipc) ipc->incStrongHandle(mHandle);
+    if (ipc) ipc->incStrongHandle(mHandle, this);
 }
 
 void BpHwBinder::onLastStrongRef(const void* /*id*/)
diff --git a/BufferedTextOutput.cpp b/BufferedTextOutput.cpp
index 8ef48e1..9bddde5 100644
--- a/BufferedTextOutput.cpp
+++ b/BufferedTextOutput.cpp
@@ -19,11 +19,11 @@
 #include <hwbinder/BufferedTextOutput.h>
 #include <hwbinder/Debug.h>
 
-#include <utils/Atomic.h>
+#include <cutils/atomic.h>
+#include <cutils/threads.h>
 #include <utils/Log.h>
 #include <utils/RefBase.h>
 #include <utils/Vector.h>
-#include <cutils/threads.h>
 
 #include <hwbinder/Static.h>
 
diff --git a/Debug.cpp b/Debug.cpp
index fb97069..d304d21 100644
--- a/Debug.cpp
+++ b/Debug.cpp
@@ -222,7 +222,11 @@
 
         for (word = 0; word < bytesPerLine; ) {
 
-            const size_t startIndex = word+(alignment-(alignment?1:0));
+            size_t align_offset = alignment-(alignment?1:0);
+            if (remain > 0 && (size_t)remain <= align_offset) {
+                align_offset = remain - 1;
+            }
+            const size_t startIndex = word+align_offset;
 
             for (index = 0; index < alignment || (alignment == 0 && index < bytesPerLine); index++) {
 
diff --git a/IPCThreadState.cpp b/IPCThreadState.cpp
index b9ca20a..b36838e 100644
--- a/IPCThreadState.cpp
+++ b/IPCThreadState.cpp
@@ -412,6 +412,15 @@
     if (mProcess->mDriverFD <= 0)
         return;
     talkWithDriver(false);
+    // The flush could have caused post-write refcount decrements to have
+    // been executed, which in turn could result in BC_RELEASE/BC_DECREFS
+    // being queued in mOut. So flush again, if we need to.
+    if (mOut.dataSize() > 0) {
+        talkWithDriver(false);
+    }
+    if (mOut.dataSize() > 0) {
+        ALOGW("mOut.dataSize() > 0 after flushCommands()");
+    }
 }
 
 void IPCThreadState::blockUntilThreadAvailable()
@@ -453,12 +462,15 @@
 
         pthread_mutex_lock(&mProcess->mThreadCountLock);
         mProcess->mExecutingThreadsCount--;
-        if (mProcess->mExecutingThreadsCount < mProcess->mMaxThreads && mProcess->mMaxThreads > 1 &&
+        if (mProcess->mExecutingThreadsCount < mProcess->mMaxThreads &&
             mProcess->mStarvationStartTimeMs != 0) {
             int64_t starvationTimeMs = uptimeMillis() - mProcess->mStarvationStartTimeMs;
             if (starvationTimeMs > 100) {
-                ALOGW("All binder threads in pool (%zu threads) busy for %" PRId64 " ms",
-                      mProcess->mMaxThreads, starvationTimeMs);
+                // If there is only a single-threaded client, nobody would be blocked
+                // on this, and it's not really starvation. (see b/37647467)
+                ALOGW("All binder threads in pool (%zu threads) busy for %" PRId64 " ms%s",
+                      mProcess->mMaxThreads, starvationTimeMs,
+                      mProcess->mMaxThreads > 1 ? "" : " (may be a false alarm)");
             }
             mProcess->mStarvationStartTimeMs = 0;
         }
@@ -473,26 +485,58 @@
 void IPCThreadState::processPendingDerefs()
 {
     if (mIn.dataPosition() >= mIn.dataSize()) {
-        size_t numPending = mPendingWeakDerefs.size();
-        if (numPending > 0) {
-            for (size_t i = 0; i < numPending; i++) {
-                RefBase::weakref_type* refs = mPendingWeakDerefs[i];
+        /*
+         * The decWeak()/decStrong() calls may cause a destructor to run,
+         * which in turn could have initiated an outgoing transaction,
+         * which in turn could cause us to add to the pending refs
+         * vectors; so instead of simply iterating, loop until they're empty.
+         *
+         * We do this in an outer loop, because calling decStrong()
+         * may result in something being added to mPendingWeakDerefs,
+         * which could be delayed until the next incoming command
+         * from the driver if we don't process it now.
+         */
+        while (mPendingWeakDerefs.size() > 0 || mPendingStrongDerefs.size() > 0) {
+            while (mPendingWeakDerefs.size() > 0) {
+                RefBase::weakref_type* refs = mPendingWeakDerefs[0];
+                mPendingWeakDerefs.removeAt(0);
                 refs->decWeak(mProcess.get());
             }
-            mPendingWeakDerefs.clear();
-        }
 
-        numPending = mPendingStrongDerefs.size();
-        if (numPending > 0) {
-            for (size_t i = 0; i < numPending; i++) {
-                BHwBinder* obj = mPendingStrongDerefs[i];
+            if (mPendingStrongDerefs.size() > 0) {
+                // We don't use while() here because we don't want to re-order
+                // strong and weak decs at all; if this decStrong() causes both a
+                // decWeak() and a decStrong() to be queued, we want to process
+                // the decWeak() first.
+                BHwBinder* obj = mPendingStrongDerefs[0];
+                mPendingStrongDerefs.removeAt(0);
                 obj->decStrong(mProcess.get());
             }
-            mPendingStrongDerefs.clear();
         }
     }
 }
 
+void IPCThreadState::processPostWriteDerefs()
+{
+    /*
+     * libhwbinder has a flushCommands() in the BpHwBinder destructor,
+     * which makes this function (potentially) reentrant.
+     * New entries shouldn't be added though, so just iterating until empty
+     * should be safe.
+     */
+    while (mPostWriteWeakDerefs.size() > 0) {
+        RefBase::weakref_type* refs = mPostWriteWeakDerefs[0];
+        mPostWriteWeakDerefs.removeAt(0);
+        refs->decWeak(mProcess.get());
+    }
+
+    while (mPostWriteStrongDerefs.size() > 0) {
+        RefBase* obj = mPostWriteStrongDerefs[0];
+        mPostWriteStrongDerefs.removeAt(0);
+        obj->decStrong(mProcess.get());
+    }
+}
+
 void IPCThreadState::joinThreadPool(bool isMain)
 {
     LOG_THREADPOOL("**** THREAD %p (PID %d) IS JOINING THE THREAD POOL\n", (void*)pthread_self(), getpid());
@@ -500,6 +544,7 @@
     mOut.writeInt32(isMain ? BC_ENTER_LOOPER : BC_REGISTER_LOOPER);
 
     status_t result;
+    mIsLooper = true;
     do {
         processPendingDerefs();
         // now get the next command to be processed, waiting if necessary
@@ -522,6 +567,7 @@
         (void*)pthread_self(), getpid(), result);
 
     mOut.writeInt32(BC_EXIT_LOOPER);
+    mIsLooper = false;
     talkWithDriver(false);
 }
 
@@ -531,6 +577,12 @@
         return -EBADF;
     }
 
+    // Tells the kernel not to spawn any additional binder threads,
+    // as that won't work with polling. Also, the caller is responsible
+    // for subsequently calling handlePolledCommands().
+    mProcess->setThreadPoolConfiguration(1, true /* callerWillJoin */);
+    mIsPollingThread = true;
+
     mOut.writeInt32(BC_ENTER_LOOPER);
     *fd = mProcess->mDriverFD;
     return 0;
@@ -563,7 +615,7 @@
                                   uint32_t code, const Parcel& data,
                                   Parcel* reply, uint32_t flags)
 {
-    status_t err = data.errorCheck();
+    status_t err;
 
     flags |= TF_ACCEPT_FDS;
 
@@ -573,11 +625,9 @@
             << indent << data << dedent << endl;
     }
 
-    if (err == NO_ERROR) {
-        LOG_ONEWAY(">>>> SEND from pid %d uid %d %s", getpid(), getuid(),
-            (flags & TF_ONE_WAY) == 0 ? "READ REPLY" : "ONE WAY");
-        err = writeTransactionData(BC_TRANSACTION_SG, flags, handle, code, data, NULL);
-    }
+    LOG_ONEWAY(">>>> SEND from pid %d uid %d %s", getpid(), getuid(),
+        (flags & TF_ONE_WAY) == 0 ? "READ REPLY" : "ONE WAY");
+    err = writeTransactionData(BC_TRANSACTION_SG, flags, handle, code, data, NULL);
 
     if (err != NO_ERROR) {
         if (reply) reply->setError(err);
@@ -619,11 +669,14 @@
     return err;
 }
 
-void IPCThreadState::incStrongHandle(int32_t handle)
+void IPCThreadState::incStrongHandle(int32_t handle, BpHwBinder *proxy)
 {
     LOG_REMOTEREFS("IPCThreadState::incStrongHandle(%d)\n", handle);
     mOut.writeInt32(BC_ACQUIRE);
     mOut.writeInt32(handle);
+    // Create a temp reference until the driver has handled this command.
+    proxy->incStrong(mProcess.get());
+    mPostWriteStrongDerefs.push(proxy);
 }
 
 void IPCThreadState::decStrongHandle(int32_t handle)
@@ -633,11 +686,14 @@
     mOut.writeInt32(handle);
 }
 
-void IPCThreadState::incWeakHandle(int32_t handle)
+void IPCThreadState::incWeakHandle(int32_t handle, BpHwBinder *proxy)
 {
     LOG_REMOTEREFS("IPCThreadState::incWeakHandle(%d)\n", handle);
     mOut.writeInt32(BC_INCREFS);
     mOut.writeInt32(handle);
+    // Create a temp reference until the driver has handled this command.
+    proxy->getWeakRefs()->incWeak(mProcess.get());
+    mPostWriteWeakDerefs.push(proxy->getWeakRefs());
 }
 
 void IPCThreadState::decWeakHandle(int32_t handle)
@@ -676,7 +732,7 @@
 #if LOG_REFCOUNTS
     printf("IPCThreadState::expungeHandle(%ld)\n", handle);
 #endif
-    self()->mProcess->expungeHandle(handle, binder);
+    self()->mProcess->expungeHandle(handle, binder);  // NOLINT
 }
 
 status_t IPCThreadState::requestDeathNotification(int32_t handle, BpHwBinder* proxy)
@@ -699,12 +755,16 @@
     : mProcess(ProcessState::self()),
       mMyThreadId(gettid()),
       mStrictModePolicy(0),
-      mLastTransactionBinderFlags(0)
-{
+      mLastTransactionBinderFlags(0),
+      mIsLooper(false),
+      mIsPollingThread(false) {
     pthread_setspecific(gTLS, this);
     clearCaller();
     mIn.setDataCapacity(256);
     mOut.setDataCapacity(256);
+
+    // TODO(b/67742352): remove this variable from the class
+    (void)mMyThreadId;
 }
 
 IPCThreadState::~IPCThreadState()
@@ -889,8 +949,10 @@
         if (bwr.write_consumed > 0) {
             if (bwr.write_consumed < mOut.dataSize())
                 mOut.remove(0, bwr.write_consumed);
-            else
+            else {
                 mOut.setDataSize(0);
+                processPostWriteDerefs();
+            }
         }
         if (bwr.read_consumed > 0) {
             mIn.setDataSize(bwr.read_consumed);
@@ -915,28 +977,29 @@
     int32_t handle, uint32_t code, const Parcel& data, status_t* statusBuffer)
 {
     binder_transaction_data_sg tr_sg;
-    tr_sg.tr.target.ptr = 0; /* Don't pass uninitialized stack data to a remote process */
-    tr_sg.tr.target.handle = handle;
-    tr_sg.tr.code = code;
-    tr_sg.tr.flags = binderFlags;
-    tr_sg.tr.cookie = 0;
-    tr_sg.tr.sender_pid = 0;
-    tr_sg.tr.sender_euid = 0;
+    /* Don't pass uninitialized stack data to a remote process */
+    tr_sg.transaction_data.target.ptr = 0;
+    tr_sg.transaction_data.target.handle = handle;
+    tr_sg.transaction_data.code = code;
+    tr_sg.transaction_data.flags = binderFlags;
+    tr_sg.transaction_data.cookie = 0;
+    tr_sg.transaction_data.sender_pid = 0;
+    tr_sg.transaction_data.sender_euid = 0;
 
     const status_t err = data.errorCheck();
     if (err == NO_ERROR) {
-        tr_sg.tr.data_size = data.ipcDataSize();
-        tr_sg.tr.data.ptr.buffer = data.ipcData();
-        tr_sg.tr.offsets_size = data.ipcObjectsCount()*sizeof(binder_size_t);
-        tr_sg.tr.data.ptr.offsets = data.ipcObjects();
+        tr_sg.transaction_data.data_size = data.ipcDataSize();
+        tr_sg.transaction_data.data.ptr.buffer = data.ipcData();
+        tr_sg.transaction_data.offsets_size = data.ipcObjectsCount()*sizeof(binder_size_t);
+        tr_sg.transaction_data.data.ptr.offsets = data.ipcObjects();
         tr_sg.buffers_size = data.ipcBufferSize();
     } else if (statusBuffer) {
-        tr_sg.tr.flags |= TF_STATUS_CODE;
+        tr_sg.transaction_data.flags |= TF_STATUS_CODE;
         *statusBuffer = err;
-        tr_sg.tr.data_size = sizeof(status_t);
-        tr_sg.tr.data.ptr.buffer = reinterpret_cast<uintptr_t>(statusBuffer);
-        tr_sg.tr.offsets_size = 0;
-        tr_sg.tr.data.ptr.offsets = 0;
+        tr_sg.transaction_data.data_size = sizeof(status_t);
+        tr_sg.transaction_data.data.ptr.buffer = reinterpret_cast<uintptr_t>(statusBuffer);
+        tr_sg.transaction_data.offsets_size = 0;
+        tr_sg.transaction_data.data.ptr.offsets = 0;
         tr_sg.buffers_size = 0;
     } else {
         return (mLastError = err);
@@ -953,6 +1016,15 @@
     mContextObject = obj;
 }
 
+bool IPCThreadState::isLooperThread()
+{
+    return mIsLooper;
+}
+
+bool IPCThreadState::isOnlyBinderThread() {
+    return (mIsLooper && mProcess->mMaxThreads <= 1) || mIsPollingThread;
+}
+
 status_t IPCThreadState::executeCommand(int32_t cmd)
 {
     BHwBinder* obj;
diff --git a/Parcel.cpp b/Parcel.cpp
index 7f80c01..2b1bb90 100644
--- a/Parcel.cpp
+++ b/Parcel.cpp
@@ -98,7 +98,7 @@
 void acquire_binder_object(const sp<ProcessState>& proc,
     const flat_binder_object& obj, const void* who)
 {
-    switch (obj.type) {
+    switch (obj.hdr.type) {
         case BINDER_TYPE_BINDER:
             if (obj.binder) {
                 LOG_REFS("Parcel %p acquiring reference on local %p", who, obj.cookie);
@@ -124,7 +124,7 @@
         }
     }
 
-    ALOGD("Invalid object type 0x%08x", obj.type);
+    ALOGD("Invalid object type 0x%08x", obj.hdr.type);
 }
 
 void acquire_object(const sp<ProcessState>& proc, const binder_object_header& obj,
@@ -144,7 +144,7 @@
 void release_object(const sp<ProcessState>& proc,
     const flat_binder_object& obj, const void* who)
 {
-    switch (obj.type) {
+    switch (obj.hdr.type) {
         case BINDER_TYPE_BINDER:
             if (obj.binder) {
                 LOG_REFS("Parcel %p releasing reference on local %p", who, obj.cookie);
@@ -184,7 +184,7 @@
         }
     }
 
-    ALOGE("Invalid object type 0x%08x", obj.type);
+    ALOGE("Invalid object type 0x%08x", obj.hdr.type);
 }
 
 inline static status_t finish_flatten_binder(
@@ -206,7 +206,7 @@
                 ALOGE("null proxy");
             }
             const int32_t handle = proxy ? proxy->handle() : 0;
-            obj.type = BINDER_TYPE_HANDLE;
+            obj.hdr.type = BINDER_TYPE_HANDLE;
             obj.flags = FLAT_BINDER_FLAG_ACCEPTS_FDS;
             obj.binder = 0; /* Don't pass uninitialized stack data to a remote process */
             obj.handle = handle;
@@ -222,12 +222,12 @@
             if (local->isRequestingSid()) {
                 obj.flags |= FLAT_BINDER_FLAG_TXN_SECURITY_CTX;
             }
-            obj.type = BINDER_TYPE_BINDER;
+            obj.hdr.type = BINDER_TYPE_BINDER;
             obj.binder = reinterpret_cast<uintptr_t>(local->getWeakRefs());
             obj.cookie = reinterpret_cast<uintptr_t>(local);
         }
     } else {
-        obj.type = BINDER_TYPE_BINDER;
+        obj.hdr.type = BINDER_TYPE_BINDER;
         obj.binder = 0;
         obj.cookie = 0;
     }
@@ -251,12 +251,12 @@
                     ALOGE("null proxy");
                 }
                 const int32_t handle = proxy ? proxy->handle() : 0;
-                obj.type = BINDER_TYPE_WEAK_HANDLE;
+                obj.hdr.type = BINDER_TYPE_WEAK_HANDLE;
                 obj.binder = 0; /* Don't pass uninitialized stack data to a remote process */
                 obj.handle = handle;
                 obj.cookie = 0;
             } else {
-                obj.type = BINDER_TYPE_WEAK_BINDER;
+                obj.hdr.type = BINDER_TYPE_WEAK_BINDER;
                 obj.binder = reinterpret_cast<uintptr_t>(binder.get_refs());
                 obj.cookie = reinterpret_cast<uintptr_t>(binder.unsafe_get());
             }
@@ -271,13 +271,13 @@
         // but we can't do that with the different reference counting
         // implementation we are using.
         ALOGE("Unable to unflatten Binder weak reference!");
-        obj.type = BINDER_TYPE_BINDER;
+        obj.hdr.type = BINDER_TYPE_BINDER;
         obj.binder = 0;
         obj.cookie = 0;
         return finish_flatten_binder(NULL, obj, out);
 
     } else {
-        obj.type = BINDER_TYPE_BINDER;
+        obj.hdr.type = BINDER_TYPE_BINDER;
         obj.binder = 0;
         obj.cookie = 0;
         return finish_flatten_binder(NULL, obj, out);
@@ -297,7 +297,7 @@
     const flat_binder_object* flat = in.readObject<flat_binder_object>();
 
     if (flat) {
-        switch (flat->type) {
+        switch (flat->hdr.type) {
             case BINDER_TYPE_BINDER:
                 *out = reinterpret_cast<IBinder*>(flat->cookie);
                 return finish_unflatten_binder(NULL, *flat, in);
@@ -316,7 +316,7 @@
     const flat_binder_object* flat = in.readObject<flat_binder_object>();
 
     if (flat) {
-        switch (flat->type) {
+        switch (flat->hdr.type) {
             case BINDER_TYPE_BINDER:
                 *out = reinterpret_cast<IBinder*>(flat->cookie);
                 return finish_unflatten_binder(NULL, *flat, in);
@@ -342,12 +342,12 @@
 /*
  * Return true iff:
  * 1. obj is indeed a binder_buffer_object (type is BINDER_TYPE_PTR), and
- * 2. obj does NOT have the flag BINDER_BUFFER_REF (it is not a reference, but
+ * 2. obj does NOT have the flag BINDER_BUFFER_FLAG_REF (it is not a reference, but
  *    an actual buffer.)
  */
 static inline bool isBuffer(const binder_buffer_object& obj) {
     return obj.hdr.type == BINDER_TYPE_PTR
-        && (obj.flags & BINDER_BUFFER_REF) == 0;
+        && (obj.flags & BINDER_BUFFER_FLAG_REF) == 0;
 }
 
 // ---------------------------------------------------------------------------
@@ -753,7 +753,6 @@
                 break;
             }
             case BINDER_TYPE_FD: {
-                const binder_fd_object *fd_obj = reinterpret_cast<const binder_fd_object*>(hdr);
                 // remember if it's a file descriptor
                 if (!mAllowFds) {
                     // fail before modifying our object index
@@ -847,7 +846,7 @@
     obj.hdr.type = BINDER_TYPE_PTR;
     obj.buffer = reinterpret_cast<binder_uintptr_t>(buffer);
     obj.length = length;
-    obj.flags = BINDER_BUFFER_HAS_PARENT;
+    obj.flags = BINDER_BUFFER_FLAG_HAS_PARENT;
     if(!validateBufferParent(parent_buffer_handle, parent_offset))
         return BAD_VALUE;
     obj.parent = parent_buffer_handle;
@@ -891,11 +890,13 @@
         return status;
     binder_buffer_object obj;
     obj.hdr.type = BINDER_TYPE_PTR;
-    obj.flags = BINDER_BUFFER_REF;
+    obj.flags = BINDER_BUFFER_FLAG_REF;
     if (!validateBufferChild(child_buffer_handle, child_offset))
         return BAD_VALUE;
-    obj.child = child_buffer_handle;
-    obj.child_offset = child_offset;
+    // The current binder.h does not have the child and child_offset field
+    // names yet, so store the values in the buffer and length fields instead.
+    obj.buffer = child_buffer_handle;
+    obj.length = child_offset;
     if (handle != nullptr)
         // We use an index into mObjects as a handle
         *handle = mObjectsSize;
@@ -916,11 +917,13 @@
         return status;
     binder_buffer_object obj;
     obj.hdr.type = BINDER_TYPE_PTR;
-    obj.flags = BINDER_BUFFER_REF | BINDER_BUFFER_HAS_PARENT;
+    obj.flags = BINDER_BUFFER_FLAG_REF | BINDER_BUFFER_FLAG_HAS_PARENT;
     if (!validateBufferChild(child_buffer_handle, child_offset))
         return BAD_VALUE;
-    obj.child = child_buffer_handle;
-    obj.child_offset = child_offset;
+    // The current binder.h does not have the child and child_offset field
+    // names yet, so store the values in the buffer and length fields instead.
+    obj.buffer = child_buffer_handle;
+    obj.length = child_offset;
     if(!validateBufferParent(parent_buffer_handle, parent_offset))
         return BAD_VALUE;
     obj.parent = parent_buffer_handle;
@@ -939,7 +942,7 @@
         return status;
     binder_buffer_object obj;
     obj.hdr.type = BINDER_TYPE_PTR;
-    obj.flags = BINDER_BUFFER_REF;
+    obj.flags = BINDER_BUFFER_FLAG_REF;
     if (handle != nullptr)
         // We use an index into mObjects as a handle
         *handle = mObjectsSize;
@@ -957,7 +960,7 @@
         return status;
     binder_buffer_object obj;
     obj.hdr.type = BINDER_TYPE_PTR;
-    obj.flags = BINDER_BUFFER_REF | BINDER_BUFFER_HAS_PARENT;
+    obj.flags = BINDER_BUFFER_FLAG_REF | BINDER_BUFFER_FLAG_HAS_PARENT;
     // parent_buffer_handle and parent_offset needs to be checked.
     if(!validateBufferParent(parent_buffer_handle, parent_offset))
         return BAD_VALUE;
@@ -1050,7 +1053,6 @@
     struct binder_fd_array_object fd_array;
     size_t buffer_handle;
     status_t status = OK;
-    uint32_t flags = 0;
 
     if (handle == nullptr) {
         status = writeUint64(0);
@@ -1543,7 +1545,7 @@
         return false;
     }
 
-    if (flags & BINDER_BUFFER_HAS_PARENT) {
+    if (flags & BINDER_BUFFER_FLAG_HAS_PARENT) {
         if (buffer_obj->parent != parent) {
             ALOGE("Buffer parent %" PRIu64 " does not match expected parent %zu.",
                   static_cast<uint64_t>(buffer_obj->parent), parent);
@@ -1563,8 +1565,6 @@
                             uint32_t flags, size_t parent, size_t parentOffset,
                             const void **buffer_out) const {
 
-    status_t status = OK;
-
     const binder_buffer_object* buffer_obj = readObject<binder_buffer_object>(buffer_handle);
 
     if (buffer_obj == nullptr || !isBuffer(*buffer_obj)) {
@@ -1621,7 +1621,7 @@
                                             size_t parent_offset,
                                             const void **buffer_out) const
 {
-    return readBuffer(buffer_size, buffer_handle, BINDER_BUFFER_HAS_PARENT,
+    return readBuffer(buffer_size, buffer_handle, BINDER_BUFFER_FLAG_HAS_PARENT,
                       parent_buffer_handle, parent_offset, buffer_out);
 }
 
@@ -1641,7 +1641,7 @@
             *buffer_handle = 0; // TODO fix this, as readBuffer would do
         }
         if(isRef != nullptr) {
-            *isRef = (buffer_obj->flags & BINDER_BUFFER_REF) != 0;
+            *isRef = (buffer_obj->flags & BINDER_BUFFER_FLAG_REF) != 0;
             LOG_BUFFER("    readReference: isRef = %d", *isRef);
         }
         // in read side, always use .buffer and .length.
@@ -1786,7 +1786,7 @@
         i--;
         const flat_binder_object* flat
             = reinterpret_cast<flat_binder_object*>(mData+mObjects[i]);
-        if (flat->type == BINDER_TYPE_FD) {
+        if (flat->hdr.type == BINDER_TYPE_FD) {
             //ALOGI("Closing fd: %ld", flat->handle);
             close(flat->handle);
         }
@@ -1884,7 +1884,7 @@
         for (size_t i=0; i<N; i++) {
             const flat_binder_object* flat
                 = reinterpret_cast<const flat_binder_object*>(DATA+OBJS[i]);
-            if (flat->type == BINDER_TYPE_PTR) {
+            if (flat->hdr.type == BINDER_TYPE_PTR) {
                 const binder_buffer_object* buffer
                     = reinterpret_cast<const binder_buffer_object*>(DATA+OBJS[i]);
                 if(isBuffer(*buffer)) {
@@ -1897,7 +1897,7 @@
                 }
             } else {
                 to << endl << "Object #" << i << " @ " << (void*)OBJS[i] << ": "
-                    << TypeCode(flat->type & 0x7f7f7f00)
+                    << TypeCode(flat->hdr.type & 0x7f7f7f00)
                     << " = " << flat->binder;
             }
         }
@@ -2122,7 +2122,7 @@
             for (size_t i=objectsSize; i<mObjectsSize; i++) {
                 const flat_binder_object* flat
                     = reinterpret_cast<flat_binder_object*>(mData+mObjects[i]);
-                if (flat->type == BINDER_TYPE_FD) {
+                if (flat->hdr.type == BINDER_TYPE_FD) {
                     // will need to rescan because we may have lopped off the only FDs
                     mFdsKnown = false;
                 }
@@ -2151,7 +2151,7 @@
                 pthread_mutex_unlock(&gParcelGlobalAllocSizeLock);
                 mData = data;
                 mDataCapacity = desired;
-            } else if (desired > mDataCapacity) {
+            } else {
                 mError = NO_MEMORY;
                 return NO_MEMORY;
             }
@@ -2235,7 +2235,7 @@
     for (size_t i=0; i<mObjectsSize; i++) {
         const flat_binder_object* flat
             = reinterpret_cast<const flat_binder_object*>(mData + mObjects[i]);
-        if (flat->type == BINDER_TYPE_FD) {
+        if (flat->hdr.type == BINDER_TYPE_FD) {
             hasFds = true;
             break;
         }
diff --git a/ProcessState.cpp b/ProcessState.cpp
index e4e1a12..37e3611 100644
--- a/ProcessState.cpp
+++ b/ProcessState.cpp
@@ -18,10 +18,10 @@
 
 #include <hwbinder/ProcessState.h>
 
+#include <cutils/atomic.h>
 #include <hwbinder/BpHwBinder.h>
 #include <hwbinder/IPCThreadState.h>
 #include <hwbinder/binder_kernel.h>
-#include <utils/Atomic.h>
 #include <utils/Log.h>
 #include <utils/String8.h>
 #include <utils/threads.h>
@@ -39,7 +39,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
-#define BINDER_VM_SIZE ((1 * 1024 * 1024) - sysconf(_SC_PAGE_SIZE) * 2)
+#define DEFAULT_BINDER_VM_SIZE ((1 * 1024 * 1024) - sysconf(_SC_PAGE_SIZE) * 2)
 #define DEFAULT_MAX_BINDER_THREADS 0
 
 // -------------------------------------------------------------------------
@@ -71,7 +71,7 @@
     if (gProcess != NULL) {
         return gProcess;
     }
-    gProcess = new ProcessState;
+    gProcess = new ProcessState(DEFAULT_BINDER_VM_SIZE);
     return gProcess;
 }
 
@@ -80,6 +80,18 @@
     return gProcess;
 }
 
+sp<ProcessState> ProcessState::initWithMmapSize(size_t mmap_size) {
+    Mutex::Autolock _l(gProcessMutex);
+    if (gProcess != NULL) {
+        LOG_ALWAYS_FATAL_IF(mmap_size != gProcess->getMmapSize(),
+                "ProcessState already initialized with a different mmap size.");
+        return gProcess;
+    }
+
+    gProcess = new ProcessState(mmap_size);
+    return gProcess;
+}
+
 void ProcessState::setContextObject(const sp<IBinder>& object)
 {
     setContextObject(object, String16("default"));
@@ -208,6 +220,10 @@
     return count;
 }
 
+size_t ProcessState::getMmapSize() {
+    return mMmapSize;
+}
+
 ProcessState::handle_entry* ProcessState::lookupHandleLocked(int32_t handle)
 {
     const size_t N=mHandleToObject.size();
@@ -333,6 +349,10 @@
     return result;
 }
 
+size_t ProcessState::getMaxThreads() {
+    return mMaxThreads;
+}
+
 void ProcessState::giveThreadPoolName() {
     androidSetThreadName( makeBinderThreadName().string() );
 }
@@ -364,7 +384,7 @@
     return fd;
 }
 
-ProcessState::ProcessState()
+ProcessState::ProcessState(size_t mmap_size)
     : mDriverFD(open_driver())
     , mVMStart(MAP_FAILED)
     , mThreadCountLock(PTHREAD_MUTEX_INITIALIZER)
@@ -378,10 +398,11 @@
     , mThreadPoolStarted(false)
     , mSpawnThreadOnStart(true)
     , mThreadPoolSeq(1)
+    , mMmapSize(mmap_size)
 {
     if (mDriverFD >= 0) {
         // mmap the binder, providing a chunk of virtual address space to receive transactions.
-        mVMStart = mmap(0, BINDER_VM_SIZE, PROT_READ, MAP_PRIVATE | MAP_NORESERVE, mDriverFD, 0);
+        mVMStart = mmap(0, mMmapSize, PROT_READ, MAP_PRIVATE | MAP_NORESERVE, mDriverFD, 0);
         if (mVMStart == MAP_FAILED) {
             // *sigh*
             ALOGE("Using /dev/hwbinder failed: unable to mmap transaction memory.\n");
@@ -398,7 +419,7 @@
 {
     if (mDriverFD >= 0) {
         if (mVMStart != MAP_FAILED) {
-            munmap(mVMStart, BINDER_VM_SIZE);
+            munmap(mVMStart, mMmapSize);
         }
         close(mDriverFD);
     }
diff --git a/Static.cpp b/Static.cpp
index c63e503..a84f811 100644
--- a/Static.cpp
+++ b/Static.cpp
@@ -73,7 +73,7 @@
 
 // ------------ ProcessState.cpp
 
-Mutex gProcessMutex;
+Mutex& gProcessMutex = *new Mutex;
 sp<ProcessState> gProcess;
 
 class LibHwbinderIPCtStatics
diff --git a/include/hwbinder/IInterface.h b/include/hwbinder/IInterface.h
index 7ed6b42..17cbae9 100644
--- a/include/hwbinder/IInterface.h
+++ b/include/hwbinder/IInterface.h
@@ -38,23 +38,6 @@
 // ----------------------------------------------------------------------
 
 template<typename INTERFACE>
-class BnInterface : public INTERFACE, public IInterface, public BHwBinder
-{
-public:
-                                BnInterface(const sp<INTERFACE>& impl);
-protected:
-    const sp<INTERFACE>         mImpl;
-    virtual IBinder*            onAsBinder();
-};
-
-template<typename INTERFACE>
-inline BnInterface<INTERFACE>::BnInterface(
-        const sp<INTERFACE>& impl) : mImpl(impl)
-{
-}
-// ----------------------------------------------------------------------
-
-template<typename INTERFACE>
 class BpInterface : public INTERFACE, public IInterface, public BpHwRefBase
 {
 public:
@@ -67,11 +50,6 @@
 // ----------------------------------------------------------------------
 // No user-serviceable parts after this...
 
-template<typename INTERFACE>
-IBinder* BnInterface<INTERFACE>::onAsBinder()
-{
-    return this;
-}
 
 template<typename INTERFACE>
 inline BpInterface<INTERFACE>::BpInterface(const sp<IBinder>& remote)
diff --git a/include/hwbinder/IPCThreadState.h b/include/hwbinder/IPCThreadState.h
index a3224c3..32eea8c 100644
--- a/include/hwbinder/IPCThreadState.h
+++ b/include/hwbinder/IPCThreadState.h
@@ -71,9 +71,9 @@
                                          uint32_t code, const Parcel& data,
                                          Parcel* reply, uint32_t flags);
 
-            void                incStrongHandle(int32_t handle);
+            void                incStrongHandle(int32_t handle, BpHwBinder *proxy);
             void                decStrongHandle(int32_t handle);
-            void                incWeakHandle(int32_t handle);
+            void                incWeakHandle(int32_t handle, BpHwBinder *proxy);
             void                decWeakHandle(int32_t handle);
             status_t            attemptIncStrongHandle(int32_t handle);
     static  void                expungeHandle(int32_t handle, IBinder* binder);
@@ -97,6 +97,10 @@
 
             // Service manager registration
             void                setTheContextObject(sp<BHwBinder> obj);
+
+            bool                isLooperThread();
+            bool                isOnlyBinderThread();
+
 private:
                                 IPCThreadState();
                                 ~IPCThreadState();
@@ -114,6 +118,7 @@
             status_t            getAndExecuteCommand();
             status_t            executeCommand(int32_t command);
             void                processPendingDerefs();
+            void                processPostWriteDerefs();
 
             void                clearCaller();
 
@@ -127,7 +132,8 @@
     const   pid_t               mMyThreadId;
             Vector<BHwBinder*>    mPendingStrongDerefs;
             Vector<RefBase::weakref_type*> mPendingWeakDerefs;
-
+            Vector<RefBase*>    mPostWriteStrongDerefs;
+            Vector<RefBase::weakref_type*> mPostWriteWeakDerefs;
             Parcel              mIn;
             Parcel              mOut;
             status_t            mLastError;
@@ -137,6 +143,8 @@
             int32_t             mStrictModePolicy;
             int32_t             mLastTransactionBinderFlags;
             sp<BHwBinder>         mContextObject;
+            bool                mIsLooper;
+            bool mIsPollingThread;
 };
 
 }; // namespace hardware
diff --git a/include/hwbinder/Parcel.h b/include/hwbinder/Parcel.h
index debffa3..f6e45d5 100644
--- a/include/hwbinder/Parcel.h
+++ b/include/hwbinder/Parcel.h
@@ -20,7 +20,6 @@
 #include <string>
 #include <vector>
 
-#include <android-base/unique_fd.h>
 #include <cutils/native_handle.h>
 #include <utils/Errors.h>
 #include <utils/RefBase.h>
diff --git a/include/hwbinder/ProcessState.h b/include/hwbinder/ProcessState.h
index 5441b75..2f72059 100644
--- a/include/hwbinder/ProcessState.h
+++ b/include/hwbinder/ProcessState.h
@@ -37,6 +37,8 @@
 public:
     static  sp<ProcessState>    self();
     static  sp<ProcessState>    selfOrNull();
+    // Note: don't call self() or selfOrNull() before initWithMmapSize()
+    static  sp<ProcessState>    initWithMmapSize(size_t mmapSize); // size in bytes
 
             void                setContextObject(const sp<IBinder>& object);
             sp<IBinder>         getContextObject(const sp<IBinder>& caller);
@@ -64,14 +66,15 @@
             void                spawnPooledThread(bool isMain);
 
             status_t            setThreadPoolConfiguration(size_t maxThreads, bool callerJoinsPool);
+            size_t              getMaxThreads();
             void                giveThreadPoolName();
 
             ssize_t             getKernelReferences(size_t count, uintptr_t* buf);
-
+            size_t              getMmapSize();
 private:
     friend class IPCThreadState;
 
-                                ProcessState();
+                                ProcessState(size_t mmap_size);
                                 ~ProcessState();
 
                                 ProcessState(const ProcessState& o);
@@ -114,6 +117,7 @@
             bool                mThreadPoolStarted;
             bool                mSpawnThreadOnStart;
     volatile int32_t            mThreadPoolSeq;
+            size_t              mMmapSize;
 };
 
 }; // namespace hardware
diff --git a/include/hwbinder/Static.h b/include/hwbinder/Static.h
index 99821ca..4b84c89 100644
--- a/include/hwbinder/Static.h
+++ b/include/hwbinder/Static.h
@@ -29,7 +29,7 @@
 extern Vector<int32_t> gTextBuffers;
 
 // For ProcessState.cpp
-extern Mutex gProcessMutex;
+extern Mutex& gProcessMutex;
 extern sp<ProcessState> gProcess;
 
 }   // namespace hardware
diff --git a/include/hwbinder/binder_kernel.h b/include/hwbinder/binder_kernel.h
index 48da770..27a0595 100644
--- a/include/hwbinder/binder_kernel.h
+++ b/include/hwbinder/binder_kernel.h
@@ -21,89 +21,13 @@
 
 /**
  * This file exists because the uapi kernel headers in bionic are built
- * from upstream kernel headers only, and the hwbinder kernel changes
- * haven't made it upstream yet. Therefore, the modifications to the
+ * from upstream kernel headers only, and not all of the hwbinder kernel changes
+ * have made it upstream yet. Therefore, the modifications to the
  * binder header are added locally in this file.
  */
 
 enum {
-	BINDER_TYPE_PTR		= B_PACK_CHARS('p', 't', '*', B_TYPE_LARGE),
-	BINDER_TYPE_FDA		= B_PACK_CHARS('f', 'd', 'a', B_TYPE_LARGE),
-};
-
-/* This header is used in all binder objects that are fixed
- * up by the kernel driver */
-struct binder_object_header {
-	__u32        type;
-};
-
-struct binder_fd_object {
-	struct binder_object_header	hdr;
-	/* FD objects used to be represented in flat_binder_object as well,
-	 * so we're using pads here to remain compatibile to existing userspace
-	 * clients.
-	 */
-	__u32				pad_flags;
-	union {
-		binder_uintptr_t	pad_binder;
-		__u32			fd;
-	};
-
-	binder_uintptr_t		cookie;
-};
-
-/* A binder_buffer object represents an object that the
- * binder kernel driver copies verbatim to the target
- * address space. A buffer itself may be pointed to from
- * within another buffer, meaning that the pointer inside
- * that other buffer needs to be fixed up as well. This
- * can be done by specifying the parent buffer, and the
- * byte offset at which the pointer lives in that buffer.
- */
-struct binder_buffer_object {
-	struct binder_object_header	hdr;
-	__u32				flags;
-
-	union {
-		struct {
-			binder_uintptr_t   buffer; /* Pointer to buffer data */
-			binder_size_t      length; /* Length of the buffer data */
-		};
-		struct {
-			binder_size_t      child;        /* index of child in objects array */
-			binder_size_t      child_offset; /* byte offset in child buffer */
-		};
-	};
-	binder_size_t			parent; /* index of parent in objects array */
-	binder_size_t			parent_offset; /* byte offset of pointer in parent buffer */
-};
-
-enum {
-	BINDER_BUFFER_HAS_PARENT   = 1U << 0,
-	BINDER_BUFFER_REF          = 1U << 1,
-};
-
-/* A binder_fd_array object represents an array of file
- * descriptors embedded in a binder_buffer_object. The
- * kernel driver will fix up all file descriptors in
- * the parent buffer specified by parent and parent_offset
- */
-struct binder_fd_array_object {
-	struct binder_object_header	hdr;
-	__u32			_pad; /* hdr is 4 bytes, ensure 8-byte alignment of next fields */
-	binder_size_t		num_fds;
-	binder_size_t		parent; /* index of parent in objects array */
-	binder_size_t		parent_offset; /* offset of pointer in parent */
-};
-
-struct binder_transaction_data_sg {
-    binder_transaction_data    tr; /* regular transaction data */
-    binder_size_t              buffers_size; /* number of bytes of SG buffers */
-};
-
-enum {
-	BC_TRANSACTION_SG = _IOW('c', 17, struct binder_transaction_data_sg),
-	BC_REPLY_SG = _IOW('c', 18, struct binder_transaction_data_sg),
+	BINDER_BUFFER_FLAG_REF          = 1U << 1,
 };
 
 enum {
@@ -125,13 +49,4 @@
                               struct binder_transaction_data_secctx),
 };
 
-struct binder_node_debug_info {
-	binder_uintptr_t ptr;
-	binder_uintptr_t cookie;
-	__u32 has_strong_ref;
-	__u32 has_weak_ref;
-};
-
-#define BINDER_GET_NODE_DEBUG_INFO _IOWR('b', 11, struct binder_node_debug_info)
-
 #endif // ANDROID_HARDWARE_BINDER_KERNEL_H
diff --git a/vts/OWNERS b/vts/OWNERS
index db577ad..6a26ae7 100644
--- a/vts/OWNERS
+++ b/vts/OWNERS
@@ -1,3 +1,2 @@
-set noparent
 yim@google.com
 zhuoyao@google.com
diff --git a/vts/performance/Android.bp b/vts/performance/Android.bp
new file mode 100644
index 0000000..c5fa2a0
--- /dev/null
+++ b/vts/performance/Android.bp
@@ -0,0 +1,80 @@
+//
+// Copyright (C) 2017 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+cc_defaults {
+    name: "libhwbinder_test_defaults",
+
+    cflags: [
+        "-Wall",
+        "-Werror",
+    ],
+    shared_libs: [
+        "libhidlbase",
+        "libhidltransport",
+        "libhwbinder",
+        "liblog",
+        "libutils",
+        "libcutils",
+    ],
+}
+
+cc_benchmark {
+    name: "libhwbinder_benchmark",
+    defaults: ["libhwbinder_test_defaults"],
+    srcs: ["Benchmark.cpp"],
+    static_libs: ["android.hardware.tests.libhwbinder@1.0"],
+    required: ["android.hardware.tests.libhwbinder@1.0-impl"],
+}
+
+// Builds the benchmark test based on binder.
+cc_benchmark {
+    name: "libbinder_benchmark",
+
+    srcs: ["Benchmark_binder.cpp"],
+    cflags: [
+        "-Wall",
+        "-Werror",
+    ],
+    shared_libs: [
+        "libbinder",
+        "libutils",
+        "android.hardware.tests.libbinder",
+    ],
+}
+
+// Builds the throughput benchmark test for hwbinder.
+cc_test {
+    name: "hwbinderThroughputTest",
+    defaults: ["libhwbinder_test_defaults"],
+    srcs: ["Benchmark_throughput.cpp"],
+    static_libs: ["android.hardware.tests.libhwbinder@1.0"],
+    required: ["android.hardware.tests.libhwbinder@1.0-impl"],
+}
+
+// Builds the latency benchmark test for hwbinder.
+cc_test {
+    name: "libhwbinder_latency",
+    defaults: ["libhwbinder_test_defaults"],
+
+    srcs: [
+        "Latency.cpp",
+        "PerfTest.cpp",
+    ],
+
+    static_libs: ["android.hardware.tests.libhwbinder@1.0"],
+
+    required: ["android.hardware.tests.libhwbinder@1.0-impl"],
+}
diff --git a/vts/performance/Android.mk b/vts/performance/Android.mk
deleted file mode 100644
index b238715..0000000
--- a/vts/performance/Android.mk
+++ /dev/null
@@ -1,99 +0,0 @@
-#
-# Copyright (C) 2016 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-LOCAL_PATH := $(call my-dir)
-
-include $(CLEAR_VARS)
-LOCAL_MODULE := libhwbinder_benchmark
-LOCAL_MODULE_STEM_64 := libhwbinder_benchmark64
-LOCAL_MODULE_STEM_32 := libhwbinder_benchmark32
-
-LOCAL_MODULE_TAGS := eng tests
-
-LOCAL_SRC_FILES := Benchmark.cpp
-LOCAL_SHARED_LIBRARIES := \
-    libhwbinder \
-    libhidlbase \
-    libhidltransport \
-    liblog \
-    libutils \
-    android.hardware.tests.libhwbinder@1.0
-LOCAL_REQUIRED_MODULES := android.hardware.tests.libhwbinder@1.0-impl
-
-LOCAL_MULTILIB := both
-
-include $(BUILD_NATIVE_BENCHMARK)
-
-
-# build for benchmark test based on binder.
-include $(CLEAR_VARS)
-LOCAL_MODULE := libbinder_benchmark
-LOCAL_MODULE_STEM_64 := libbinder_benchmark64
-LOCAL_MODULE_STEM_32 := libbinder_benchmark32
-
-LOCAL_MODULE_TAGS := eng tests
-
-LOCAL_SRC_FILES := Benchmark_binder.cpp
-LOCAL_SHARED_LIBRARIES := \
-    libbinder \
-    libutils \
-    android.hardware.tests.libbinder
-
-LOCAL_MULTILIB := both
-
-include $(BUILD_NATIVE_BENCHMARK)
-
-# build for throughput benchmark test for hwbinder.
-include $(CLEAR_VARS)
-LOCAL_MODULE := hwbinderThroughputTest
-
-LOCAL_MODULE_TAGS := eng tests
-
-LOCAL_SRC_FILES := Benchmark_throughput.cpp
-LOCAL_SHARED_LIBRARIES := \
-    libhwbinder \
-    libhidlbase \
-    libhidltransport \
-    liblog \
-    libutils \
-    android.hardware.tests.libhwbinder@1.0
-
-LOCAL_REQUIRED_MODULES := android.hardware.tests.libhwbinder@1.0-impl
-LOCAL_C_INCLUDES := system/libhwbinder/include
-
-LOCAL_MULTILIB := both
-include $(BUILD_NATIVE_TEST)
-
-# build for latency benchmark test for hwbinder.
-include $(CLEAR_VARS)
-LOCAL_MODULE := libhwbinder_latency
-
-LOCAL_MODULE_TAGS := eng tests
-
-LOCAL_SRC_FILES := Latency.cpp PerfTest.cpp
-LOCAL_SHARED_LIBRARIES := \
-    libhwbinder \
-    libhidlbase \
-    libhidltransport \
-    liblog \
-    libutils \
-    android.hardware.tests.libhwbinder@1.0
-
-LOCAL_REQUIRED_MODULES := android.hardware.tests.libhwbinder@1.0-impl
-LOCAL_C_INCLUDES := system/libhwbinder/include
-
-LOCAL_MULTILIB := both
-include $(BUILD_NATIVE_TEST)
diff --git a/vts/performance/Benchmark.cpp b/vts/performance/Benchmark.cpp
index 4068903..e7d75cd 100644
--- a/vts/performance/Benchmark.cpp
+++ b/vts/performance/Benchmark.cpp
@@ -136,7 +136,6 @@
         // Child, start benchmarks
         ::benchmark::RunSpecifiedBenchmarks();
     } else {
-        int stat;
         startServer();
         while (true) {
             int stat, retval;
diff --git a/vts/performance/Benchmark_binder.cpp b/vts/performance/Benchmark_binder.cpp
index 1f8c1db..1458fc9 100644
--- a/vts/performance/Benchmark_binder.cpp
+++ b/vts/performance/Benchmark_binder.cpp
@@ -36,7 +36,6 @@
 
 // libbinder:
 using android::getService;
-using android::BnInterface;
 using android::defaultServiceManager;
 using android::ProcessState;
 using android::binder::Status;
@@ -63,6 +62,8 @@
 
 bool startServer() {
     BenchmarkServiceAidl *service = new BenchmarkServiceAidl();
+    // Tells the kernel to spawn zero threads, but startThreadPool() below will still spawn one.
+    ProcessState::self()->setThreadPoolMaxThreadCount(0);
     defaultServiceManager()->addService(String16(kServiceName),
                                         service);
     ProcessState::self()->startThreadPool();
@@ -99,7 +100,6 @@
         // Child, start benchmarks
         ::benchmark::RunSpecifiedBenchmarks();
     } else {
-        int stat;
         startServer();
         while (true) {
             int stat, retval;
