am 0c3d21e6: am e480fc83: bionic: fix pthread_{create, exit}/signal race condition

* commit '0c3d21e63c6e75ae73aaf2b8d64af0bd8caa6beb':
  bionic: fix pthread_{create, exit}/signal race condition
diff --git a/libc/Android.mk b/libc/Android.mk
index 49c8731..207a133 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -346,13 +346,15 @@
 	arch-arm/bionic/__get_sp.S \
 	arch-arm/bionic/_exit_with_stack_teardown.S \
 	arch-arm/bionic/_setjmp.S \
-	arch-arm/bionic/atomics_arm.S \
+	arch-arm/bionic/atomics_arm.c \
 	arch-arm/bionic/clone.S \
 	arch-arm/bionic/eabi.c \
 	arch-arm/bionic/ffs.S \
+	arch-arm/bionic/futex_arm.S \
 	arch-arm/bionic/kill.S \
 	arch-arm/bionic/libgcc_compat.c \
 	arch-arm/bionic/tkill.S \
+	arch-arm/bionic/tgkill.S \
 	arch-arm/bionic/memcmp.S \
 	arch-arm/bionic/memcmp16.S \
 	arch-arm/bionic/memcpy.S \
@@ -394,9 +396,9 @@
 	arch-x86/bionic/__get_sp.S \
 	arch-x86/bionic/__get_tls.c \
 	arch-x86/bionic/__set_tls.c \
-	arch-x86/bionic/atomics_x86.S \
 	arch-x86/bionic/clone.S \
 	arch-x86/bionic/_exit_with_stack_teardown.S \
+	arch-x86/bionic/futex_x86.S \
 	arch-x86/bionic/setjmp.S \
 	arch-x86/bionic/_setjmp.S \
 	arch-x86/bionic/sigsetjmp.S \
diff --git a/libc/SYSCALLS.TXT b/libc/SYSCALLS.TXT
index 46e7b1f..fa02edc 100644
--- a/libc/SYSCALLS.TXT
+++ b/libc/SYSCALLS.TXT
@@ -63,6 +63,7 @@
 # see comments in arch-arm/bionic/kill.S to understand why we don't generate an ARM stub for kill/tkill
 int     kill(pid_t, int)           -1,37
 int     tkill(pid_t tid, int sig)  -1,238
+int     tgkill(pid_t tgid, pid_t tid, int sig)  -1,270
 int     __ptrace:ptrace(int request, int pid, void* addr, void* data)  26
 int     __set_thread_area:set_thread_area(void*  user_desc)  -1,243
 int     __getpriority:getpriority(int, int)  96
diff --git a/libc/arch-arm/bionic/atomics_arm.S b/libc/arch-arm/bionic/atomics_arm.S
deleted file mode 100644
index 4d9cbcf..0000000
--- a/libc/arch-arm/bionic/atomics_arm.S
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#include <sys/linux-syscalls.h>
-#include <machine/asm.h>
-#include <machine/cpu-features.h>
-
-#define FUTEX_WAIT 0
-#define FUTEX_WAKE 1
-
-#if defined(__ARM_HAVE_LDREX_STREX)
-/*
- * ===========================================================================
- *      ARMv6+ implementation
- * ===========================================================================
- */
-
-/* r0(addr) -> r0(old) */
-ENTRY(__atomic_dec)
-    mov     r1, r0                      @ copy addr so we don't clobber it
-1:  ldrex   r0, [r1]                    @ load current value into r0
-    sub     r2, r0, #1                  @ generate new value into r2
-    strex   r3, r2, [r1]                @ try to store new value; result in r3
-    cmp     r3, #0                      @ success?
-    bxeq    lr                          @ yes, return
-    b       1b                          @ no, retry
-END(__atomic_dec)
-
-/* r0(addr) -> r0(old) */
-ENTRY(__atomic_inc)
-    mov     r1, r0
-1:  ldrex   r0, [r1]
-    add     r2, r0, #1
-    strex   r3, r2, [r1]
-    cmp     r3, #0
-    bxeq    lr
-    b       1b
-END(__atomic_inc)
-
-/* r0(old) r1(new) r2(addr) -> r0(zero_if_succeeded) */
-ENTRY(__atomic_cmpxchg)
-1:  mov     ip, #2                      @ ip=2 means "new != old"
-    ldrex   r3, [r2]                    @ load current value into r3
-    teq     r0, r3                      @ new == old?
-    strexeq ip, r1, [r2]                @ yes, try store, set ip to 0 or 1
-    teq     ip, #1                      @ strex failure?
-    beq     1b                          @ yes, retry
-    mov     r0, ip                      @ return 0 on success, 2 on failure
-    bx      lr
-END(__atomic_cmpxchg)
-
-/* r0(new) r1(addr) -> r0(old) */
-ENTRY(__atomic_swap)
-1:  ldrex   r2, [r1]
-    strex   r3, r0, [r1]
-    teq     r3, #0
-    bne     1b
-    mov     r0, r2
-    bx      lr
-END(__atomic_swap)
-
-#else /*not defined __ARM_HAVE_LDREX_STREX*/
-/*
- * ===========================================================================
- *      Pre-ARMv6 implementation
- * ===========================================================================
- */
-
-    /* int __kernel_cmpxchg(int oldval, int newval, int* ptr) */
-    .equ    kernel_cmpxchg, 0xFFFF0FC0
-    .equ    kernel_atomic_base, 0xFFFF0FFF
-
-/* r0(addr) -> r0(old) */
-ENTRY(__atomic_dec)
-    .save {r4, lr}
-    stmdb   sp!, {r4, lr}
-    mov     r2, r0
-1: @ atomic_dec
-    ldr     r0, [r2]
-    mov     r3, #kernel_atomic_base
-    add     lr, pc, #4
-    sub     r1, r0, #1
-    add     pc, r3, #(kernel_cmpxchg - kernel_atomic_base)
-    bcc     1b
-    add     r0, r1, #1
-    ldmia   sp!, {r4, lr}
-    bx      lr
-END(__atomic_dec)
-
-/* r0(addr) -> r0(old) */
-ENTRY(__atomic_inc)
-    .save {r4, lr}
-    stmdb   sp!, {r4, lr}
-    mov     r2, r0
-1: @ atomic_inc
-    ldr     r0, [r2]
-    mov     r3, #kernel_atomic_base
-    add     lr, pc, #4
-    add     r1, r0, #1
-    add     pc, r3, #(kernel_cmpxchg - kernel_atomic_base)
-    bcc     1b
-    sub     r0, r1, #1
-    ldmia   sp!, {r4, lr}
-    bx      lr
-END(__atomic_inc)
-
-/* r0(old) r1(new) r2(addr) -> r0(zero_if_succeeded) */
-ENTRY(__atomic_cmpxchg)
-    .save {r4, lr}
-    stmdb   sp!, {r4, lr}
-    mov     r4, r0          /* r4 = save oldvalue */
-1: @ atomic_cmpxchg
-    mov     r3, #kernel_atomic_base
-    add     lr, pc, #4
-    mov     r0, r4          /* r0 = oldvalue */
-    add     pc, r3, #(kernel_cmpxchg - kernel_atomic_base)
-    bcs     2f              /* swap was made. we're good, return. */
-    ldr     r3, [r2]        /* swap not made, see if it's because *ptr!=oldvalue */
-    cmp     r3, r4
-    beq     1b
-2: @ atomic_cmpxchg
-    ldmia   sp!, {r4, lr}
-    bx      lr
-END(__atomic_cmpxchg)
-
-/* r0(new) r1(addr) -> r0(old) */
-ENTRY(__atomic_swap)
-    swp     r0, r0, [r1]
-    bx      lr
-END(__atomic_swap)
-
-#endif /*not defined __ARM_HAVE_LDREX_STREX*/
-
-
-/* __futex_wait(*ftx, val, *timespec) */
-/* __futex_wake(*ftx, counter) */
-/* __futex_syscall3(*ftx, op, val) */
-/* __futex_syscall4(*ftx, op, val, *timespec) */
-
-.global __futex_wait
-.type __futex_wait, %function
-
-.global __futex_wake
-.type __futex_wake, %function
-
-.global __futex_syscall3
-.type __futex_syscall3, %function
-
-.global __futex_syscall4
-.type __futex_syscall4, %function
-
-#if __ARM_EABI__
-
-ENTRY(__futex_syscall3)
-    stmdb   sp!, {r4, r7}
-    .save   {r4, r7}
-    ldr     r7, =__NR_futex
-    swi     #0
-    ldmia   sp!, {r4, r7}
-    bx      lr
-END(__futex_syscall3)
-
-ENTRY(__futex_wait)
-    stmdb   sp!, {r4, r7}
-    .save   {r4, r7}
-    mov     r3, r2
-    mov     r2, r1
-    mov     r1, #FUTEX_WAIT
-    ldr     r7, =__NR_futex
-    swi     #0
-    ldmia   sp!, {r4, r7}
-    bx      lr
-END(__futex_wait)
-
-ENTRY(__futex_wake)
-    .save   {r4, r7}
-    stmdb   sp!, {r4, r7}
-    mov     r2, r1
-    mov     r1, #FUTEX_WAKE
-    ldr     r7, =__NR_futex
-    swi     #0
-    ldmia   sp!, {r4, r7}
-    bx      lr
-END(__futex_wake)
-
-#else
-
-ENTRY(__futex_syscall3)
-    swi     #__NR_futex
-    bx      lr
-END(__futex_syscall3)
-
-ENTRY(__futex_wait)
-    mov     r3, r2
-    mov     r2, r1
-    mov     r1, #FUTEX_WAIT
-    swi     #__NR_futex
-    bx      lr
-END(__futex_wait)
-
-ENTRY(__futex_wake)
-    mov     r2, r1
-    mov     r1, #FUTEX_WAKE
-    swi     #__NR_futex
-    bx      lr
-END(__futex_wake)
-
-#endif
-
-ENTRY(__futex_syscall4)
-    b __futex_syscall3
-END(__futex_syscall4)
diff --git a/libc/arch-arm/bionic/atomics_arm.c b/libc/arch-arm/bionic/atomics_arm.c
new file mode 100644
index 0000000..d69eaff
--- /dev/null
+++ b/libc/arch-arm/bionic/atomics_arm.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+/* The purpose of this file is to export a small set of atomic-related
+ * functions from the C library, to ensure binary ABI compatibility for
+ * the NDK.
+ *
+ * These functions were initially exposed by the NDK through <sys/atomics.h>,
+ * which was unfortunate because their implementation didn't provide any
+ * memory barriers at all.
+ *
+ * This wasn't a problem for the platform code that used them, because it
+ * used explicit barrier instructions around them. On the other hand, it means
+ * that any NDK-generated machine code that linked against them would not
+ * perform correctly when running on multi-core devices.
+ *
+ * To fix this, the platform code was first modified to not use any of these
+ * functions (everything is now inlined through assembly statements, see
+ * libc/private/bionic_arm_inline.h and the headers it includes).
+ *
+ * The functions here are thus only for the benefit of NDK applications,
+ * and now include full memory barriers to prevent any random memory
+ * ordering issues from cropping up.
+ *
+ * Note that we also provide an updated <sys/atomics.h> header that defines
+ * always-inlined versions of the functions that use the GCC builtin
+ * intrinsics to perform the same thing.
+ *
+ * NOTE: There is no need for a similar file for non-ARM platforms.
+ */
+
+/* DO NOT INCLUDE <sys/atomics.h> HERE ! */
+
+int
+__atomic_cmpxchg(int old, int _new, volatile int *ptr)
+{
+    /* We must return 0 on success */
+    return __sync_val_compare_and_swap(ptr, old, _new) != old;
+}
+
+int
+__atomic_swap(int _new, volatile int *ptr)
+{
+    int prev;
+    do {
+        prev = *ptr;
+    } while (__sync_val_compare_and_swap(ptr, prev, _new) != prev);
+    return prev;
+}
+
+int
+__atomic_dec(volatile int *ptr)
+{
+  return __sync_fetch_and_sub (ptr, 1);
+}
+
+int
+__atomic_inc(volatile int *ptr)
+{
+  return __sync_fetch_and_add (ptr, 1);
+}
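
A minimal sketch of the kind of NDK code these re-exported functions keep
working (my_lock/my_unlock are illustrative names, not part of the patch):
per the definitions above, __atomic_cmpxchg returns 0 on success, and the
__sync builtins supply the full barriers the old assembly versions lacked.

    #include <sys/atomics.h>

    static volatile int lock = 0;

    void my_lock(void) {
        /* returns 0 once the 0 -> 1 swap succeeds */
        while (__atomic_cmpxchg(0, 1, &lock) != 0)
            ;  /* spin */
    }

    void my_unlock(void) {
        /* full barrier: writes made in the critical section are
         * visible to the next lock holder on multi-core devices */
        __atomic_swap(0, &lock);
    }
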
diff --git a/libc/arch-arm/bionic/crtbegin_dynamic.S b/libc/arch-arm/bionic/crtbegin_dynamic.S
index 0999084..6ca0845 100644
--- a/libc/arch-arm/bionic/crtbegin_dynamic.S
+++ b/libc/arch-arm/bionic/crtbegin_dynamic.S
@@ -43,21 +43,17 @@
 #    - address of an "onexit" function, not used on any
 #      platform supported by Bionic
 #
-#    - address of the "main" function of the program. We
-#      can't hard-code it in the adr pseudo instruction
-#      so we use a tiny trampoline that will get relocated
-#      by the dynamic linker before this code runs
+#    - address of the "main" function of the program.
 #
 #    - address of the constructor list
 #
 _start:	
 	mov	r0, sp
 	mov	r1, #0
-	adr r2, 0f
-	adr r3, 1f
-	b	__libc_init
-
-0:  b   main
+	ldr	r2, =main
+	adr	r3, 1f
+	ldr	r4, =__libc_init
+	bx	r4
 
 1:  .long   __PREINIT_ARRAY__
     .long   __INIT_ARRAY__
diff --git a/libc/arch-arm/bionic/crtbegin_static.S b/libc/arch-arm/bionic/crtbegin_static.S
index 13b05b2..e62ac1d 100644
--- a/libc/arch-arm/bionic/crtbegin_static.S
+++ b/libc/arch-arm/bionic/crtbegin_static.S
@@ -43,21 +43,17 @@
 #    - address of an "onexit" function, not used on any
 #      platform supported by Bionic
 #
-#    - address of the "main" function of the program. We
-#      can't hard-code it in the adr pseudo instruction
-#      so we use a tiny trampoline that will get relocated
-#      by the dynamic linker before this code runs
+#    - address of the "main" function of the program.
 #
 #    - address of the constructor list
 #
 _start:	
 	mov	r0, sp
 	mov	r1, #0
-	adr r2, 0f
-	adr r3, 1f
-	b	__libc_init
-
-0:  b   main
+	ldr	r2, =main
+	adr	r3, 1f
+	ldr	r4, =__libc_init
+	bx	r4
 
 1:  .long   __PREINIT_ARRAY__
     .long   __INIT_ARRAY__
diff --git a/libc/arch-arm/bionic/futex_arm.S b/libc/arch-arm/bionic/futex_arm.S
new file mode 100644
index 0000000..7041663
--- /dev/null
+++ b/libc/arch-arm/bionic/futex_arm.S
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/linux-syscalls.h>
+#include <machine/asm.h>
+#include <machine/cpu-features.h>
+
+#define FUTEX_WAIT 0
+#define FUTEX_WAKE 1
+
+/* __futex_wait(*ftx, val, *timespec) */
+/* __futex_wake(*ftx, counter) */
+/* __futex_syscall3(*ftx, op, val) */
+/* __futex_syscall4(*ftx, op, val, *timespec) */
+
+.global __futex_wait
+.type __futex_wait, %function
+
+.global __futex_wake
+.type __futex_wake, %function
+
+.global __futex_syscall3
+.type __futex_syscall3, %function
+
+.global __futex_syscall4
+.type __futex_syscall4, %function
+
+#if __ARM_EABI__
+
+ENTRY(__futex_syscall3)
+    stmdb   sp!, {r4, r7}
+    .save   {r4, r7}
+    ldr     r7, =__NR_futex
+    swi     #0
+    ldmia   sp!, {r4, r7}
+    bx      lr
+END(__futex_syscall3)
+
+ENTRY(__futex_wait)
+    stmdb   sp!, {r4, r7}
+    .save   {r4, r7}
+    mov     r3, r2
+    mov     r2, r1
+    mov     r1, #FUTEX_WAIT
+    ldr     r7, =__NR_futex
+    swi     #0
+    ldmia   sp!, {r4, r7}
+    bx      lr
+END(__futex_wait)
+
+ENTRY(__futex_wake)
+    .save   {r4, r7}
+    stmdb   sp!, {r4, r7}
+    mov     r2, r1
+    mov     r1, #FUTEX_WAKE
+    ldr     r7, =__NR_futex
+    swi     #0
+    ldmia   sp!, {r4, r7}
+    bx      lr
+END(__futex_wake)
+
+#else
+
+ENTRY(__futex_syscall3)
+    swi     #__NR_futex
+    bx      lr
+END(__futex_syscall3)
+
+ENTRY(__futex_wait)
+    mov     r3, r2
+    mov     r2, r1
+    mov     r1, #FUTEX_WAIT
+    swi     #__NR_futex
+    bx      lr
+END(__futex_wait)
+
+ENTRY(__futex_wake)
+    mov     r2, r1
+    mov     r1, #FUTEX_WAKE
+    swi     #__NR_futex
+    bx      lr
+END(__futex_wake)
+
+#endif
+
+ENTRY(__futex_syscall4)
+    b __futex_syscall3
+END(__futex_syscall4)
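
A small usage sketch, assuming only the signatures documented above
(wait_until_ready/mark_ready are illustrative): __futex_wait returns
immediately unless *ftx still equals val, so callers recheck the condition
in a loop to absorb spurious wake-ups.

    #include <sys/atomics.h>

    static volatile int ready = 0;

    void wait_until_ready(void) {
        while (ready == 0)
            __futex_wait(&ready, 0, NULL);   /* NULL: no timeout */
    }

    void mark_ready(void) {
        ready = 1;
        __futex_wake(&ready, 0x7fffffff);    /* wake all waiters */
    }
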
diff --git a/libc/arch-arm/bionic/tgkill.S b/libc/arch-arm/bionic/tgkill.S
new file mode 100644
index 0000000..da5c0af
--- /dev/null
+++ b/libc/arch-arm/bionic/tgkill.S
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/* unlike our auto-generated syscall stubs, this code saves lr
+   on the stack, as well as a few other registers. this makes
+   our stack unwinder happy, when we generate debug stack
+   traces after the C library or other parts of the system
+   abort due to a fatal runtime error (e.g. detection
+   of a corrupted malloc heap).
+*/
+
+#include <sys/linux-syscalls.h>
+#include <machine/asm.h>
+
+#ifndef __NR_tgkill
+#define __NR_tgkill  268
+#endif
+
+ENTRY(tgkill)
+    stmfd   sp!, {r4-r7, ip, lr}
+    ldr     r7, =__NR_tgkill
+    swi     #0
+    ldmfd   sp!, {r4-r7, ip, lr}
+    movs    r0, r0
+    bxpl    lr
+    b       __set_syscall_errno
+END(tgkill)
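
The point of routing through tgkill rather than tkill: once a thread
exits, its tid can be recycled by an unrelated process, and tkill would
then deliver the signal there. tgkill scopes the lookup to one thread
group. A hedged sketch (signal_own_thread is an illustrative helper):

    #include <unistd.h>
    #include <errno.h>

    extern int tgkill(pid_t tgid, pid_t tid, int sig);

    int signal_own_thread(pid_t tid, int sig) {
        /* fails with ESRCH instead of hitting a recycled tid
         * that now belongs to another process */
        if (tgkill(getpid(), tid, sig) < 0)
            return errno;
        return 0;
    }
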
diff --git a/libc/arch-sh/syscalls.mk b/libc/arch-sh/syscalls.mk
index 1d87600..d4e4583 100644
--- a/libc/arch-sh/syscalls.mk
+++ b/libc/arch-sh/syscalls.mk
@@ -26,6 +26,7 @@
 syscall_src += arch-sh/syscalls/__brk.S
 syscall_src += arch-sh/syscalls/kill.S
 syscall_src += arch-sh/syscalls/tkill.S
+syscall_src += arch-sh/syscalls/tgkill.S
 syscall_src += arch-sh/syscalls/__ptrace.S
 syscall_src += arch-sh/syscalls/__set_thread_area.S
 syscall_src += arch-sh/syscalls/__getpriority.S
diff --git a/libc/arch-sh/syscalls/tgkill.S b/libc/arch-sh/syscalls/tgkill.S
new file mode 100644
index 0000000..222f836
--- /dev/null
+++ b/libc/arch-sh/syscalls/tgkill.S
@@ -0,0 +1,32 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+
+    .text
+    .type tgkill, @function
+    .globl tgkill
+    .align 4
+
+tgkill:
+
+    /* invoke trap */
+    mov.l   0f, r3  /* trap num */
+    trapa   #(3 + 0x10)
+
+    /* check return value */
+    cmp/pz  r0
+    bt      __NR_tgkill_end
+
+    /* keep error number */
+    sts.l   pr, @-r15
+    mov.l   1f, r1
+    jsr     @r1
+    mov     r0, r4
+    lds.l   @r15+, pr
+
+__NR_tgkill_end:
+    rts
+    nop
+
+    .align  2
+0:  .long   __NR_tgkill
+1:  .long   __set_syscall_errno
diff --git a/libc/arch-x86/bionic/atomics_x86.S b/libc/arch-x86/bionic/futex_x86.S
similarity index 100%
rename from libc/arch-x86/bionic/atomics_x86.S
rename to libc/arch-x86/bionic/futex_x86.S
diff --git a/libc/arch-x86/include/sys/atomics.h b/libc/arch-x86/include/sys/atomics.h
deleted file mode 100644
index 7aed3ae..0000000
--- a/libc/arch-x86/include/sys/atomics.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#ifndef _SYS_ATOMICS_H
-#define _SYS_ATOMICS_H
-
-#include <sys/cdefs.h>
-#include <sys/time.h>
-
-__BEGIN_DECLS
-
-static inline __attribute__((always_inline)) int
-__atomic_cmpxchg(int old, int _new, volatile int *ptr)
-{
-  return !__sync_bool_compare_and_swap (ptr, old, _new);
-}
-
-static inline __attribute__((always_inline)) int
-__atomic_swap(int _new, volatile int *ptr)
-{
-  return __sync_lock_test_and_set(ptr, _new);
-}
-
-static inline __attribute__((always_inline)) int
-__atomic_dec(volatile int *ptr)
-{
-  return __sync_fetch_and_sub (ptr, 1);
-}
-
-static inline __attribute__((always_inline)) int
-__atomic_inc(volatile int *ptr)
-{
-  return __sync_fetch_and_add (ptr, 1);
-}
-
-int __futex_wait(volatile void *ftx, int val, const struct timespec *timeout);
-int __futex_wake(volatile void *ftx, int count);
-
-__END_DECLS
-
-#endif /* _SYS_ATOMICS_H */
diff --git a/libc/arch-x86/string/sse2-memset5-atom.S b/libc/arch-x86/string/sse2-memset5-atom.S
index 4b7f71b..557c019 100644
--- a/libc/arch-x86/string/sse2-memset5-atom.S
+++ b/libc/arch-x86/string/sse2-memset5-atom.S
@@ -93,7 +93,7 @@
 # define SETRTNVAL	movl DEST(%esp), %eax
 #endif
 
-#ifdef SHARED
+#if (defined SHARED || defined __PIC__)
 # define ENTRANCE	PUSH (%ebx);
 # define RETURN_END	POP (%ebx); ret
 # define RETURN		RETURN_END; CFI_PUSH (%ebx)
@@ -313,7 +313,7 @@
 	PUSH (%ebx)
 	mov	$SHARED_CACHE_SIZE, %ebx
 #else
-# ifdef SHARED
+# if (defined SHARED || defined __PIC__)
 	call	__i686.get_pc_thunk.bx
 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
 	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
@@ -331,7 +331,7 @@
 # define RESTORE_EBX_STATE CFI_PUSH (%ebx)
 	cmp	$DATA_CACHE_SIZE, %ecx
 #else
-# ifdef SHARED
+# if (defined SHARED || defined __PIC__)
 #  define RESTORE_EBX_STATE
 	call	__i686.get_pc_thunk.bx
 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
@@ -436,7 +436,7 @@
 	jae	L(128bytesormore_nt)
 	sfence
 L(shared_cache_loop_end):
-#if defined DATA_CACHE_SIZE || !defined SHARED
+#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
 	POP (%ebx)
 #endif
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
diff --git a/libc/arch-x86/string/ssse3-memcpy5.S b/libc/arch-x86/string/ssse3-memcpy5.S
index b4773df..1bf6d22 100644
--- a/libc/arch-x86/string/ssse3-memcpy5.S
+++ b/libc/arch-x86/string/ssse3-memcpy5.S
@@ -104,7 +104,7 @@
 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
 #define POP(REG)	popl REG; CFI_POP (REG)
 
-#ifdef SHARED
+#if (defined SHARED || defined __PIC__)
 # define PARMS		8		/* Preserve EBX.  */
 # define ENTRANCE	PUSH (%ebx);
 # define RETURN_END	POP (%ebx); ret
@@ -216,7 +216,7 @@
 #ifdef SHARED_CACHE_SIZE_HALF
 	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
 #else
-# ifdef SHARED
+# if (defined SHARED || defined __PIC__)
 	call	__i686.get_pc_thunk.bx
 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
 	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
@@ -287,7 +287,7 @@
 #ifdef DATA_CACHE_SIZE_HALF
 	cmp	$DATA_CACHE_SIZE_HALF, %ecx
 #else
-# ifdef SHARED
+# if (defined SHARED || defined __PIC__)
 	call	__i686.get_pc_thunk.bx
 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
 	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
diff --git a/libc/arch-x86/syscalls.mk b/libc/arch-x86/syscalls.mk
index 3b85025..13edeb0 100644
--- a/libc/arch-x86/syscalls.mk
+++ b/libc/arch-x86/syscalls.mk
@@ -26,6 +26,7 @@
 syscall_src += arch-x86/syscalls/__brk.S
 syscall_src += arch-x86/syscalls/kill.S
 syscall_src += arch-x86/syscalls/tkill.S
+syscall_src += arch-x86/syscalls/tgkill.S
 syscall_src += arch-x86/syscalls/__ptrace.S
 syscall_src += arch-x86/syscalls/__set_thread_area.S
 syscall_src += arch-x86/syscalls/__getpriority.S
diff --git a/libc/arch-x86/syscalls/tgkill.S b/libc/arch-x86/syscalls/tgkill.S
new file mode 100644
index 0000000..99af740
--- /dev/null
+++ b/libc/arch-x86/syscalls/tgkill.S
@@ -0,0 +1,29 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+
+    .text
+    .type tgkill, @function
+    .globl tgkill
+    .align 4
+
+tgkill:
+    pushl   %ebx
+    pushl   %ecx
+    pushl   %edx
+    mov     16(%esp), %ebx
+    mov     20(%esp), %ecx
+    mov     24(%esp), %edx
+    movl    $__NR_tgkill, %eax
+    int     $0x80
+    cmpl    $-129, %eax
+    jb      1f
+    negl    %eax
+    pushl   %eax
+    call    __set_errno
+    addl    $4, %esp
+    orl     $-1, %eax
+1:
+    popl    %edx
+    popl    %ecx
+    popl    %ebx
+    ret
diff --git a/libc/bionic/atomics_x86.c b/libc/bionic/atomics_x86.c
deleted file mode 100644
index fd60f4f..0000000
--- a/libc/bionic/atomics_x86.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#include <sys/atomics.h>
-
-#define FUTEX_SYSCALL 240
-#define FUTEX_WAIT 0
-#define FUTEX_WAKE 1
-
-int __futex_wait(volatile void *ftx, int val)
-{
-    int ret;
-    asm volatile (
-        "int $0x80;"
-        : "=a" (ret)
-        : "0" (FUTEX_SYSCALL),
-          "b" (ftx),
-          "c" (FUTEX_WAIT),
-          "d" (val),
-          "S" (0)
-    );
-    return ret;
-}
-
-int __futex_wake(volatile void *ftx, int count)
-{
-    int ret;
-    asm volatile (
-        "int $0x80;"
-        : "=a" (ret)
-        : "0" (FUTEX_SYSCALL),
-          "b" (ftx),
-          "c" (FUTEX_WAKE),
-          "d" (count)
-    );
-    return ret;
-}
-
-int __atomic_cmpxchg(int old, int new, volatile int* addr) {
-    int xchg;
-    asm volatile (
-        "lock;"
-        "cmpxchg %%ecx, (%%edx);"
-        "setne %%al;"
-        : "=a" (xchg)
-        : "a" (old),
-          "c" (new),
-          "d" (addr)
-    );
-    return xchg;
-}
-
-int __atomic_swap(int new, volatile int* addr) {
-    int old;
-    asm volatile (
-        "lock;"
-        "xchg %%ecx, (%%edx);"
-        : "=c" (old)
-        : "c" (new),
-          "d" (addr)
-    );
-    return old;
-}
-
-int __atomic_dec(volatile int* addr) {
-    int old;
-    do {
-        old = *addr;
-    } while (atomic_cmpxchg(old, old-1, addr));
-    return old;
-}
diff --git a/libc/bionic/pthread.c b/libc/bionic/pthread.c
index e8f1052..eeff364 100644
--- a/libc/bionic/pthread.c
+++ b/libc/bionic/pthread.c
@@ -81,9 +81,6 @@
 #define PTHREAD_ATTR_FLAG_USER_STACK    0x00000002
 
 #define DEFAULT_STACKSIZE (1024 * 1024)
-#define STACKBASE 0x10000000
-
-static uint8_t * gStackBase = (uint8_t *)STACKBASE;
 
 static pthread_mutex_t mmap_lock = PTHREAD_MUTEX_INITIALIZER;
 
@@ -252,7 +249,7 @@
 
     pthread_mutex_lock(&mmap_lock);
 
-    stack = mmap((void *)gStackBase, size,
+    stack = mmap(NULL, size,
                  PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE,
                  -1, 0);
@@ -697,7 +694,7 @@
             goto Exit;
         }
     }
-    while ( __atomic_cmpxchg( flags, flags | PTHREAD_ATTR_FLAG_DETACHED,
+    while ( __bionic_cmpxchg( flags, flags | PTHREAD_ATTR_FLAG_DETACHED,
                               (volatile int*)&thread->attr.flags ) != 0 );
 Exit:
     pthread_mutex_unlock(&gThreadListLock);
@@ -931,17 +928,17 @@
     int  shared = mutex->value & MUTEX_SHARED_MASK;
     /*
      * The common case is an unlocked mutex, so we begin by trying to
-     * change the lock's state from 0 to 1.  __atomic_cmpxchg() returns 0
+     * change the lock's state from 0 to 1.  __bionic_cmpxchg() returns 0
      * if it made the swap successfully.  If the result is nonzero, this
      * lock is already held by another thread.
      */
-    if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value ) != 0) {
+    if (__bionic_cmpxchg(shared|0, shared|1, &mutex->value ) != 0) {
         /*
          * We want to go to sleep until the mutex is available, which
          * requires promoting it to state 2.  We need to swap in the new
          * state value and then wait until somebody wakes us up.
          *
-         * __atomic_swap() returns the previous value.  We swap 2 in and
+         * __bionic_swap() returns the previous value.  We swap 2 in and
          * see if we got zero back; if so, we have acquired the lock.  If
          * not, another thread still holds the lock and we wait again.
          *
@@ -952,7 +949,7 @@
          * that the mutex is in state 2 when we go to sleep on it, which
          * guarantees a wake-up call.
          */
-        while (__atomic_swap(shared|2, &mutex->value ) != (shared|0))
+        while (__bionic_swap(shared|2, &mutex->value ) != (shared|0))
             __futex_wait_ex(&mutex->value, shared, shared|2, 0);
     }
     ANDROID_MEMBAR_FULL();
@@ -972,10 +969,10 @@
 
     /*
      * The mutex state will be 1 or (rarely) 2.  We use an atomic decrement
-     * to release the lock.  __atomic_dec() returns the previous value;
+     * to release the lock.  __bionic_atomic_dec() returns the previous value;
      * if it wasn't 1 we have to do some additional work.
      */
-    if (__atomic_dec(&mutex->value) != (shared|1)) {
+    if (__bionic_atomic_dec(&mutex->value) != (shared|1)) {
         /*
          * Start by releasing the lock.  The decrement changed it from
          * "contended lock" to "uncontended lock", which means we still
@@ -1163,7 +1160,7 @@
     /* Handle common case first */
     if ( __likely(mtype == MUTEX_TYPE_NORMAL) )
     {
-        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
+        if (__bionic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
             ANDROID_MEMBAR_FULL();
             return 0;
         }
@@ -1261,13 +1258,13 @@
     if ( __likely(mtype == MUTEX_TYPE_NORMAL) )
     {
         /* fast path for uncontended lock */
-        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
+        if (__bionic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
             ANDROID_MEMBAR_FULL();
             return 0;
         }
 
         /* loop while needed */
-        while (__atomic_swap(shared|2, &mutex->value) != (shared|0)) {
+        while (__bionic_swap(shared|2, &mutex->value) != (shared|0)) {
             if (__timespec_to_absolute(&ts, &abstime, clock) < 0)
                 return EBUSY;
 
@@ -1436,7 +1433,7 @@
         long oldval = cond->value;
         long newval = ((oldval - COND_COUNTER_INCREMENT) & COND_COUNTER_MASK)
                       | flags;
-        if (__atomic_cmpxchg(oldval, newval, &cond->value) == 0)
+        if (__bionic_cmpxchg(oldval, newval, &cond->value) == 0)
             break;
     }
 
@@ -1844,7 +1841,7 @@
 }
 
 // man says this should be in <linux/unistd.h>, but it isn't
-extern int tkill(int tid, int sig);
+extern int tgkill(int tgid, int tid, int sig);
 
 int pthread_kill(pthread_t tid, int sig)
 {
@@ -1852,7 +1849,7 @@
     int  old_errno = errno;
     pthread_internal_t * thread = (pthread_internal_t *)tid;
 
-    ret = tkill(thread->kernel_id, sig);
+    ret = tgkill(getpid(), thread->kernel_id, sig);
     if (ret < 0) {
         ret = errno;
         errno = old_errno;
@@ -1861,7 +1858,21 @@
     return ret;
 }
 
-extern int __rt_sigprocmask(int, const sigset_t *, sigset_t *, size_t);
+/* Despite the fact that our kernel headers define sigset_t explicitly
+ * as a 32-bit integer, the kernel system call really expects a 64-bit
+ * bitmap for the signal set, or more exactly an array of two 32-bit
+ * values (see $KERNEL/arch/$ARCH/include/asm/signal.h for details).
+ *
+ * Unfortunately, we cannot fix the sigset_t definition without breaking
+ * the C library ABI, so perform a little runtime translation here.
+ */
+typedef union {
+    sigset_t   bionic;
+    uint32_t   kernel[2];
+} kernel_sigset_t;
+
+/* this is a private syscall stub */
+extern int __rt_sigprocmask(int, const kernel_sigset_t *, kernel_sigset_t *, size_t);
 
 int pthread_sigmask(int how, const sigset_t *set, sigset_t *oset)
 {
@@ -1870,16 +1881,31 @@
      */
     int ret, old_errno = errno;
 
-    /* Use NSIG which corresponds to the number of signals in
-     * our 32-bit sigset_t implementation. As such, this function, or
-     * anything that deals with sigset_t cannot manage real-time signals
-     * (signo >= 32). We might want to introduce sigset_rt_t as an
-     * extension to do so in the future.
+    /* We must convert *set into a kernel_sigset_t */
+    kernel_sigset_t  in_set, *in_set_ptr;
+    kernel_sigset_t  out_set;
+
+    in_set.kernel[0]  = in_set.kernel[1]  =  0;
+    out_set.kernel[0] = out_set.kernel[1] = 0;
+
+    /* 'in_set_ptr' is the second parameter to __rt_sigprocmask. It must be NULL
+     * if 'set' is NULL to ensure correct semantics (which in this case would
+     * be to ignore 'how' and return the current signal set in 'oset').
      */
-    ret = __rt_sigprocmask(how, set, oset, NSIG / 8);
+    if (set == NULL) {
+        in_set_ptr = NULL;
+    } else {
+        in_set.bionic = *set;
+        in_set_ptr = &in_set;
+    }
+
+    ret = __rt_sigprocmask(how, in_set_ptr, &out_set, sizeof(kernel_sigset_t));
     if (ret < 0)
         ret = errno;
 
+    if (oset)
+        *oset = out_set.bionic;
+
     errno = old_errno;
     return ret;
 }
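
A short sketch tying the two pthread.c changes together (worker and
SIGUSR1 are illustrative choices): pthread_sigmask now widens the 32-bit
bionic sigset_t into the kernel's two-word set, and pthread_kill delivers
via tgkill(getpid(), ...) so it can never signal an unrelated process.

    #include <pthread.h>
    #include <signal.h>

    static void* worker(void* arg) {
        sigset_t set;
        int sig;
        sigemptyset(&set);
        sigaddset(&set, SIGUSR1);
        pthread_sigmask(SIG_BLOCK, &set, NULL);  /* kernel_sigset_t path */
        sigwait(&set, &sig);                     /* woken by pthread_kill */
        return NULL;
    }
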
diff --git a/libc/bionic/semaphore.c b/libc/bionic/semaphore.c
index 96819ae..9bc8412 100644
--- a/libc/bionic/semaphore.c
+++ b/libc/bionic/semaphore.c
@@ -174,7 +174,7 @@
 
         new = SEMCOUNT_DECREMENT(old);
     }
-    while (__atomic_cmpxchg((int)(old|shared),
+    while (__bionic_cmpxchg((int)(old|shared),
                             (int)(new|shared),
                             (volatile int *)pvalue) != 0);
     return ret;
@@ -198,7 +198,7 @@
 
         new = SEMCOUNT_DECREMENT(old);
     }
-    while (__atomic_cmpxchg((int)(old|shared),
+    while (__bionic_cmpxchg((int)(old|shared),
                             (int)(new|shared),
                             (volatile int *)pvalue) != 0);
 
@@ -235,7 +235,7 @@
         else
             new = SEMCOUNT_INCREMENT(old);
     }
-    while ( __atomic_cmpxchg((int)(old|shared),
+    while ( __bionic_cmpxchg((int)(old|shared),
                              (int)(new|shared),
                              (volatile int*)pvalue) != 0);
 
diff --git a/libc/include/pthread.h b/libc/include/pthread.h
index 9d05769..2015ac0 100644
--- a/libc/include/pthread.h
+++ b/libc/include/pthread.h
@@ -42,9 +42,13 @@
     int volatile value;
 } pthread_mutex_t;
 
-#define  PTHREAD_MUTEX_INITIALIZER             {0}
-#define  PTHREAD_RECURSIVE_MUTEX_INITIALIZER   {0x4000}
-#define  PTHREAD_ERRORCHECK_MUTEX_INITIALIZER  {0x8000}
+#define  __PTHREAD_MUTEX_INIT_VALUE            0
+#define  __PTHREAD_RECURSIVE_MUTEX_INIT_VALUE  0x4000
+#define  __PTHREAD_ERRORCHECK_MUTEX_INIT_VALUE 0x8000
+
+#define  PTHREAD_MUTEX_INITIALIZER             {__PTHREAD_MUTEX_INIT_VALUE}
+#define  PTHREAD_RECURSIVE_MUTEX_INITIALIZER   {__PTHREAD_RECURSIVE_MUTEX_INIT_VALUE}
+#define  PTHREAD_ERRORCHECK_MUTEX_INITIALIZER  {__PTHREAD_ERRORCHECK_MUTEX_INIT_VALUE}
 
 enum {
     PTHREAD_MUTEX_NORMAL = 0,
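
Presumably the raw __PTHREAD_*_INIT_VALUE macros are split out so C code
can compare a mutex's value field against them without duplicating the
literals; the public initializers expand to the same aggregates as before:

    pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;            /* {0} */
    pthread_mutex_t m2 = PTHREAD_RECURSIVE_MUTEX_INITIALIZER;  /* {0x4000} */
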
diff --git a/libc/include/sys/atomics.h b/libc/include/sys/atomics.h
index d3fa145..3ada8de 100644
--- a/libc/include/sys/atomics.h
+++ b/libc/include/sys/atomics.h
@@ -33,10 +33,48 @@
 
 __BEGIN_DECLS
 
-extern int __atomic_cmpxchg(int old, int _new, volatile int *ptr);
-extern int __atomic_swap(int _new, volatile int *ptr);
-extern int __atomic_dec(volatile int *ptr);
-extern int __atomic_inc(volatile int *ptr);
+/* Note: atomic operations that were exported by the C library didn't
+ *       provide any memory barriers, which created potential issues on
+ *       multi-core devices. We now define them as inlined calls to
+ *       GCC sync builtins, which always provide a full barrier.
+ *
+ *       NOTE: The C library still exports atomic functions by the same
+ *              name to ensure ABI stability for existing NDK machine code.
+ *
+ *       If you are an NDK developer, we encourage you to rebuild your
+ *       unmodified sources against this header as soon as possible.
+ */
+#define __ATOMIC_INLINE__ static __inline__ __attribute__((always_inline))
+
+__ATOMIC_INLINE__ int
+__atomic_cmpxchg(int old, int _new, volatile int *ptr)
+{
+    /* We must return 0 on success */
+    return __sync_val_compare_and_swap(ptr, old, _new) != old;
+}
+
+__ATOMIC_INLINE__ int
+__atomic_swap(int _new, volatile int *ptr)
+{
+    int prev;
+    do {
+        prev = *ptr;
+    } while (__sync_val_compare_and_swap(ptr, prev, _new) != prev);
+    return prev;
+}
+
+__ATOMIC_INLINE__ int
+__atomic_dec(volatile int *ptr)
+{
+  return __sync_fetch_and_sub (ptr, 1);
+}
+
+__ATOMIC_INLINE__ int
+__atomic_inc(volatile int *ptr)
+{
+  return __sync_fetch_and_add (ptr, 1);
+}
+
 
 int __futex_wait(volatile void *ftx, int val, const struct timespec *timeout);
 int __futex_wake(volatile void *ftx, int count);
diff --git a/libc/include/sys/linux-syscalls.h b/libc/include/sys/linux-syscalls.h
index 7b74a4b..930508a 100644
--- a/libc/include/sys/linux-syscalls.h
+++ b/libc/include/sys/linux-syscalls.h
@@ -198,6 +198,7 @@
 #define __NR_waitid                       (__NR_SYSCALL_BASE + 284)
 #define __NR_kill                         (__NR_SYSCALL_BASE + 37)
 #define __NR_tkill                        (__NR_SYSCALL_BASE + 238)
+#define __NR_tgkill                       (__NR_SYSCALL_BASE + 270)
 #define __NR_set_thread_area              (__NR_SYSCALL_BASE + 243)
 #define __NR_openat                       (__NR_SYSCALL_BASE + 295)
 #define __NR_madvise                      (__NR_SYSCALL_BASE + 219)
@@ -242,6 +243,7 @@
 #define __NR_waitid                       (__NR_SYSCALL_BASE + 284)
 #define __NR_kill                         (__NR_SYSCALL_BASE + 37)
 #define __NR_tkill                        (__NR_SYSCALL_BASE + 238)
+#define __NR_tgkill                       (__NR_SYSCALL_BASE + 270)
 #define __NR_set_thread_area              (__NR_SYSCALL_BASE + 243)
 #define __NR_vfork                        (__NR_SYSCALL_BASE + 190)
 #define __NR_openat                       (__NR_SYSCALL_BASE + 295)
diff --git a/libc/include/sys/linux-unistd.h b/libc/include/sys/linux-unistd.h
index ae9077f..f463127 100644
--- a/libc/include/sys/linux-unistd.h
+++ b/libc/include/sys/linux-unistd.h
@@ -32,6 +32,7 @@
 void*            __brk (void*);
 int              kill (pid_t, int);
 int              tkill (pid_t tid, int sig);
+int              tgkill (pid_t tgid, pid_t tid, int sig);
 int              __ptrace (int request, int pid, void* addr, void* data);
 int              __set_thread_area (void*  user_desc);
 int              __getpriority (int, int);
diff --git a/libc/kernel/common/linux/capability.h b/libc/kernel/common/linux/capability.h
index 605bc27..7350e7c 100644
--- a/libc/kernel/common/linux/capability.h
+++ b/libc/kernel/common/linux/capability.h
@@ -7,89 +7,114 @@
  ***   structures, and macros generated from the original header, and thus,
  ***   contains no copyrightable information.
  ***
+ ***   To edit the content of this header, modify the corresponding
+ ***   source file (e.g. under external/kernel-headers/original/) then
+ ***   run bionic/libc/kernel/tools/update_all.py
+ ***
+ ***   Any manual change here will be lost the next time this script will
+ ***   be run. You've been warned!
+ ***
  ****************************************************************************
  ****************************************************************************/
 #ifndef _LINUX_CAPABILITY_H
 #define _LINUX_CAPABILITY_H
-
 #include <linux/types.h>
-#include <linux/compiler.h>
-
-#define _LINUX_CAPABILITY_VERSION 0x19980330
-
+struct task_struct;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define _LINUX_CAPABILITY_VERSION_1 0x19980330
+#define _LINUX_CAPABILITY_U32S_1 1
+#define _LINUX_CAPABILITY_VERSION_2 0x20071026  
+#define _LINUX_CAPABILITY_U32S_2 2
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define _LINUX_CAPABILITY_VERSION_3 0x20080522
+#define _LINUX_CAPABILITY_U32S_3 2
 typedef struct __user_cap_header_struct {
  __u32 version;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  int pid;
 } __user *cap_user_header_t;
-
 typedef struct __user_cap_data_struct {
  __u32 effective;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  __u32 permitted;
  __u32 inheritable;
 } __user *cap_user_data_t;
-
+#define VFS_CAP_REVISION_MASK 0xFF000000
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define VFS_CAP_REVISION_SHIFT 24
+#define VFS_CAP_FLAGS_MASK ~VFS_CAP_REVISION_MASK
+#define VFS_CAP_FLAGS_EFFECTIVE 0x000001
+#define VFS_CAP_REVISION_1 0x01000000
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define VFS_CAP_U32_1 1
+#define XATTR_CAPS_SZ_1 (sizeof(__le32)*(1 + 2*VFS_CAP_U32_1))
+#define VFS_CAP_REVISION_2 0x02000000
+#define VFS_CAP_U32_2 2
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define XATTR_CAPS_SZ_2 (sizeof(__le32)*(1 + 2*VFS_CAP_U32_2))
+#define XATTR_CAPS_SZ XATTR_CAPS_SZ_2
+#define VFS_CAP_U32 VFS_CAP_U32_2
+#define VFS_CAP_REVISION VFS_CAP_REVISION_2
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+struct vfs_cap_data {
+ __le32 magic_etc;
+ struct {
+ __le32 permitted;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ __le32 inheritable;
+ } data[VFS_CAP_U32];
+};
+#define _LINUX_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_1
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define _LINUX_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_1
 #define CAP_CHOWN 0
-
 #define CAP_DAC_OVERRIDE 1
-
 #define CAP_DAC_READ_SEARCH 2
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_FOWNER 3
-
 #define CAP_FSETID 4
-
-#define CAP_FS_MASK 0x1f
-
 #define CAP_KILL 5
-
 #define CAP_SETGID 6
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_SETUID 7
-
 #define CAP_SETPCAP 8
-
 #define CAP_LINUX_IMMUTABLE 9
-
 #define CAP_NET_BIND_SERVICE 10
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_NET_BROADCAST 11
-
 #define CAP_NET_ADMIN 12
-
 #define CAP_NET_RAW 13
-
 #define CAP_IPC_LOCK 14
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_IPC_OWNER 15
-
 #define CAP_SYS_MODULE 16
-
 #define CAP_SYS_RAWIO 17
-
 #define CAP_SYS_CHROOT 18
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_SYS_PTRACE 19
-
 #define CAP_SYS_PACCT 20
-
 #define CAP_SYS_ADMIN 21
-
 #define CAP_SYS_BOOT 22
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_SYS_NICE 23
-
 #define CAP_SYS_RESOURCE 24
-
 #define CAP_SYS_TIME 25
-
 #define CAP_SYS_TTY_CONFIG 26
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_MKNOD 27
-
 #define CAP_LEASE 28
-
 #define CAP_AUDIT_WRITE 29
-
 #define CAP_AUDIT_CONTROL 30
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define CAP_SETFCAP 31
+#define CAP_MAC_OVERRIDE 32
+#define CAP_MAC_ADMIN 33
+#define CAP_SYSLOG 34
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define CAP_WAKE_ALARM 35
+#define CAP_LAST_CAP CAP_WAKE_ALARM
+#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
+#define CAP_TO_INDEX(x) ((x) >> 5)  
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define CAP_TO_MASK(x) (1 << ((x) & 31))  
 #endif
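
A minimal sketch of how the new v2/v3 macros index the two-word capability
bitmap (cap_is_effective is an illustrative helper): CAP_TO_INDEX selects
the 32-bit word (cap >> 5) and CAP_TO_MASK the bit within it.

    #include <linux/capability.h>

    static int cap_is_effective(const struct __user_cap_data_struct* data,
                                int cap) {
        if (!cap_valid(cap))
            return 0;
        return (data[CAP_TO_INDEX(cap)].effective & CAP_TO_MASK(cap)) != 0;
    }
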
diff --git a/libc/netbsd/net/getaddrinfo.c b/libc/netbsd/net/getaddrinfo.c
index ace8c1a..6ae6e3e 100644
--- a/libc/netbsd/net/getaddrinfo.c
+++ b/libc/netbsd/net/getaddrinfo.c
@@ -1867,6 +1867,19 @@
 	free(elems);
 }
 
+static int _using_alt_dns()
+{
+	char propname[PROP_NAME_MAX];
+	char propvalue[PROP_VALUE_MAX];
+
+	propvalue[0] = 0;
+	snprintf(propname, sizeof(propname), "net.dns1.%d", getpid());
+	if (__system_property_get(propname, propvalue) > 0 ) {
+		return 1;
+	}
+	return 0;
+}
+
 /*ARGSUSED*/
 static int
 _dns_getaddrinfo(void *rv, void	*cb_data, va_list ap)
@@ -1909,14 +1922,12 @@
 		q.anslen = sizeof(buf->buf);
 		int query_ipv6 = 1, query_ipv4 = 1;
 		if (pai->ai_flags & AI_ADDRCONFIG) {
-			query_ipv6 = _have_ipv6();
-			query_ipv4 = _have_ipv4();
-			if (query_ipv6 == 0 && query_ipv4 == 0) {
-				// Both our IPv4 and IPv6 connectivity probes failed, which indicates
-				// that we have neither an IPv4 or an IPv6 default route (and thus no
-				// global IPv4 or IPv6 connectivity). We might be in a walled garden.
-				// Throw up our arms and ask for both A and AAAA.
-				query_ipv6 = query_ipv4 = 1;
+			// Only implement AI_ADDRCONFIG if the application is not
+			// using its own DNS servers, since our implementation
+			// only works on the default connection.
+			if (!_using_alt_dns()) {
+				query_ipv6 = _have_ipv6();
+				query_ipv4 = _have_ipv4();
 			}
 		}
 		if (query_ipv6) {
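
For reference, a resolver call that exercises the path changed above
(lookup is an illustrative helper): with AI_ADDRCONFIG set, A and AAAA
queries are now filtered by connectivity only when the process is using
the default DNS servers rather than its own per-process ones.

    #include <netdb.h>
    #include <stddef.h>
    #include <string.h>

    struct addrinfo* lookup(const char* host) {
        struct addrinfo hints, *res = NULL;
        memset(&hints, 0, sizeof(hints));
        hints.ai_family = AF_UNSPEC;
        hints.ai_flags  = AI_ADDRCONFIG;
        return getaddrinfo(host, NULL, &hints, &res) == 0 ? res : NULL;
    }
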
diff --git a/libc/private/bionic_atomic_arm.h b/libc/private/bionic_atomic_arm.h
new file mode 100644
index 0000000..275c1c9
--- /dev/null
+++ b/libc/private/bionic_atomic_arm.h
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BIONIC_ATOMIC_ARM_H
+#define BIONIC_ATOMIC_ARM_H
+
+#include <machine/cpu-features.h>
+
+/* Some of the hardware instructions used below are not available in Thumb-1
+ * mode (they are if you build in ARM or Thumb-2 mode though). To solve this
+ * problem, we're going to use the same technique as libatomic_ops,
+ * which is to temporarily switch to ARM, do the operation, then switch
+ * back to Thumb-1.
+ *
+ * This results in two 'bx' jumps, just like a normal function call, but
+ * everything is kept inlined, avoids loading or computing the function's
+ * address, and avoids a little I-cache thrashing too.
+ *
+ * However, it is highly recommended to avoid compiling any C library source
+ * file that uses these functions in Thumb-1 mode.
+ *
+ * Define three helper macros to implement this:
+ */
+#if defined(__thumb__) && !defined(__thumb2__)
+#  define  __ATOMIC_SWITCH_TO_ARM \
+            "adr r3, 5f\n" \
+            "bx  r3\n" \
+            ".align\n" \
+            ".arm\n" \
+        "5:\n"
+/* note: the leading \n below is intentional */
+#  define __ATOMIC_SWITCH_TO_THUMB \
+            "\n" \
+            "adr r3, 6f\n" \
+            "bx  r3\n" \
+            ".thumb" \
+        "6:\n"
+
+#  define __ATOMIC_CLOBBERS   "r3"  /* list of clobbered registers */
+
+/* Warn the user that ARM mode should really be preferred! */
+#  warning Rebuilding this source file in ARM mode is highly recommended for performance!!
+
+#else
+#  define  __ATOMIC_SWITCH_TO_ARM   /* nothing */
+#  define  __ATOMIC_SWITCH_TO_THUMB /* nothing */
+#  define  __ATOMIC_CLOBBERS        /* nothing */
+#endif
+
+
+/* Define a full memory barrier; this is only needed if we build the
+ * platform for a multi-core device. For the record, using a 'dmb'
+ * instruction on a Nexus One device can take up to 180 ns even if
+ * it is completely unnecessary on this device.
+ *
+ * NOTE: This is where the platform and NDK atomic headers are
+ *        going to diverge. With the NDK, we don't know if the generated
+ *        code is going to run on a single or multi-core device, so we
+ *        need to be cautious.
+ *
+ *        Fortunately, we can use the kernel helper function that is
+ *        mapped at address 0xffff0fa0 in every user process, and that
+ *        provides a device-specific barrier operation.
+ *
+ *        I.e. on single-core devices, the helper returns immediately;
+ *        on multi-core devices, it uses "dmb" or any other means to
+ *        perform a full memory barrier.
+ *
+ * There are three cases to consider for the platform:
+ *
+ *    - multi-core ARMv7-A       => use the 'dmb' hardware instruction
+ *    - multi-core ARMv6         => use the coprocessor
+ *    - single core ARMv5TE/6/7  => do not use any hardware barrier
+ */
+#if defined(ANDROID_SMP) && ANDROID_SMP == 1
+
+/* Sanity check, multi-core is only supported starting from ARMv6 */
+#  if __ARM_ARCH__ < 6
+#    error ANDROID_SMP should not be set to 1 for an ARM architecture less than 6
+#  endif
+
+#  ifdef __ARM_HAVE_DMB
+/* For ARMv7-A, we can use the 'dmb' instruction directly */
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't
+     * bother with __ATOMIC_SWITCH_TO_ARM */
+    __asm__ __volatile__ ( "dmb" : : : "memory" );
+}
+#  else /* !__ARM_HAVE_DMB */
+/* Otherwise, i.e. for multi-core ARMv6, we need to use the coprocessor,
+ * which requires the use of a general-purpose register, which is slightly
+ * less efficient.
+ */
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    __asm__ __volatile__ (
+        __ATOMIC_SWITCH_TO_ARM
+        "mcr p15, 0, %0, c7, c10, 5"
+        __ATOMIC_SWITCH_TO_THUMB
+        : : "r" (0) : __ATOMIC_CLOBBERS "memory");
+}
+#  endif /* !__ARM_HAVE_DMB */
+#else /* !ANDROID_SMP */
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    /* A simple compiler barrier */
+    __asm__ __volatile__ ( "" : : : "memory" );
+}
+#endif /* !ANDROID_SMP */
+
+/* Compare-and-swap, without any explicit barriers. Note that this function
+ * returns 0 on success, and 1 on failure. The opposite convention is typically
+ * used on other platforms.
+ *
+ * There are two cases to consider:
+ *
+ *     - ARMv6+  => use LDREX/STREX instructions
+ *     - < ARMv6 => use kernel helper function mapped at 0xffff0fc0
+ *
+ * LDREX/STREX are only available starting from ARMv6
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev, status;
+    do {
+        __asm__ __volatile__ (
+            __ATOMIC_SWITCH_TO_ARM
+            "ldrex %0, [%3]\n"
+            "mov %1, #0\n"
+            "teq %0, %4\n"
+#ifdef __thumb2__
+            "it eq\n"
+#endif
+            "strexeq %1, %5, [%3]"
+            __ATOMIC_SWITCH_TO_THUMB
+            : "=&r" (prev), "=&r" (status), "+m"(*ptr)
+            : "r" (ptr), "Ir" (old_value), "r" (new_value)
+            : __ATOMIC_CLOBBERS "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev != old_value;
+}
+#  else /* !__ARM_HAVE_LDREX_STREX */
+
+/* Use the handy kernel helper function mapped at 0xffff0fc0 */
+typedef int (kernel_cmpxchg)(int32_t, int32_t, volatile int32_t *);
+
+__ATOMIC_INLINE__ int
+__kernel_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    /* Note: the kernel function returns 0 on success too */
+    return (*(kernel_cmpxchg *)0xffff0fc0)(old_value, new_value, ptr);
+}
+
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    return __kernel_cmpxchg(old_value, new_value, ptr);
+}
+#endif /* !__ARM_HAVE_LDREX_STREX */
+
+/* Swap operation, without any explicit barriers.
+ * There are again two similar cases to consider:
+ *
+ *   ARMv6+ => use LDREX/STREX
+ *   < ARMv6 => use SWP instead.
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev, status;
+    do {
+        __asm__ __volatile__ (
+            __ATOMIC_SWITCH_TO_ARM
+            "ldrex %0, [%3]\n"
+            "strex %1, %4, [%3]"
+            __ATOMIC_SWITCH_TO_THUMB
+            : "=&r" (prev), "=&r" (status), "+m" (*ptr)
+            : "r" (ptr), "r" (new_value)
+            : __ATOMIC_CLOBBERS "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#else /* !__ARM_HAVE_LDREX_STREX */
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev;
+    /* NOTE: SWP is available in Thumb-1 too */
+    __asm__ __volatile__ ("swp %0, %2, [%3]"
+                          : "=&r" (prev), "+m" (*ptr)
+                          : "r" (new_value), "r" (ptr)
+                          : "cc");
+    return prev;
+}
+#endif /* !__ARM_HAVE_LDREX_STREX */
+
+/* Atomic increment - without any barriers
+ * This returns the old value
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t* ptr)
+{
+    int32_t prev, tmp, status;
+    do {
+        __asm__ __volatile__ (
+            __ATOMIC_SWITCH_TO_ARM
+            "ldrex %0, [%4]\n"
+            "add %1, %0, #1\n"
+            "strex %2, %1, [%4]"
+            __ATOMIC_SWITCH_TO_THUMB
+            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
+            : "r" (ptr)
+            : __ATOMIC_CLOBBERS "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#else
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t* ptr)
+{
+    int32_t  prev, status;
+    do {
+        prev = *ptr;
+        status = __kernel_cmpxchg(prev, prev+1, ptr);
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#endif
+
+/* Atomic decrement - without any barriers
+ * This returns the old value.
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t* ptr)
+{
+    int32_t prev, tmp, status;
+    do {
+        __asm__ __volatile__ (
+            __ATOMIC_SWITCH_TO_ARM
+            "ldrex %0, [%4]\n"
+            "sub %1, %0, #1\n"
+            "strex %2, %1, [%4]"
+            __ATOMIC_SWITCH_TO_THUMB
+            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
+            : "r" (ptr)
+            : __ATOMIC_CLOBBERS "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#else
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t* ptr)
+{
+    int32_t  prev, status;
+    do {
+        prev = *ptr;
+        status = __kernel_cmpxchg(prev, prev-1, ptr);
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#endif
+
+#endif /* SYS_ATOMICS_ARM_H */
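
A typical consumer of the returns-the-old-value convention above is reference
counting; a hedged sketch (names hypothetical, not part of this patch):

    static volatile int32_t ref_count = 1;

    static void obj_release_sketch(void)
    {
        /* __bionic_atomic_dec() returns the OLD value, so seeing 1 means
         * this call dropped the last reference. */
        if (__bionic_atomic_dec(&ref_count) == 1) {
            __bionic_memory_barrier();
            /* safe to free the object here */
        }
    }
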
diff --git a/libc/private/bionic_atomic_gcc_builtin.h b/libc/private/bionic_atomic_gcc_builtin.h
new file mode 100644
index 0000000..e7c5761
--- /dev/null
+++ b/libc/private/bionic_atomic_gcc_builtin.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BIONIC_ATOMIC_GCC_BUILTIN_H
+#define BIONIC_ATOMIC_GCC_BUILTIN_H
+
+/* This header file is used by default if we don't have optimized atomic
+ * routines for a given platform. See bionic_atomic_arm.h and
+ * bionic_atomic_x86.h for examples.
+ */
+
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    __sync_synchronize();
+}
+
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    /* We must return 0 on success, while __sync_bool_compare_and_swap()
+     * returns non-zero on success, hence the inversion below. */
+    return __sync_bool_compare_and_swap(ptr, old_value, new_value) == 0;
+}
+
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev;
+    do {
+        prev = *ptr;
+        /* __sync_bool_compare_and_swap() returns non-zero on success;
+         * retry if another thread modified *ptr in between. */
+    } while (!__sync_bool_compare_and_swap(ptr, prev, new_value));
+    return prev;
+}
+
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t* ptr)
+{
+    /* We must return the old value */
+    return __sync_fetch_and_add(ptr, 1);
+}
+
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t* ptr)
+{
+    /* We must return the old value */
+    return __sync_fetch_and_add(ptr, -1);
+}
+
+#endif /* BIONIC_ATOMIC_GCC_BUILTIN_H */
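
A quick way to sanity-check the conventions this header must preserve is a
standalone test; a minimal sketch (assumes a hosted toolchain and that the
definitions above are in scope, not part of this patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        volatile int32_t x = 1;
        assert(__bionic_cmpxchg(1, 2, &x) == 0 && x == 2); /* success -> 0  */
        assert(__bionic_cmpxchg(1, 3, &x) != 0 && x == 2); /* failure -> !0 */
        assert(__bionic_atomic_inc(&x) == 2 && x == 3);    /* returns old   */
        assert(__bionic_atomic_dec(&x) == 3 && x == 2);    /* returns old   */
        return 0;
    }
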
diff --git a/libc/private/bionic_atomic_inline.h b/libc/private/bionic_atomic_inline.h
index 95766e1..821ad39 100644
--- a/libc/private/bionic_atomic_inline.h
+++ b/libc/private/bionic_atomic_inline.h
@@ -43,62 +43,21 @@
 extern "C" {
 #endif
 
-/*
- * Define the full memory barrier for an SMP system.  This is
- * platform-specific.
+/* Define __ATOMIC_INLINE__ to control the inlining of all atomic
+ * functions declared here. For a slight performance boost, we want
+ * all of them to be always_inline.
  */
+#define  __ATOMIC_INLINE__  static __inline__ __attribute__((always_inline))
 
 #ifdef __arm__
-#include <machine/cpu-features.h>
-
-/*
- * For ARMv6K we need to issue a specific MCR instead of the DMB, since
- * that wasn't added until v7.  For anything older, SMP isn't relevant.
- * Since we don't have an ARMv6K to test with, we're not going to deal
- * with that now.
- *
- * The DMB instruction is found in the ARM and Thumb2 instruction sets.
- * This will fail on plain 16-bit Thumb.
- */
-#if defined(__ARM_HAVE_DMB)
-# define _ANDROID_MEMBAR_FULL_SMP() \
-    do { __asm__ __volatile__ ("dmb" ::: "memory"); } while (0)
+#  include <bionic_atomic_arm.h>
+#elif defined(__i386__)
+#  include <bionic_atomic_x86.h>
 #else
-# define _ANDROID_MEMBAR_FULL_SMP()  ARM_SMP_defined_but_no_DMB()
+#  include <bionic_atomic_gcc_builtin.h>
 #endif
 
-#elif defined(__i386__) || defined(__x86_64__)
-/*
- * For recent x86, we can use the SSE2 mfence instruction.
- */
-# define _ANDROID_MEMBAR_FULL_SMP() \
-    do { __asm__ __volatile__ ("mfence" ::: "memory"); } while (0)
-
-#else
-/*
- * Implementation not defined for this platform.  Hopefully we're building
- * in uniprocessor mode.
- */
-# define _ANDROID_MEMBAR_FULL_SMP()  SMP_barrier_not_defined_for_platform()
-#endif
-
-
-/*
- * Full barrier.  On uniprocessors this is just a compiler reorder barrier,
- * which ensures that the statements appearing above the barrier in the C/C++
- * code will be issued after the statements appearing below the barrier.
- *
- * For SMP this also includes a memory barrier instruction.  On an ARM
- * CPU this means that the current core will flush pending writes, wait
- * for pending reads to complete, and discard any cached reads that could
- * be stale.  Other CPUs may do less, but the end result is equivalent.
- */
-#if ANDROID_SMP != 0
-# define ANDROID_MEMBAR_FULL() _ANDROID_MEMBAR_FULL_SMP()
-#else
-# define ANDROID_MEMBAR_FULL() \
-    do { __asm__ __volatile__ ("" ::: "memory"); } while (0)
-#endif
+#define ANDROID_MEMBAR_FULL  __bionic_memory_barrier
 
 #ifdef __cplusplus
 } // extern "C"
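
The net effect of the dispatch above is that arch-independent callers compile
unchanged; a minimal consumer sketch (illustrative only, assuming the private
include path):

    #include <bionic_atomic_inline.h>

    void publish_flag_sketch(volatile int32_t* flag)
    {
        /* ANDROID_MEMBAR_FULL now names __bionic_memory_barrier(),
         * supplied by whichever per-arch header was selected above. */
        ANDROID_MEMBAR_FULL();
        *flag = 1;
    }
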
diff --git a/libc/private/bionic_atomic_x86.h b/libc/private/bionic_atomic_x86.h
new file mode 100644
index 0000000..aca0c4b
--- /dev/null
+++ b/libc/private/bionic_atomic_x86.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BIONIC_ATOMIC_X86_H
+#define BIONIC_ATOMIC_X86_H
+
+/* Define a full memory barrier; this is only needed when we build the
+ * platform for a multi-core device.
+ */
+#if defined(ANDROID_SMP) && ANDROID_SMP == 1
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    __asm__ __volatile__ ( "mfence" : : : "memory" );
+}
+#else
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    /* A simple compiler barrier */
+    __asm__ __volatile__ ( "" : : : "memory" );
+}
+#endif
+
+/* Compare-and-swap, without any explicit barriers. Note that this function
+ * returns 0 on success, and 1 on failure. The opposite convention is typically
+ * used on other platforms.
+ */
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev;
+    __asm__ __volatile__ ("lock; cmpxchgl %1, %2"
+                          : "=a" (prev)
+                          : "q" (new_value), "m" (*ptr), "0" (old_value)
+                          : "memory");
+    return prev != old_value;
+}
+
+
+/* Swap, without any explicit barriers */
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t *ptr)
+{
+    __asm__ __volatile__ ("xchgl %1, %0"
+                          : "=r" (new_value)
+                          : "m" (*ptr), "0" (new_value)
+                          : "memory");
+    return new_value;
+}
+
+/* Atomic increment, without explicit barriers */
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t *ptr)
+{
+    int increment = 1;
+    __asm__ __volatile__ ("lock; xaddl %0, %1"
+                          : "+r" (increment), "+m" (*ptr)
+                          : : "memory");
+    /* increment now holds the old value of *ptr */
+    return increment;
+}
+
+/* Atomic decrement, without explicit barriers */
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t *ptr)
+{
+    int increment = -1;
+    __asm__ __volatile__ ("lock; xaddl %0, %1"
+                          : "+r" (increment), "+m" (*ptr)
+                          : : "memory");
+    /* increment now holds the old value of *ptr */
+    return increment;
+}
+
+#endif /* BIONIC_ATOMIC_X86_H */
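
For reference, "lock; xaddl" exchanges the register with the memory operand
and then stores the sum, which is why the register ends up holding the old
value. A behavioural sketch of what the sequence computes, minus the
atomicity (illustrative only):

    static int32_t xadd_one_sketch(volatile int32_t* ptr)
    {
        int32_t old = *ptr;   /* xaddl: the register receives the old value */
        *ptr = old + 1;       /* ...and memory receives old + increment     */
        return old;
    }
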
diff --git a/libc/stdio/fileext.h b/libc/stdio/fileext.h
index 2d07043..b36a448 100644
--- a/libc/stdio/fileext.h
+++ b/libc/stdio/fileext.h
@@ -29,24 +29,41 @@
  * $Citrus$
  */
 
+#include <pthread.h>
+#include "wcio.h"
+
 /*
  * file extension
  */
 struct __sfileext {
 	struct	__sbuf _ub; /* ungetc buffer */
 	struct wchar_io_data _wcio;	/* wide char io status */
+	pthread_mutex_t _lock; /* file lock */
 };
 
+#define _FILEEXT_INITIALIZER  {{NULL,0},{0},PTHREAD_RECURSIVE_MUTEX_INITIALIZER}
+
 #define _EXT(fp) ((struct __sfileext *)((fp)->_ext._base))
 #define _UB(fp) _EXT(fp)->_ub
+#define _FLOCK(fp)  _EXT(fp)->_lock
 
 #define _FILEEXT_INIT(fp) \
 do { \
 	_UB(fp)._base = NULL; \
 	_UB(fp)._size = 0; \
 	WCIO_INIT(fp); \
+	_FLOCK_INIT(fp); \
 } while (0)
 
+/* Helper macros to avoid a function call when you know that fp is not NULL.
+ * Notice that we keep _FLOCK_INIT() fast by slightly breaking our pthread
+ * encapsulation.
+ */
+#define _FLOCK_INIT(fp)    _FLOCK(fp).value = __PTHREAD_RECURSIVE_MUTEX_INIT_VALUE
+#define _FLOCK_LOCK(fp)    pthread_mutex_lock(&_FLOCK(fp))
+#define _FLOCK_TRYLOCK(fp) pthread_mutex_trylock(&_FLOCK(fp))
+#define _FLOCK_UNLOCK(fp)  pthread_mutex_unlock(&_FLOCK(fp))
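
A usage sketch (not part of this patch): internal stdio code that already
holds a non-NULL FILE* can take the lock directly through these macros,

    _FLOCK_LOCK(fp);
    /* ... touch fp's buffers safely ... */
    _FLOCK_UNLOCK(fp);

while external callers keep going through flockfile()/funlockfile(), which
add the NULL check.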
+
 #define _FILEEXT_SETUP(f, fext) \
 do { \
 	(f)->_ext._base = (unsigned char *)(fext); \
diff --git a/libc/stdio/findfp.c b/libc/stdio/findfp.c
index a659c87..76ed5ee 100644
--- a/libc/stdio/findfp.c
+++ b/libc/stdio/findfp.c
@@ -58,7 +58,12 @@
 static struct glue *lastglue = &uglue;
 _THREAD_PRIVATE_MUTEX(__sfp_mutex);
 
-static struct __sfileext __sFext[3];
+static struct __sfileext __sFext[3] = {
+	_FILEEXT_INITIALIZER,
+	_FILEEXT_INITIALIZER,
+	_FILEEXT_INITIALIZER,
+};
+
 FILE __sF[3] = {
 	std(__SRD, STDIN_FILENO),		/* stdin */
 	std(__SWR, STDOUT_FILENO),		/* stdout */
diff --git a/libc/stdio/flockfile.c b/libc/stdio/flockfile.c
index e8c74c5..368fb15 100644
--- a/libc/stdio/flockfile.c
+++ b/libc/stdio/flockfile.c
@@ -31,122 +31,23 @@
  * we can't use the OpenBSD implementation which uses kernel-specific
  * APIs not available on Linux.
  *
- * Ideally, this would be trivially implemented by adding a
- * pthread_mutex_t field to struct __sFILE as defined in
- * <stdio.h>.
- *
- * However, since we don't want to bring pthread into the mix
- * as well as change the size of a public API/ABI structure,
- * we're going to store the data out-of-band.
- *
- * we use a hash-table to map FILE* pointers to recursive mutexes
- * fclose() will call __fremovelock() defined below to remove
- * a pointer from the table.
+ * Instead, we use a pthread_mutex_t within the FILE* internal state.
+ * See fileext.h for details.
  *
  * The behaviour if fclose() is called while the corresponding
  * file is locked is totally undefined.
  */
 #include <stdio.h>
-#include <pthread.h>
 #include <string.h>
+#include <errno.h>
+#include "fileext.h"
 
-/* a node in the hash table */
-typedef struct FileLock {
-    struct FileLock*  next;
-    FILE*             file;
-    pthread_mutex_t   mutex;
-} FileLock;
-
-/* use a static hash table. We assume that we're not going to
- * lock a really large number of FILE* objects on an embedded
- * system.
- */
-#define  FILE_LOCK_BUCKETS  32
-
-typedef struct {
-    pthread_mutex_t   lock;
-    FileLock*         buckets[ FILE_LOCK_BUCKETS ];
-} LockTable;
-
-static LockTable*      _lockTable;
-static pthread_once_t  _lockTable_once = PTHREAD_ONCE_INIT;
-
-static void
-lock_table_init( void )
-{
-    _lockTable = malloc(sizeof(*_lockTable));
-    if (_lockTable != NULL) {
-        pthread_mutex_init(&_lockTable->lock, NULL);
-        memset(_lockTable->buckets, 0, sizeof(_lockTable->buckets));
-    }
-}
-
-static LockTable*
-lock_table_lock( void )
-{
-    pthread_once( &_lockTable_once, lock_table_init );
-    pthread_mutex_lock( &_lockTable->lock );
-    return _lockTable;
-}
-
-static void
-lock_table_unlock( LockTable*  t )
-{
-    pthread_mutex_unlock( &t->lock );
-}
-
-static FileLock**
-lock_table_lookup( LockTable*  t, FILE*  f )
-{
-    uint32_t    hash = (uint32_t)(void*)f;
-    FileLock**  pnode;
-
-    hash = (hash >> 2) ^ (hash << 17);
-    pnode = &t->buckets[hash % FILE_LOCK_BUCKETS];
-    for (;;) {
-        FileLock*  node = *pnode;
-        if (node == NULL || node->file == f)
-            break;
-        pnode = &node->next;
-    }
-    return pnode;
-}
 
 void
 flockfile(FILE * fp)
 {
-    LockTable*  t = lock_table_lock();
-
-    if (t != NULL) {
-        FileLock**  lookup = lock_table_lookup(t, fp);
-        FileLock*   lock   = *lookup;
-
-        if (lock == NULL) {
-            pthread_mutexattr_t  attr;
-
-            /* create a new node in the hash table */
-            lock = malloc(sizeof(*lock));
-            if (lock == NULL) {
-                lock_table_unlock(t);
-                return;
-            }
-            lock->next        = NULL;
-            lock->file        = fp;
-
-            pthread_mutexattr_init(&attr);
-            pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
-            pthread_mutex_init( &lock->mutex, &attr );
-
-            *lookup           = lock;
-        }
-        lock_table_unlock(t);
-
-        /* we assume that another thread didn't destroy 'lock'
-        * by calling fclose() on the FILE*. This can happen if
-        * the client is *really* buggy, but we don't care about
-        * such code here.
-        */
-        pthread_mutex_lock(&lock->mutex);
+    if (fp != NULL) {
+        _FLOCK_LOCK(fp);
     }
 }
 
@@ -154,21 +55,13 @@
 int
 ftrylockfile(FILE *fp)
 {
-    int         ret = -1;
-    LockTable*  t   = lock_table_lock();
+    /* The specification for ftrylockfile() says it returns 0 on success,
+     * or non-zero on error. So return an errno code directly on error.
+     */
+    int  ret = EINVAL;
 
-    if (t != NULL) {
-        FileLock**  lookup = lock_table_lookup(t, fp);
-        FileLock*   lock   = *lookup;
-
-        lock_table_unlock(t);
-
-        /* see above comment about why we assume that 'lock' can
-        * be accessed from here
-        */
-        if (lock != NULL && !pthread_mutex_trylock(&lock->mutex)) {
-            ret = 0;  /* signal success */
-        }
+    if (fp != NULL) {
+        ret = _FLOCK_TRYLOCK(fp);
     }
     return ret;
 }
@@ -176,35 +69,7 @@
 void
 funlockfile(FILE * fp)
 {
-    LockTable*  t = lock_table_lock();
-
-    if (t != NULL) {
-        FileLock**  lookup = lock_table_lookup(t, fp);
-        FileLock*   lock   = *lookup;
-
-        if (lock != NULL)
-            pthread_mutex_unlock(&lock->mutex);
-
-        lock_table_unlock(t);
-    }
-}
-
-
-/* called from fclose() to remove the file lock */
-__LIBC_HIDDEN__ void
-__fremovelock(FILE*  fp)
-{
-    LockTable*  t = lock_table_lock();
-
-    if (t != NULL) {
-        FileLock**  lookup = lock_table_lookup(t, fp);
-        FileLock*   lock   = *lookup;
-
-        if (lock != NULL) {
-            *lookup   = lock->next;
-            lock->file = NULL;
-        }
-        lock_table_unlock(t);
-        free(lock);
+    if (fp != NULL) {
+        _FLOCK_UNLOCK(fp);
     }
 }
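
Because the per-file mutex is recursive, the standard POSIX pattern works,
including nested locking from the same thread; for example:

    flockfile(fp);
    flockfile(fp);               /* recursive: the same thread may re-lock */
    putc_unlocked('a', fp);      /* safe while the lock is held */
    funlockfile(fp);
    funlockfile(fp);             /* one unlock per lock */
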
diff --git a/libc/tools/zoneinfo/generate b/libc/tools/zoneinfo/generate
index e48a3c9..3e21d0b 100755
--- a/libc/tools/zoneinfo/generate
+++ b/libc/tools/zoneinfo/generate
@@ -37,6 +37,9 @@
   exit 1
 fi
 
+md5_sum=`md5sum $latest_archive`
+echo "MD5: $md5_sum"
+
 echo "Extracting $latest_version..."
 mkdir $latest_version
 tar -C $latest_version -zxf $latest_archive
diff --git a/libc/zoneinfo/zoneinfo.dat b/libc/zoneinfo/zoneinfo.dat
index e41422e..49b99ec 100644
--- a/libc/zoneinfo/zoneinfo.dat
+++ b/libc/zoneinfo/zoneinfo.dat
Binary files differ
diff --git a/libc/zoneinfo/zoneinfo.idx b/libc/zoneinfo/zoneinfo.idx
index eb02e80..c9eefc6 100644
--- a/libc/zoneinfo/zoneinfo.idx
+++ b/libc/zoneinfo/zoneinfo.idx
Binary files differ
diff --git a/libc/zoneinfo/zoneinfo.version b/libc/zoneinfo/zoneinfo.version
index 6140b76..c4f4922 100644
--- a/libc/zoneinfo/zoneinfo.version
+++ b/libc/zoneinfo/zoneinfo.version
@@ -1 +1 @@
-2011l
+2011n
diff --git a/libstdc++/src/one_time_construction.cpp b/libstdc++/src/one_time_construction.cpp
index 2a44c79..f3d7138 100644
--- a/libstdc++/src/one_time_construction.cpp
+++ b/libstdc++/src/one_time_construction.cpp
@@ -20,11 +20,11 @@
     // 6 untouched, wait and return 0
     // 1 untouched, return 0
 retry:
-    if (__atomic_cmpxchg(0, 0x2, gv) == 0) {
+    if (__bionic_cmpxchg(0, 0x2, gv) == 0) {
         ANDROID_MEMBAR_FULL();
         return 1;
     }
-    __atomic_cmpxchg(0x2, 0x6, gv); // Indicate there is a waiter
+    __bionic_cmpxchg(0x2, 0x6, gv); // Indicate there is a waiter
     __futex_wait(gv, 0x6, NULL);
 
     if(*gv != 1) // __cxa_guard_abort was called, let every thread try since there is no return code for this condition
@@ -39,7 +39,7 @@
     // 2 -> 1
     // 6 -> 1, and wake
     ANDROID_MEMBAR_FULL();
-    if (__atomic_cmpxchg(0x2, 0x1, gv) == 0) {
+    if (__bionic_cmpxchg(0x2, 0x1, gv) == 0) {
         return;
     }
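
For context, the compiler emits calls to these guard functions around every
function-local static that needs dynamic initialization; an approximate
sketch of the generated logic (illustrative, not part of this patch):

    /* C++ source:   int get() { static int v = compute(); return v; }
     * is lowered to roughly: */
    if (*gv != 1) {                     /* not yet constructed */
        if (__cxa_guard_acquire(gv)) {  /* 1 => this thread constructs */
            v = compute();
            __cxa_guard_release(gv);    /* publish, wake any waiters */
        }
    }
    return v;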
 
diff --git a/linker/Android.mk b/linker/Android.mk
index 60f291c..74c7453 100644
--- a/linker/Android.mk
+++ b/linker/Android.mk
@@ -10,27 +10,9 @@
 	dlfcn.c \
 	debugger.c
 
-ifeq ($(TARGET_ARCH),sh)
-# SH-4A series virtual address range from 0x00000000 to 0x7FFFFFFF.
-LINKER_TEXT_BASE := 0x70000100
-else
-# This is aligned to 4K page boundary so that both GNU ld and gold work.  Gold
-# actually produces a correct binary with starting address 0xB0000100 but the
-# extra objcopy step to rename symbols causes the resulting binary to be misaligned
-# and unloadable.  Increasing the alignment adds an extra 3840 bytes in padding
-# but switching to gold saves about 1M of space.
-LINKER_TEXT_BASE := 0xB0001000
-endif
+LOCAL_LDFLAGS := -shared
 
-# The maximum size set aside for the linker, from
-# LINKER_TEXT_BASE rounded down to a megabyte.
-LINKER_AREA_SIZE := 0x01000000
-
-LOCAL_LDFLAGS := -Wl,-Ttext,$(LINKER_TEXT_BASE)
-
-LOCAL_CFLAGS += -DPRELINK
-LOCAL_CFLAGS += -DLINKER_TEXT_BASE=$(LINKER_TEXT_BASE)
-LOCAL_CFLAGS += -DLINKER_AREA_SIZE=$(LINKER_AREA_SIZE)
+LOCAL_CFLAGS += -fno-stack-protector
 
 # Set LINKER_DEBUG to either 1 or 0
 #
@@ -75,6 +57,9 @@
 LOCAL_MODULE_CLASS := EXECUTABLES
 LOCAL_MODULE_SUFFIX := $(TARGET_EXECUTABLE_SUFFIX)
 
+# we don't want crtbegin.o (because we have begin.o), so unset it
+# just for this module
+LOCAL_NO_CRT := true
 
 include $(BUILD_SYSTEM)/dynamic_binary.mk
 
@@ -86,11 +71,3 @@
 #
 # end of BUILD_EXECUTABLE hack
 #
-
-# we don't want crtbegin.o (because we have begin.o), so unset it
-# just for this module
-$(LOCAL_BUILT_MODULE): TARGET_CRTBEGIN_STATIC_O :=
-# This line is not strictly necessary because the dynamic linker is built
-# as a static executable, but it won't hurt if in the future we start
-# building the linker as a dynamic one.
-$(LOCAL_BUILT_MODULE): TARGET_CRTBEGIN_DYNAMIC_O :=
diff --git a/linker/linker.c b/linker/linker.c
index 17008f8..a188b5c 100644
--- a/linker/linker.c
+++ b/linker/linker.c
@@ -313,15 +313,6 @@
     freelist = si;
 }
 
-#ifndef LINKER_TEXT_BASE
-#error "linker's makefile must define LINKER_TEXT_BASE"
-#endif
-#ifndef LINKER_AREA_SIZE
-#error "linker's makefile must define LINKER_AREA_SIZE"
-#endif
-#define LINKER_BASE ((LINKER_TEXT_BASE) & 0xfff00000)
-#define LINKER_TOP  (LINKER_BASE + (LINKER_AREA_SIZE))
-
 const char *addr_to_name(unsigned addr)
 {
     soinfo *si;
@@ -332,10 +323,6 @@
         }
     }
 
-    if((addr >= LINKER_BASE) && (addr < LINKER_TOP)){
-        return "linker";
-    }
-
     return "";
 }
 
@@ -354,12 +341,10 @@
     soinfo *si;
     unsigned addr = (unsigned)pc;
 
-    if ((addr < LINKER_BASE) || (addr >= LINKER_TOP)) {
-        for (si = solist; si != 0; si = si->next){
-            if ((addr >= si->base) && (addr < (si->base + si->size))) {
-                *pcount = si->ARM_exidx_count;
-                return (_Unwind_Ptr)(si->base + (unsigned long)si->ARM_exidx);
-            }
+    for (si = solist; si != 0; si = si->next){
+        if ((addr >= si->base) && (addr < (si->base + si->size))) {
+            *pcount = si->ARM_exidx_count;
+            return (_Unwind_Ptr)(si->base + (unsigned long)si->ARM_exidx);
         }
     }
    *pcount = 0;
@@ -420,6 +405,33 @@
     return NULL;
 }
 
+/*
+ * Essentially the same method as _elf_lookup() above, but only
+ * searches for LOCAL symbols
+ */
+static Elf32_Sym *_elf_lookup_local(soinfo *si, unsigned hash, const char *name)
+{
+    Elf32_Sym *symtab = si->symtab;
+    const char *strtab = si->strtab;
+    unsigned n = hash % si->nbucket;
+
+    TRACE_TYPE(LOOKUP, "%5d LOCAL SEARCH %s in %s@0x%08x %08x %d\n", pid,
+               name, si->name, si->base, hash, hash % si->nbucket);
+    for(n = si->bucket[hash % si->nbucket]; n != 0; n = si->chain[n]){
+        Elf32_Sym *s = symtab + n;
+        if (strcmp(strtab + s->st_name, name)) continue;
+        if (ELF32_ST_BIND(s->st_info) != STB_LOCAL) continue;
+        /* no section == undefined */
+        if(s->st_shndx == 0) continue;
+
+        TRACE_TYPE(LOOKUP, "%5d FOUND LOCAL %s in %s (%08x) %d\n", pid,
+                   name, si->name, s->st_value, s->st_size);
+        return s;
+    }
+
+    return NULL;
+}
+
 static unsigned elfhash(const char *_name)
 {
     const unsigned char *name = (const unsigned char *) _name;
@@ -443,7 +455,17 @@
     soinfo *lsi = si;
     int i;
 
-    /* Look for symbols in the local scope first (the object who is
+    /* If we are trying to find a symbol for the linker itself, look
+     * for LOCAL symbols first. Avoid using LOCAL symbols for other
+     * shared libraries until we have a better understanding of what
+     * might break by doing so. */
+    if (si->flags & FLAG_LINKER) {
+        s = _elf_lookup_local(si, elf_hash, name);
+        if(s != NULL)
+            goto done;
+    }
+
+    /* Look for symbols in the local scope (the object that is
      * searching). This happens with C++ templates on i386 for some
      * reason.
      *
@@ -452,6 +474,7 @@
      * dynamic linking.  Some systems return the first definition found
      * and some the first non-weak definition.   This is system dependent.
      * Here we return the first definition found for simplicity.  */
+
     s = _elf_lookup(si, elf_hash, name);
     if(s != NULL)
         goto done;
@@ -1723,10 +1746,10 @@
     DEBUG("%5d si->base = 0x%08x si->flags = 0x%08x\n", pid,
           si->base, si->flags);
 
-    if (si->flags & FLAG_EXE) {
+    if (si->flags & (FLAG_EXE | FLAG_LINKER)) {
         /* Locate the needed program segments (DYNAMIC/ARM_EXIDX) for
-         * linkage info if this is the executable. If this was a
-         * dynamic lib, that would have been done at load time.
+         * linkage info if this is the executable or the linker itself.
+         * If this was a dynamic lib, that would have been done at load time.
          *
          * TODO: It's unfortunate that small pieces of this are
          * repeated from the load_library routine. Refactor this just
@@ -1745,16 +1768,17 @@
             if (phdr->p_type == PT_LOAD) {
                 /* For the executable, we use the si->size field only in
                    dl_unwind_find_exidx(), so the meaning of si->size
-                   is not the size of the executable; it is the last
-                   virtual address of the loadable part of the executable;
-                   since si->base == 0 for an executable, we use the
-                   range [0, si->size) to determine whether a PC value
-                   falls within the executable section.  Of course, if
-                   a value is below phdr->p_vaddr, it's not in the
-                   executable section, but a) we shouldn't be asking for
-                   such a value anyway, and b) if we have to provide
-                   an EXIDX for such a value, then the executable's
-                   EXIDX is probably the better choice.
+                   is not the size of the executable; it is the distance
+                   between the load location of the executable and the last
+                   address of the loadable part of the executable.
+                   We use the range [si->base, si->base + si->size) to
+                   determine whether a PC value falls within the executable
+                   section. Of course, if a value is between si->base and
+                   (si->base + phdr->p_vaddr), it's not in the executable
+                   section, but a) we shouldn't be asking for such a value
+                   anyway, and b) if we have to provide an EXIDX for such a
+                   value, then the executable's EXIDX is probably the better
+                   choice.
                 */
                 DEBUG_DUMP_PHDR(phdr, "PT_LOAD", pid);
                 if (phdr->p_vaddr + phdr->p_memsz > si->size)
@@ -1764,12 +1788,20 @@
                 if (!(phdr->p_flags & PF_W)) {
                     unsigned _end;
 
-                    if (phdr->p_vaddr < si->wrprotect_start)
-                        si->wrprotect_start = phdr->p_vaddr;
-                    _end = (((phdr->p_vaddr + phdr->p_memsz + PAGE_SIZE - 1) &
+                    if (si->base + phdr->p_vaddr < si->wrprotect_start)
+                        si->wrprotect_start = si->base + phdr->p_vaddr;
+                    _end = (((si->base + phdr->p_vaddr + phdr->p_memsz + PAGE_SIZE - 1) &
                              (~PAGE_MASK)));
                     if (_end > si->wrprotect_end)
                         si->wrprotect_end = _end;
+                    /* Make the section writable just in case we'll have to
+                     * write to it during relocation (e.g. the text segment).
+                     * However, we will remember what range of addresses
+                     * should be write protected.
+                     */
+                    mprotect((void *) (si->base + phdr->p_vaddr),
+                             phdr->p_memsz,
+                             PFLAGS_TO_PROT(phdr->p_flags) | PROT_WRITE);
                 }
             } else if (phdr->p_type == PT_DYNAMIC) {
                 if (si->dynamic != (unsigned *)-1) {
@@ -2079,7 +2111,12 @@
 
 static void * __tls_area[ANDROID_TLS_SLOTS];
 
-unsigned __linker_init(unsigned **elfdata)
+/*
+ * This code is called after the linker has linked itself and
+ * fixed its own GOT. It is safe to make references to externs
+ * and other non-local data at this point.
+ */
+static unsigned __linker_init_post_relocation(unsigned **elfdata)
 {
     static soinfo linker_soinfo;
 
@@ -2187,7 +2224,18 @@
         vecs += 2;
     }
 
+    /* Compute the value of si->base. We can't rely on the fact that
+     * the first entry is the PHDR because this will not be true
+     * for certain executables (e.g. some in the NDK unit test suite).
+     */
+    int nn;
     si->base = 0;
+    for ( nn = 0; nn < si->phnum; nn++ ) {
+        if (si->phdr[nn].p_type == PT_PHDR) {
+            si->base = (Elf32_Addr) si->phdr - si->phdr[nn].p_vaddr;
+            break;
+        }
+    }
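
(A worked instance of the loop above, with illustrative numbers: if the
PT_PHDR entry records p_vaddr 0x34 and the program headers are observed at
address 0xb0000034 at runtime, then si->base = 0xb0000034 - 0x34 =
0xb0000000, the load bias. A fixed-address executable loaded at its link
address gets 0.)
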
     si->dynamic = (unsigned *)-1;
     si->wrprotect_start = 0xffffffff;
     si->wrprotect_end = 0;
@@ -2256,3 +2304,69 @@
           si->entry);
     return si->entry;
 }
+
+/*
+ * Find the value of AT_BASE passed to us by the kernel. This is the load
+ * location of the linker.
+ */
+static unsigned find_linker_base(unsigned **elfdata) {
+    int argc = (int) *elfdata;
+    char **argv = (char**) (elfdata + 1);
+    unsigned *vecs = (unsigned*) (argv + argc + 1);
+    while (vecs[0] != 0) {
+        vecs++;
+    }
+
+    /* Skip the single NULL pointer that terminates the environment
+     * block; the ELF auxiliary vector starts right after it. */
+    vecs++;
+
+    while(vecs[0]) {
+        if (vecs[0] == AT_BASE) {
+            return vecs[1];
+        }
+        vecs += 2;
+    }
+
+    return 0; // should never happen
+}
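
For orientation, the walk above follows the System V startup stack that the
kernel hands to the linker's entry point (standard ELF ABI layout, not
specific to this patch):

    /*  [argc]                                  <- elfdata
     *  [argv[0] .. argv[argc-1]]  [NULL]
     *  [envp[0] .. envp[n-1]]     [NULL]
     *  [auxv: {a_type, a_val} pairs .. {AT_NULL, 0}]
     *
     * AT_BASE's a_val is the address at which the kernel mapped the
     * linker itself.
     */
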
+
+/*
+ * This is the entry point for the linker, called from begin.S. This
+ * function is responsible for fixing the linker's own relocations, and
+ * then calling __linker_init_post_relocation().
+ *
+ * Because this function is called before the linker has fixed its own
+ * relocations, any attempt to reference an extern variable, extern
+ * function, or other GOT reference will generate a segfault.
+ */
+unsigned __linker_init(unsigned **elfdata) {
+    unsigned linker_addr = find_linker_base(elfdata);
+    Elf32_Ehdr *elf_hdr = (Elf32_Ehdr *) linker_addr;
+    Elf32_Phdr *phdr =
+        (Elf32_Phdr *)((unsigned char *) linker_addr + elf_hdr->e_phoff);
+
+    soinfo linker_so;
+    memset(&linker_so, 0, sizeof(soinfo));
+
+    linker_so.base = linker_addr;
+    linker_so.dynamic = (unsigned *) -1;
+    linker_so.phdr = phdr;
+    linker_so.phnum = elf_hdr->e_phnum;
+    linker_so.flags |= FLAG_LINKER;
+    linker_so.wrprotect_start = 0xffffffff;
+    linker_so.wrprotect_end = 0;
+
+    if (link_image(&linker_so, 0)) {
+        // It would be nice to print an error message, but if the linker
+        // can't link itself, there's no guarantee that we'll be able to
+        // call write() (because it involves a GOT reference).
+        //
+        // This situation should never occur unless the linker itself
+        // is corrupt.
+        exit(-1);
+    }
+
+    // We have successfully fixed our own relocations. It's safe to run
+    // the main part of the linker now.
+    return __linker_init_post_relocation(elfdata);
+}
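
To make the constraint concrete: before link_image() has processed the
linker's own relocation entries, any GOT-routed access would go through an
unrelocated slot. A hedged illustration (not part of this patch):

    extern int some_global;      /* PIC access is routed through the GOT */

    int read_it_sketch(void)
    {
        return some_global;      /* faults if the GOT slot is not yet fixed */
    }

Hence __linker_init() above restricts itself to locals and
position-independent arithmetic until link_image(&linker_so, 0) returns.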
diff --git a/linker/linker.h b/linker/linker.h
index 7b1ba51..aa1e5e7 100644
--- a/linker/linker.h
+++ b/linker/linker.h
@@ -83,6 +83,7 @@
 #define FLAG_LINKED     0x00000001
 #define FLAG_ERROR      0x00000002
 #define FLAG_EXE        0x00000004 // The main executable
+#define FLAG_LINKER     0x00000010 // The linker itself
 
 #define SOINFO_NAME_LEN 128