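Fix the x86_64 clone implementation.

The child path of __bionic_clone called __bionic_clone_entry without
first moving 'fn' and 'arg' from the child stack into %rdi and %rsi.
Pop them before the call, tidy the comments in clone.S, and add a
clone(2) test.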

Change-Id: Ia75f46dcb4d3222049e9a6a6fabc2b17223b47f7
diff --git a/libc/arch-x86_64/bionic/clone.S b/libc/arch-x86_64/bionic/clone.S
index a9adeaa..2ae0e85 100644
--- a/libc/arch-x86_64/bionic/clone.S
+++ b/libc/arch-x86_64/bionic/clone.S
@@ -33,13 +33,13 @@
 ENTRY(__pthread_clone)
         # Save tls.
         movq    %rsi, %r11
-        # 16-byte alignment for child stack.
+        # Enforce 16-byte alignment for child stack.
         andq    $~15, %rsi
 
-        # Copy arguments onto the child stack.
-        movq    %rdi, -32(%rsi) # fn
-        movq    %rcx, -24(%rsi) # arg
-        movq    %r11, -16(%rsi) # tls
+        # Copy 'fn', 'arg', and 'tls' onto the child stack.
+        movq    %rdi, -32(%rsi)  # fn
+        movq    %rcx, -24(%rsi)  # arg
+        movq    %r11, -16(%rsi)  # tls
         subq    $32, %rsi
 
         movq    %rdx, %rdi
@@ -57,33 +57,32 @@
 1:
         jnz     2f
 
-        # We're in the child thread now, call __thread_entry
+        # We're in the child now, so call __thread_entry
         # with the arguments from the child stack moved into
         # the appropriate registers.
-        popq    %rdi
-        popq    %rsi
-        popq    %rdx
+        popq    %rdi  # fn
+        popq    %rsi  # arg
+        popq    %rdx  # tls
         call    __thread_entry
         hlt
 2:
         ret
 
-/*
- * int  __bionic_clone(unsigned long clone_flags,
- *                     void*         newsp,
- *                     int           *parent_tidptr,
- *                     void          *new_tls,
- *                     int           *child_tidptr,
- *                     int           (*fn)(void *),
- *                     void          *arg);
- */
+// int __bionic_clone(unsigned long clone_flags,
+//                    void* new_sp,
+//                    int* parent_tid_ptr,
+//                    void* new_tls,
+//                    int* child_tid_ptr,
+//                    int (*fn)(void*),
+//                    void* arg);
 ENTRY(__bionic_clone)
-        # insert arguments onto the child stack
+        # Enforce 16-byte alignment for child stack.
         andq    $~15, %rsi
-        movq    %r9, -16(%rsi)
-        # 7th argument (arg) goes through stack
-        movq    8(%rsp), %rax
-        movq    %rax, -8(%rsi)
+
+        # Copy 'fn' and 'arg' onto the child stack.
+        movq    %r9, -16(%rsi)  # fn
+        movq    8(%rsp), %rax   # Read 'arg'.
+        movq    %rax, -8(%rsi)  # Write 'arg'.
 
         subq    $16, %rsi
         movq    %r8, %r10
@@ -93,23 +92,21 @@
         testl   %eax, %eax
         jns     1f
 
-        # an error occurred, set errno and return -1
+        # An error occurred; set errno and return -1.
         negl    %eax
         movl    %eax, %edi
         call    __set_errno
         orl     $-1, %eax
         jmp     2f
-
 1:
         jnz     2f
 
-        # we're in the child now, call __bionic_clone_entry
-        # with the appropriate arguments on the child stack
-        # we already placed most of them
-        # TODO: write a test for __bionic_clone and then fix this too (see above).
+        # We're in the child now, so call __bionic_clone_entry
+        # with the arguments from the child stack moved into
+        # the appropriate registers.
+        popq    %rdi  # fn
+        popq    %rsi  # arg
         call    __bionic_clone_entry
         hlt
-
 2:
         ret
-
diff --git a/tests/Android.mk b/tests/Android.mk
index 5cc2d28..59e5129 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -71,6 +71,7 @@
     netdb_test.cpp \
     pthread_test.cpp \
     regex_test.cpp \
+    sched_test.cpp \
     signal_test.cpp \
     stack_protector_test.cpp \
     stack_unwinding_test.cpp \
diff --git a/tests/sched_test.cpp b/tests/sched_test.cpp
new file mode 100644
index 0000000..ec48a4b
--- /dev/null
+++ b/tests/sched_test.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <errno.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+static int child_fn(void* i_ptr) {  // Runs in the cloned child; 'i_ptr' is clone()'s 'arg'.
+  *static_cast<int*>(i_ptr) = 42;  // Write through to the parent's int (shared via CLONE_VM).
+  return 123;  // The parent expects this back as the child's exit status.
+}
+
+TEST(sched, clone) {
+  // Run child_fn in a CLONE_VM child, then check both its write to 'i' and its exit status.
+  void* child_stack[1024];
+  int i = 0;
+  // clone() takes the top of the child's stack, hence the one-past-the-end address.
+  pid_t tid = clone(child_fn, &child_stack[1024], CLONE_VM, &i);
+  ASSERT_NE(-1, tid);  // Bail out on failure rather than calling waitpid(-1, ...).
+  // The flags request no exit signal, so the child is only reapable with __WCLONE.
+  int status;
+  ASSERT_EQ(tid, TEMP_FAILURE_RETRY(waitpid(tid, &status, __WCLONE)));
+  ASSERT_EQ(42, i);
+  ASSERT_TRUE(WIFEXITED(status));
+  ASSERT_EQ(123, WEXITSTATUS(status));
+}