Handle RDRAND failures.

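I mistakenly believed that only RDSEED could fail. However, the Intel
manuals state that RDRAND can fail too: it reports success by setting the
carry flag, so the flag has to be checked after every invocation.

CRYPTO_rdrand and the new CRYPTO_rdrand_multiple8_buf now return one on
success and zero on hardware failure, CRYPTO_hwrand propagates that result,
and the caller in rand.c falls back to CRYPTO_sysrand when the hardware RNG
fails.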

This change cherry-picks the following BoringSSL changes:

2cac3506 – Handle RDRAND failures.
248abbd7 – Add missing comma in .type pragma for rdrand code.

Change-Id: Icdc56a50ce36e9c525063583882c676a5312d313
diff --git a/linux-x86_64/crypto/rand/rdrand-x86_64.S b/linux-x86_64/crypto/rand/rdrand-x86_64.S
index 622ae55..94aab9c 100644
--- a/linux-x86_64/crypto/rand/rdrand-x86_64.S
+++ b/linux-x86_64/crypto/rand/rdrand-x86_64.S
@@ -1,11 +1,48 @@
 #if defined(__x86_64__)
 .text	
 
+
+
+
 .globl	CRYPTO_rdrand
 .hidden CRYPTO_rdrand
 .type	CRYPTO_rdrand,@function
 .align	16
 CRYPTO_rdrand:
-.byte	0x48, 0x0f, 0xc7, 0xf0
+	xorq	%rax,%rax
+
+
+.byte	0x48, 0x0f, 0xc7, 0xf1
+
+	adcq	%rax,%rax
+	movq	%rcx,0(%rdi)
+	.byte	0xf3,0xc3
+
+
+
+
+
+.globl	CRYPTO_rdrand_multiple8_buf
+.hidden CRYPTO_rdrand_multiple8_buf
+.type	CRYPTO_rdrand_multiple8_buf,@function
+.align	16
+CRYPTO_rdrand_multiple8_buf:
+	testq	%rsi,%rsi
+	jz	.Lout
+	movq	$8,%rdx
+.Lloop:
+
+
+.byte	0x48, 0x0f, 0xc7, 0xf1
+	jnc	.Lerr
+	movq	%rcx,0(%rdi)
+	addq	%rdx,%rdi
+	subq	%rdx,%rsi
+	jnz	.Lloop
+.Lout:
+	movq	$1,%rax
+	.byte	0xf3,0xc3
+.Lerr:
+	xorq	%rax,%rax
 	.byte	0xf3,0xc3
 #endif
diff --git a/mac-x86_64/crypto/rand/rdrand-x86_64.S b/mac-x86_64/crypto/rand/rdrand-x86_64.S
index 1ba990f..f0df296 100644
--- a/mac-x86_64/crypto/rand/rdrand-x86_64.S
+++ b/mac-x86_64/crypto/rand/rdrand-x86_64.S
@@ -1,11 +1,48 @@
 #if defined(__x86_64__)
 .text	
 
+
+
+
 .globl	_CRYPTO_rdrand
 .private_extern _CRYPTO_rdrand
 
 .p2align	4
 _CRYPTO_rdrand:
-.byte	0x48, 0x0f, 0xc7, 0xf0
+	xorq	%rax,%rax
+
+
+.byte	0x48, 0x0f, 0xc7, 0xf1
+
+	adcq	%rax,%rax
+	movq	%rcx,0(%rdi)
+	.byte	0xf3,0xc3
+
+
+
+
+
+.globl	_CRYPTO_rdrand_multiple8_buf
+.private_extern _CRYPTO_rdrand_multiple8_buf
+
+.p2align	4
+_CRYPTO_rdrand_multiple8_buf:
+	testq	%rsi,%rsi
+	jz	L$out
+	movq	$8,%rdx
+L$loop:
+
+
+.byte	0x48, 0x0f, 0xc7, 0xf1
+	jnc	L$err
+	movq	%rcx,0(%rdi)
+	addq	%rdx,%rdi
+	subq	%rdx,%rsi
+	jnz	L$loop
+L$out:
+	movq	$1,%rax
+	.byte	0xf3,0xc3
+L$err:
+	xorq	%rax,%rax
 	.byte	0xf3,0xc3
 #endif
diff --git a/src/crypto/rand/asm/rdrand-x86_64.pl b/src/crypto/rand/asm/rdrand-x86_64.pl
index a917611..c32a55c 100644
--- a/src/crypto/rand/asm/rdrand-x86_64.pl
+++ b/src/crypto/rand/asm/rdrand-x86_64.pl
@@ -1,5 +1,19 @@
 #!/usr/bin/env perl
 
+# Copyright (c) 2015, Google Inc.
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
 $flavour = shift;
 $output  = shift;
 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
@@ -14,11 +28,47 @@
 print<<___;
 .text
 
+# CRYPTO_rdrand writes eight bytes of random data from the hardware RNG to
+# |out|. It returns one on success or zero on hardware failure.
+# int CRYPTO_rdrand(uint8_t out[8]);
 .globl	CRYPTO_rdrand
 .type	CRYPTO_rdrand,\@function,1
 .align	16
 CRYPTO_rdrand:
-	.byte 0x48, 0x0f, 0xc7, 0xf0
+	xorq %rax, %rax
+	# This is rdrand %rcx. It sets rcx to a random value and sets the carry
+	# flag on success.
+	.byte 0x48, 0x0f, 0xc7, 0xf1
+	# %rax is zero here, so adding it to itself with carry leaves the carry flag in %rax.
+	adcq %rax, %rax
+	movq %rcx, 0(%rdi)
+	retq
+
+# CRYPTO_rdrand_multiple8_buf fills |len| bytes at |buf| with random data from
+# the hardware RNG. The |len| argument must be a multiple of eight. It returns
+# one on success and zero on hardware failure.
+# int CRYPTO_rdrand_multiple8_buf(uint8_t *buf, size_t len);
+.globl CRYPTO_rdrand_multiple8_buf
+.type CRYPTO_rdrand_multiple8_buf,\@function,2
+.align 16
+CRYPTO_rdrand_multiple8_buf:
+	test %rsi, %rsi
+	jz .Lout
+	movq \$8, %rdx
+.Lloop:
+	# This is rdrand %rcx. It sets rcx to a random value and sets the carry
+	# flag on success.
+	.byte 0x48, 0x0f, 0xc7, 0xf1
+	jnc .Lerr
+	movq %rcx, 0(%rdi)
+	addq %rdx, %rdi
+	subq %rdx, %rsi
+	jnz .Lloop
+.Lout:
+	movq \$1, %rax
+	retq
+.Lerr:
+	xorq %rax, %rax
 	retq
 ___
 
diff --git a/src/crypto/rand/hwrand.c b/src/crypto/rand/hwrand.c
index 73d3de7..5f81f09 100644
--- a/src/crypto/rand/hwrand.c
+++ b/src/crypto/rand/hwrand.c
@@ -14,6 +14,7 @@
 
 #include <openssl/rand.h>
 
+#include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -26,21 +27,28 @@
   return (OPENSSL_ia32cap_P[1] & (1u << 30)) != 0;
 }
 
-/* CRYPTO_rdrand is defined in asm/rdrand-x86_64.pl */
-extern uint64_t CRYPTO_rdrand(void);
+/* These functions are defined in asm/rdrand-x86_64.pl */
+extern int CRYPTO_rdrand(uint8_t out[8]);
+extern int CRYPTO_rdrand_multiple8_buf(uint8_t *buf, size_t len);
 
-void CRYPTO_hwrand(uint8_t *buf, size_t len) {
-  while (len >= 8) {
-    uint64_t rand = CRYPTO_rdrand();
-    memcpy(buf, &rand, sizeof(rand));
-    len -= sizeof(rand);
-    buf += sizeof(rand);
+int CRYPTO_hwrand(uint8_t *buf, size_t len) {
+  const size_t len_multiple8 = len & ~7;
+  if (!CRYPTO_rdrand_multiple8_buf(buf, len_multiple8)) {
+    return 0;
+  }
+  len -= len_multiple8;
+
+  if (len != 0) {
+    assert(len < 8);
+
+    uint8_t rand_buf[8];
+    if (!CRYPTO_rdrand(rand_buf)) {
+      return 0;
+    }
+    memcpy(buf + len_multiple8, rand_buf, len);
   }
 
-  if (len > 0) {
-    uint64_t rand = CRYPTO_rdrand();
-    memcpy(buf, &rand, len);
-  }
+  return 1;
 }
 
 #else
diff --git a/src/crypto/rand/internal.h b/src/crypto/rand/internal.h
index 1cca7f3..5e6ea11 100644
--- a/src/crypto/rand/internal.h
+++ b/src/crypto/rand/internal.h
@@ -29,8 +29,9 @@
 int CRYPTO_have_hwrand(void);
 
 /* CRYPTO_hwrand fills |len| bytes at |buf| with entropy from the hardware.
- * This function can only be called if |CRYPTO_have_hwrand| returns one. */
-void CRYPTO_hwrand(uint8_t *buf, size_t len);
+ * This function can only be called if |CRYPTO_have_hwrand| returns one.
+ * It returns one on success or zero on hardware failure. */
+int CRYPTO_hwrand(uint8_t *buf, size_t len);
 
 
 #if defined(__cplusplus)
diff --git a/src/crypto/rand/rand.c b/src/crypto/rand/rand.c
index a647b6a..a96ac48 100644
--- a/src/crypto/rand/rand.c
+++ b/src/crypto/rand/rand.c
@@ -78,7 +78,8 @@
     return 1;
   }
 
-  if (!CRYPTO_have_hwrand()) {
+  if (!CRYPTO_have_hwrand() ||
+      !CRYPTO_hwrand(buf, len)) {
     /* Without a hardware RNG to save us from address-space duplication, the OS
      * entropy is used directly. */
     CRYPTO_sysrand(buf, len);
@@ -108,8 +109,6 @@
     state->partial_block_used = sizeof(state->partial_block);
   }
 
-  CRYPTO_hwrand(buf, len);
-
   if (len >= sizeof(state->partial_block)) {
     size_t remaining = len;
     while (remaining > 0) {
diff --git a/win-x86_64/crypto/rand/rdrand-x86_64.asm b/win-x86_64/crypto/rand/rdrand-x86_64.asm
index a63ea69..4c03791 100644
--- a/win-x86_64/crypto/rand/rdrand-x86_64.asm
+++ b/win-x86_64/crypto/rand/rdrand-x86_64.asm
@@ -5,6 +5,9 @@
 section	.text code align=64
 
 
+
+
+
 global	CRYPTO_rdrand
 
 ALIGN	16
@@ -16,7 +19,52 @@
 	mov	rdi,rcx
 
 
-DB	0x48,0x0f,0xc7,0xf0
+	xor	rax,rax
+
+
+DB	0x48,0x0f,0xc7,0xf1
+
+	adc	rax,rax
+	mov	QWORD[rdi],rcx
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+global	CRYPTO_rdrand_multiple8_buf
+
+ALIGN	16
+CRYPTO_rdrand_multiple8_buf:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_CRYPTO_rdrand_multiple8_buf:
+	mov	rdi,rcx
+	mov	rsi,rdx
+
+
+	test	rsi,rsi
+	jz	NEAR $L$out
+	mov	rdx,8
+$L$loop:
+
+
+DB	0x48,0x0f,0xc7,0xf1
+	jnc	NEAR $L$err
+	mov	QWORD[rdi],rcx
+	add	rdi,rdx
+	sub	rsi,rdx
+	jnz	NEAR $L$loop
+$L$out:
+	mov	rax,1
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+$L$err:
+	xor	rax,rax
 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
 	mov	rsi,QWORD[16+rsp]
 	DB	0F3h,0C3h		;repret