MdePkg BaseMemoryLibSse2: Add SSE2 implementation of API IsZeroBuffer()

Add the implementation of API IsZeroBuffer() via assembly in
BaseMemoryLibSse2.

The assembly code uses SSE2 XMM registers and related instructions.

Cc: Michael D Kinney <michael.d.kinney@intel.com>
Cc: Liming Gao <liming.gao@intel.com>
Cc: Jiewen Yao <jiewen.yao@intel.com>
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Hao Wu <hao.a.wu@intel.com>
Reviewed-by: Liming Gao <liming.gao@intel.com>
diff --git a/MdePkg/Library/BaseMemoryLibSse2/BaseMemoryLibSse2.inf b/MdePkg/Library/BaseMemoryLibSse2/BaseMemoryLibSse2.inf
index a78d823..fc82b16 100644
--- a/MdePkg/Library/BaseMemoryLibSse2/BaseMemoryLibSse2.inf
+++ b/MdePkg/Library/BaseMemoryLibSse2/BaseMemoryLibSse2.inf
@@ -3,7 +3,7 @@
 #

 #  Base Memory Library that uses SSE2 registers for high performance.

 #

-#  Copyright (c) 2007 - 2014, Intel Corporation. All rights reserved.<BR>

+#  Copyright (c) 2007 - 2016, Intel Corporation. All rights reserved.<BR>

 #

 #  This program and the accompanying materials

 #  are licensed and made available under the terms and conditions of the BSD License

@@ -42,6 +42,7 @@
   SetMem16Wrapper.c

   SetMemWrapper.c

   CopyMemWrapper.c

+  IsZeroBufferWrapper.c

   MemLibGuid.c

 

 [Sources.Ia32]

@@ -89,6 +90,7 @@
   Ia32/SetMem.asm

   Ia32/CopyMem.nasm

   Ia32/CopyMem.asm

+  Ia32/IsZeroBuffer.nasm

 

 [Sources.X64]

   X64/ScanMem64.nasm

@@ -135,6 +137,7 @@
   X64/SetMem.S

   X64/CopyMem.nasm

   X64/CopyMem.S

+  X64/IsZeroBuffer.nasm

 

 [Packages]

   MdePkg/MdePkg.dec

diff --git a/MdePkg/Library/BaseMemoryLibSse2/Ia32/IsZeroBuffer.nasm b/MdePkg/Library/BaseMemoryLibSse2/Ia32/IsZeroBuffer.nasm
new file mode 100644
index 0000000..68c09fe
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibSse2/Ia32/IsZeroBuffer.nasm
@@ -0,0 +1,74 @@
+;------------------------------------------------------------------------------

+;

+; Copyright (c) 2016, Intel Corporation. All rights reserved.<BR>

+; This program and the accompanying materials

+; are licensed and made available under the terms and conditions of the BSD License

+; which accompanies this distribution.  The full text of the license may be found at

+; http://opensource.org/licenses/bsd-license.php.

+;

+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,

+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.

+;

+; Module Name:

+;

+;   IsZeroBuffer.nasm

+;

+; Abstract:

+;

+;   IsZeroBuffer function

+;

+; Notes:

+;

+;------------------------------------------------------------------------------

+

+    SECTION .text

+

+;------------------------------------------------------------------------------

+;  BOOLEAN

+;  EFIAPI

+;  InternalMemIsZeroBuffer (

+;    IN CONST VOID  *Buffer,

+;    IN UINTN       Length

+;    );

+;------------------------------------------------------------------------------

+global ASM_PFX(InternalMemIsZeroBuffer)

+ASM_PFX(InternalMemIsZeroBuffer):

+    push         edi                   ; save edi (restored before each ret)

+    mov          edi, [esp + 8]        ; edi <- Buffer

+    mov          edx, [esp + 12]       ; edx <- Length

+    xor          ecx, ecx              ; ecx <- 0

+    sub          ecx, edi              ; ecx <- 0 - Buffer

+    and          ecx, 15               ; ecx + edi aligns on 16-byte boundary

+    jz           @Is16BytesZero        ; Buffer is already 16-byte aligned

+    cmp          ecx, edx              ; head bytes vs. Length (unsigned)

+    cmova        ecx, edx              ; bytes before the 16-byte boundary

+    sub          edx, ecx              ; edx <- bytes left after the head

+    xor          eax, eax              ; eax <- 0, also set ZF

+    repe         scasb                 ; compare al with up to ecx bytes at [edi]

+    jnz          @ReturnFalse          ; ZF=0 means non-zero element found

+@Is16BytesZero:

+    mov          ecx, edx              ; ecx <- remaining byte count

+    and          edx, 15               ; edx <- tail bytes (remaining mod 16)

+    shr          ecx, 4                ; ecx <- number of 16-byte blocks

+    jz           @IsBytesZero          ; no full 16-byte block to check

+.0:

+    pxor         xmm0, xmm0            ; xmm0 <- 0

+    pcmpeqb      xmm0, [edi]           ; check zero for 16 bytes

+    pmovmskb     eax, xmm0             ; eax <- compare results

+    cmp          eax, 0xffff           ; 0xffff means all 16 bytes are zero

+    jnz          @ReturnFalse          ; some byte in this block is non-zero

+    add          edi, 16               ; advance to the next 16-byte block

+    loop         .0                    ; ecx <- ecx - 1, repeat while ecx != 0

+@IsBytesZero:

+    mov          ecx, edx              ; ecx <- tail byte count

+    xor          eax, eax              ; eax <- 0, also set ZF

+    repe         scasb                 ; compare al with up to ecx bytes at [edi]

+    jnz          @ReturnFalse          ; ZF=0 means non-zero element found

+    pop          edi                   ; restore edi

+    mov          eax, 1                ; return TRUE

+    ret

+@ReturnFalse:

+    pop          edi                   ; restore edi

+    xor          eax, eax              ; eax <- 0

+    ret                                ; return FALSE

+

diff --git a/MdePkg/Library/BaseMemoryLibSse2/IsZeroBufferWrapper.c b/MdePkg/Library/BaseMemoryLibSse2/IsZeroBufferWrapper.c
new file mode 100644
index 0000000..c42c1aa
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibSse2/IsZeroBufferWrapper.c
@@ -0,0 +1,54 @@
+/** @file

+  Implementation of IsZeroBuffer function.

+

+  The following BaseMemoryLib instances contain the same copy of this file:

+

+    BaseMemoryLib

+    BaseMemoryLibMmx

+    BaseMemoryLibSse2

+    BaseMemoryLibRepStr

+    BaseMemoryLibOptDxe

+    BaseMemoryLibOptPei

+    PeiMemoryLib

+    UefiMemoryLib

+

+  Copyright (c) 2016, Intel Corporation. All rights reserved.<BR>

+  This program and the accompanying materials

+  are licensed and made available under the terms and conditions of the BSD License

+  which accompanies this distribution.  The full text of the license may be found at

+  http://opensource.org/licenses/bsd-license.php

+

+  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,

+  WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.

+

+**/

+

+#include "MemLibInternals.h"

+

+/**

+  Checks if the contents of a buffer are all zeros.

+

+  This function checks whether the contents of a buffer are all zeros. If the

+  contents are all zeros or Length is 0, return TRUE. Otherwise, return FALSE.

+

+  If Length > 0 and Buffer is NULL, then ASSERT().

+  If Length is greater than (MAX_ADDRESS - Buffer + 1), then ASSERT().

+

+  @param  Buffer      The pointer to the buffer to be checked.

+  @param  Length      The size of the buffer (in bytes) to be checked.

+

+  @retval TRUE        Contents of the buffer are all zeros.

+  @retval FALSE       Contents of the buffer are not all zeros.

+

+**/

+BOOLEAN

+EFIAPI

+IsZeroBuffer (

+  IN CONST VOID  *Buffer,

+  IN UINTN       Length

+  )

+{

+  ASSERT (!(Buffer == NULL && Length > 0));

+  ASSERT ((Length == 0) || ((Length - 1) <= (MAX_ADDRESS - (UINTN)Buffer)));  // (Length - 1) wraps if Length == 0

+  return InternalMemIsZeroBuffer (Buffer, Length);

+}

diff --git a/MdePkg/Library/BaseMemoryLibSse2/MemLibInternals.h b/MdePkg/Library/BaseMemoryLibSse2/MemLibInternals.h
index c8e80af..99fd515 100644
--- a/MdePkg/Library/BaseMemoryLibSse2/MemLibInternals.h
+++ b/MdePkg/Library/BaseMemoryLibSse2/MemLibInternals.h
@@ -9,7 +9,7 @@
     BaseMemoryLibOptDxe

     BaseMemoryLibOptPei

 

-  Copyright (c) 2006 - 2010, Intel Corporation. All rights reserved.<BR>

+  Copyright (c) 2006 - 2016, Intel Corporation. All rights reserved.<BR>

   This program and the accompanying materials

   are licensed and made available under the terms and conditions of the BSD License

   which accompanies this distribution.  The full text of the license may be found at

@@ -231,4 +231,21 @@
   IN      UINT64                    Value

   );

 

+/**

+  Checks whether the contents of a buffer are all zeros.

+

+  @param  Buffer  The pointer to the buffer to be checked.

+  @param  Length  The size of the buffer (in bytes) to be checked.

+

+  @retval TRUE    Contents of the buffer are all zeros (also when Length is 0).

+  @retval FALSE   Contents of the buffer are not all zeros.

+

+**/

+BOOLEAN

+EFIAPI

+InternalMemIsZeroBuffer (

+  IN CONST VOID  *Buffer,

+  IN UINTN       Length

+  );

+

 #endif

diff --git a/MdePkg/Library/BaseMemoryLibSse2/X64/IsZeroBuffer.nasm b/MdePkg/Library/BaseMemoryLibSse2/X64/IsZeroBuffer.nasm
new file mode 100644
index 0000000..3a0dc6f
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibSse2/X64/IsZeroBuffer.nasm
@@ -0,0 +1,76 @@
+;------------------------------------------------------------------------------

+;

+; Copyright (c) 2016, Intel Corporation. All rights reserved.<BR>

+; This program and the accompanying materials

+; are licensed and made available under the terms and conditions of the BSD License

+; which accompanies this distribution.  The full text of the license may be found at

+; http://opensource.org/licenses/bsd-license.php.

+;

+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,

+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.

+;

+; Module Name:

+;

+;   IsZeroBuffer.nasm

+;

+; Abstract:

+;

+;   IsZeroBuffer function

+;

+; Notes:

+;

+;------------------------------------------------------------------------------

+

+    DEFAULT REL

+    SECTION .text

+

+;------------------------------------------------------------------------------

+;  BOOLEAN

+;  EFIAPI

+;  InternalMemIsZeroBuffer (

+;    IN CONST VOID  *Buffer,

+;    IN UINTN       Length

+;    );

+;------------------------------------------------------------------------------

+global ASM_PFX(InternalMemIsZeroBuffer)

+ASM_PFX(InternalMemIsZeroBuffer):

+    push         rdi                   ; save rdi (restored before each ret)

+    mov          rdi, rcx              ; rdi <- Buffer

+    xor          rcx, rcx              ; rcx <- 0

+    sub          rcx, rdi              ; rcx <- 0 - Buffer

+    and          rcx, 15               ; rcx + rdi aligns on 16-byte boundary

+    jz           @Is16BytesZero        ; Buffer is already 16-byte aligned

+    cmp          rcx, rdx              ; Length already in rdx

+    cmova        rcx, rdx              ; bytes before the 16-byte boundary

+    sub          rdx, rcx              ; rdx <- bytes left after the head

+    xor          rax, rax              ; rax <- 0, also set ZF

+    repe         scasb                 ; compare al with up to rcx bytes at [rdi]

+    jnz          @ReturnFalse          ; ZF=0 means non-zero element found

+@Is16BytesZero:

+    mov          rcx, rdx              ; rcx <- remaining byte count

+    and          rdx, 15               ; rdx <- tail bytes (remaining mod 16)

+    shr          rcx, 4                ; rcx <- number of 16-byte blocks

+    jz           @IsBytesZero          ; no full 16-byte block to check

+.0:

+    pxor         xmm0, xmm0            ; xmm0 <- 0

+    pcmpeqb      xmm0, [rdi]           ; check zero for 16 bytes

+    pmovmskb     eax, xmm0             ; eax <- compare results

+                                       ; nasm doesn't support 64-bit destination

+                                       ; for pmovmskb

+    cmp          eax, 0xffff           ; 0xffff means all 16 bytes are zero

+    jnz          @ReturnFalse          ; some byte in this block is non-zero

+    add          rdi, 16               ; advance to the next 16-byte block

+    loop         .0                    ; rcx <- rcx - 1, repeat while rcx != 0

+@IsBytesZero:

+    mov          rcx, rdx              ; rcx <- tail byte count

+    xor          rax, rax              ; rax <- 0, also set ZF

+    repe         scasb                 ; compare al with up to rcx bytes at [rdi]

+    jnz          @ReturnFalse          ; ZF=0 means non-zero element found

+    pop          rdi                   ; restore rdi

+    mov          rax, 1                ; return TRUE

+    ret

+@ReturnFalse:

+    pop          rdi                   ; restore rdi

+    xor          rax, rax              ; rax <- 0

+    ret                                ; return FALSE

+