MdePkg BaseMemoryLibOptDxe: Convert X64/CopyMem.asm to NASM

The BaseTools/Scripts/ConvertMasmToNasm.py script was used to convert
X64/CopyMem.asm to X64/CopyMem.nasm.

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/BaseMemoryLibOptDxe.inf b/MdePkg/Library/BaseMemoryLibOptDxe/BaseMemoryLibOptDxe.inf
index b5e750a..e637034 100644
--- a/MdePkg/Library/BaseMemoryLibOptDxe/BaseMemoryLibOptDxe.inf
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/BaseMemoryLibOptDxe.inf
@@ -122,6 +122,7 @@
   X64/SetMem.nasm

   X64/SetMem.asm

   X64/SetMem.S

+  X64/CopyMem.nasm

   X64/CopyMem.asm

   X64/CopyMem.S

   ScanMem64Wrapper.c

diff --git a/MdePkg/Library/BaseMemoryLibOptDxe/X64/CopyMem.nasm b/MdePkg/Library/BaseMemoryLibOptDxe/X64/CopyMem.nasm
new file mode 100644
index 0000000..d312c2d
--- /dev/null
+++ b/MdePkg/Library/BaseMemoryLibOptDxe/X64/CopyMem.nasm
@@ -0,0 +1,83 @@
+;------------------------------------------------------------------------------

+;

+; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>

+; This program and the accompanying materials

+; are licensed and made available under the terms and conditions of the BSD License

+; which accompanies this distribution.  The full text of the license may be found at

+; http://opensource.org/licenses/bsd-license.php.

+;

+; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,

+; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.

+;

+; Module Name:

+;

+;   CopyMem.nasm

+;

+; Abstract:

+;

+;   CopyMem function

+;

+; Notes:

+;

+;------------------------------------------------------------------------------

+

+    DEFAULT REL

+    SECTION .text

+

+;------------------------------------------------------------------------------

+;  VOID *

+;  EFIAPI

+;  InternalMemCopyMem (

+;    IN VOID   *Destination,

+;    IN VOID   *Source,

+;    IN UINTN  Count

+;    );

+;------------------------------------------------------------------------------

+global ASM_PFX(InternalMemCopyMem)

+ASM_PFX(InternalMemCopyMem):

+    push    rsi                         ; preserve rsi; it is overwritten below and restored before ret

+    push    rdi                         ; preserve rdi; it is overwritten below and restored before ret

+    mov     rsi, rdx                    ; rsi <- Source

+    mov     rdi, rcx                    ; rdi <- Destination

+    lea     r9, [rsi + r8 - 1]          ; r9 <- address of last Source byte (Source + Count - 1)

+    cmp     rsi, rdi                    ; compare Source with Destination; flags are consumed by jae below

+    mov     rax, rdi                    ; rax <- Destination as return value (mov leaves the flags intact)

+    jae     .0                          ; Copy forward if Source >= Destination

+    cmp     r9, rdi                     ; Overlapped? (does the last Source byte reach Destination?)

+    jae     @CopyBackward               ; Copy backward if overlapped

+.0:

+    xor     rcx, rcx                    ; rcx <- 0

+    sub     rcx, rdi                    ; rcx <- -rdi

+    and     rcx, 15                     ; rcx <- number of bytes needed to bring rdi to a 16-byte boundary

+    jz      .1                          ; skip if rcx == 0 (rdi is already 16-byte aligned)

+    cmp     rcx, r8                     ; alignment prefix longer than Count?

+    cmova   rcx, r8                     ; rcx <- min(rcx, Count), unsigned

+    sub     r8, rcx                     ; r8 <- bytes left after the alignment prefix

+    rep     movsb                       ; copy the prefix byte by byte; rsi/rdi advance by rcx

+.1:

+    mov     rcx, r8                     ; rcx <- bytes still to copy

+    and     r8, 15                      ; r8 <- tail bytes left over after the 16-byte chunks

+    shr     rcx, 4                      ; rcx <- # of DQwords to copy

+    jz      @CopyBytes                  ; no full 16-byte chunk: go straight to the byte copy

+    movdqa  [rsp + 0x18], xmm0           ; save xmm0 on stack, 8 bytes above the return address (two pushes = 0x10); movdqa needs a 16-byte aligned slot - assumes ABI stack alignment at entry

+.2:

+    movdqu  xmm0, [rsi]                 ; rsi may not be 16-byte aligned

+    movntdq [rdi], xmm0                 ; non-temporal store; rdi should be 16-byte aligned

+    add     rsi, 16                     ; advance Source by one DQword

+    add     rdi, 16                     ; advance Destination by one DQword

+    loop    .2                          ; decrement rcx and repeat while rcx != 0

+    mfence                              ; fence after the non-temporal stores above

+    movdqa  xmm0, [rsp + 0x18]           ; restore xmm0

+    jmp     @CopyBytes                  ; copy remaining bytes

+@CopyBackward:

+    mov     rsi, r9                     ; rsi <- Last byte of Source

+    lea     rdi, [rdi + r8 - 1]         ; rdi <- Last byte of Destination

+    std                                 ; set DF so rep movsb below walks from high to low addresses

+@CopyBytes:

+    mov     rcx, r8                     ; rcx <- remaining bytes (tail of the forward copy, or the full Count when copying backward)

+    rep     movsb                       ; copy rcx bytes in the direction selected by DF

+    cld                                 ; clear DF before returning (it was only set on the backward path)

+    pop     rdi                         ; restore the caller's rdi

+    pop     rsi                         ; restore the caller's rsi

+    ret                                 ; rax still holds Destination

+