 // Source: src/os_cpu/solaris_x86/vm/solaris_x86_32.s - platform/external/jetbrains/jdk8u_hotspot - Git at Google

 //
 // Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
 // under the terms of the GNU General Public License version 2 only, as
 // published by the Free Software Foundation.
 //
 // This code is distributed in the hope that it will be useful, but WITHOUT
 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 // version 2 for more details (a copy is included in the LICENSE file that
 // accompanied this code).
 //
 // You should have received a copy of the GNU General Public License version
 // 2 along with this work; if not, write to the Free Software Foundation,
 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 //
 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 // or visit www.oracle.com if you need additional information or have any
 // questions.
 //

 	// Symbols exported from this file; each one is defined as a label below.
 	.globl fixcw
 	.globl sse_check
 	.globl sse_unavailable
 	.globl gs_load
 	.globl gs_thread
         .globl _Atomic_cmpxchg_long_gcc

         // NOTE WELL!  The _Copy functions are called directly
 	// from server-compiler-generated code via CallLeafNoFP,
 	// which means that they *must* either not use floating
 	// point or use it in the same manner as does the server
 	// compiler.

         .globl _Copy_conjoint_bytes
         .globl _Copy_arrayof_conjoint_bytes
         .globl _Copy_conjoint_jshorts_atomic
 	.globl _Copy_arrayof_conjoint_jshorts
         .globl _Copy_conjoint_jints_atomic
         .globl _Copy_arrayof_conjoint_jints
 	.globl _Copy_conjoint_jlongs_atomic
         .globl _mmx_Copy_arrayof_conjoint_jshorts

 	// Section flags "ax" = allocatable + executable: everything below is code.
 	.section .text,"ax"

 / Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp
 / Set fpu to 53 bit precision.  This happens too early to use a stub.
 	// Loads the x87 control word with 0x27f (precision-control field = 53-bit
 	// significand, all x87 exception mask bits set).  The value is pushed so
 	// that fldcw can read it from memory; popl only releases that stack slot
 	// (and leaves 0x27f in %eax).
 	.align   16
 fixcw:
 	pushl    $0x27f
 	fldcw    0(%esp)
 	popl     %eax
 	ret

         // "rep; nop" assembles to the PAUSE spin-wait hint; the routine then
         // returns 1 in %eax.  Takes no arguments and touches no other register.
         .align  16
         .globl  SpinPause
 SpinPause:
         rep
         nop
         movl    $1, %eax
         ret


 / Test SSE availability, used by os_solaris_i486.cpp
 	// Returns 1 in %eax when the xorps below executes normally.  The exported
 	// label sse_unavailable is a second entry point that returns 0; per the
 	// comment on it, a signal handler resumes execution there after the fault.
 	// Both paths end in a plain ret, so the stack must be unchanged on resume.
 	.align   16
 sse_check:
 	/ Fault if SSE not available
 	xorps %xmm0,%xmm0
 	/ No fault
 	movl $1,%eax
 	ret
 	/ Signal handler continues here if SSE is not available
 sse_unavailable:
 	xorl %eax,%eax
 	ret

 / Fast thread accessors, used by threadLS_solaris_i486.cpp
 	// Returns in %eax the 32-bit value at %gs:<offset>, where <offset> is the
 	// first stack argument.  Clobbers %ecx.
 	.align   16
 gs_load:
 	movl 4(%esp),%ecx
 	movl %gs:(%ecx),%eax
 	ret

 	// Returns in %eax the 32-bit value stored at %gs:0.  Takes no arguments.
 	.align   16
 gs_thread:
 	movl %gs:0x0,%eax
 	ret

         / Support for void Copy::conjoint_bytes(void* from,
         /                                       void* to,
         /                                       size_t count)
         // Arguments are read from the caller's stack: from, to, count (bytes).
         // %esi and %edi are saved and restored; %eax, %ecx, %edx are scratch.
         // Copies ascending (cb_CopyRight) unless 'to' lies in
         // (from, from + count - 1]; in that case it copies descending
         // (cb_CopyLeft) with the direction flag set and clears it before return.
         // Stack offsets are written as <bytes pushed so far> + <argument offset>.
         .align   16
 _Copy_conjoint_bytes:
         pushl    %esi
         movl     4+12(%esp),%ecx      / count
         pushl    %edi
         movl     8+ 4(%esp),%esi      / from
         movl     8+ 8(%esp),%edi      / to
         cmpl     %esi,%edi
         // leal does not modify flags, so the jbe below still tests to vs. from
         leal     -1(%esi,%ecx),%eax   / from + count - 1
         jbe      cb_CopyRight
         cmpl     %eax,%edi
         jbe      cb_CopyLeft
         / copy from low to high
 cb_CopyRight:
         cmpl     $3,%ecx
         jbe      5f                   / <= 3 bytes
         / align source address at dword address boundary
         movl     %ecx,%eax            / original count
         movl     $4,%ecx
         subl     %esi,%ecx
         andl     $3,%ecx              / prefix byte count
         jz       1f                   / no prefix
         subl     %ecx,%eax            / byte count less prefix
         / copy prefix
         // %edi becomes (to - from) so that (%edi,%esi,1) addresses the
         // destination while only %esi has to be advanced in the loop
         subl     %esi,%edi
 0:      movb     (%esi),%dl
         movb     %dl,(%edi,%esi,1)
         addl     $1,%esi
         subl     $1,%ecx
         jnz      0b
         // turn %edi back into an absolute destination pointer
         addl     %esi,%edi
 1:      movl     %eax,%ecx            / byte count less prefix
         shrl     $2,%ecx              / dword count
         jz       4f                   / no dwords to move
         cmpl     $32,%ecx
         jbe      2f                   / <= 32 dwords
         / copy aligned dwords
         // bulk path: repeated string move of %ecx dwords, (%esi) -> (%edi)
         rep;     smovl
         jmp      4f
         / copy aligned dwords
         // short path: explicit loop, again using %edi as (to - from)
 2:      subl     %esi,%edi
         .align   16
 3:      movl     (%esi),%edx
         movl     %edx,(%edi,%esi,1)
         addl     $4,%esi
         subl     $1,%ecx
         jnz      3b
         addl     %esi,%edi
 4:      movl     %eax,%ecx            / byte count less prefix
 5:      andl     $3,%ecx              / suffix byte count
         jz       7f                   / no suffix
         / copy suffix
         // %eax is reused as the byte index for the 1..3 trailing bytes
         xorl     %eax,%eax
 6:      movb     (%esi,%eax,1),%dl
         movb     %dl,(%edi,%eax,1)
         addl     $1,%eax
         subl     $1,%ecx
         jnz      6b
 7:      popl     %edi
         popl     %esi
         ret
         / copy from high to low
 cb_CopyLeft:
         // direction flag set: the string move at 3: walks downwards
         std
         leal     -4(%edi,%ecx),%edi   / to + count - 4
         movl     %eax,%esi            / from + count - 1
         movl     %ecx,%eax
         subl     $3,%esi              / from + count - 4
         cmpl     $3,%ecx
         jbe      5f                   / <= 3 bytes
 1:      shrl     $2,%ecx              / dword count
         jz       4f                   / no dwords to move
         cmpl     $32,%ecx
         ja       3f                   / > 32 dwords
         / copy dwords, aligned or not
         subl     %esi,%edi
         .align   16
 2:      movl     (%esi),%edx
         movl     %edx,(%edi,%esi,1)
         subl     $4,%esi
         subl     $1,%ecx
         jnz      2b
         addl     %esi,%edi
         jmp      4f
         / copy dwords, aligned or not
 3:      rep;     smovl
 4:      movl     %eax,%ecx            / byte count
 5:      andl     $3,%ecx              / suffix byte count
         jz       7f                   / no suffix
         / copy suffix
         // %esi is 4 below the end of the uncopied bytes; +3 makes it point at
         // the highest uncopied byte, and the loop then walks down
         subl     %esi,%edi
         addl     $3,%esi
 6:      movb     (%esi),%dl
         movb     %dl,(%edi,%esi,1)
 	subl     $1,%esi
         subl     $1,%ecx
         jnz      6b
         // restore the direction flag to "ascending" before returning
 7:      cld
         popl     %edi
         popl     %esi
         ret

         / Support for void Copy::arrayof_conjoint_bytes(void* from,
         /                                               void* to,
         /                                               size_t count)
         /
         / Same as _Copy_conjoint_bytes, except no source alignment check.
         // Arguments are read from the caller's stack: from, to, count (bytes).
         // %esi and %edi are saved and restored; %eax, %ecx, %edx are scratch.
         // Copies ascending (acb_CopyRight) unless 'to' lies in
         // (from, from + count - 1]; then it copies descending (acb_CopyLeft)
         // with the direction flag set and clears it before returning.
         .align   16
 _Copy_arrayof_conjoint_bytes:
         pushl    %esi
         movl     4+12(%esp),%ecx      / count
         pushl    %edi
         movl     8+ 4(%esp),%esi      / from
         movl     8+ 8(%esp),%edi      / to
         cmpl     %esi,%edi
         // leal does not modify flags, so the jbe below still tests to vs. from
         leal     -1(%esi,%ecx),%eax   / from + count - 1
         jbe      acb_CopyRight
         cmpl     %eax,%edi
         jbe      acb_CopyLeft
         / copy from low to high
 acb_CopyRight:
         cmpl     $3,%ecx
         jbe      5f
         // %eax keeps the byte count; %ecx becomes the dword count
 1:      movl     %ecx,%eax
         shrl     $2,%ecx
         jz       4f
         cmpl     $32,%ecx
         ja       3f
         / copy aligned dwords
         // %edi becomes (to - from) so (%edi,%esi,1) addresses the destination
         subl     %esi,%edi
         .align   16
 2:      movl     (%esi),%edx
         movl     %edx,(%edi,%esi,1)
         addl     $4,%esi
         subl     $1,%ecx
         jnz      2b
         addl     %esi,%edi
         jmp      4f
         / copy aligned dwords
         // more than 32 dwords: repeated string move, (%esi) -> (%edi)
 3:      rep;     smovl
 4:      movl     %eax,%ecx
         // 0..3 trailing bytes remain
 5:      andl     $3,%ecx
         jz       7f
         / copy suffix
         xorl     %eax,%eax
 6:      movb     (%esi,%eax,1),%dl
         movb     %dl,(%edi,%eax,1)
         addl     $1,%eax
         subl     $1,%ecx
         jnz      6b
 7:      popl     %edi
         popl     %esi
         ret
 acb_CopyLeft:
         // direction flag set: the string move below walks downwards
         std
         leal     -4(%edi,%ecx),%edi   / to + count - 4
         movl     %eax,%esi            / from + count - 1
         movl     %ecx,%eax
         subl     $3,%esi              / from + count - 4
         cmpl     $3,%ecx
         jbe      5f
 1:      shrl     $2,%ecx
         jz       4f
         cmpl     $32,%ecx
         jbe      2f                   / <= 32 dwords
         rep;     smovl
         jmp      4f
 	// Advances the location counter by 8 bytes.  The gap follows an
 	// unconditional jmp, so it is never executed.
 	// NOTE(review): presumably layout padding for the loop below - confirm.
 	.=.+8
 2:      subl     %esi,%edi
         .align   16
 3:      movl     (%esi),%edx
         movl     %edx,(%edi,%esi,1)
         subl     $4,%esi
         subl     $1,%ecx
         jnz      3b
         addl     %esi,%edi
 4:      movl     %eax,%ecx
 5:      andl     $3,%ecx
         jz       7f
         // +3 makes %esi point at the highest uncopied byte; loop walks down
         subl     %esi,%edi
         addl     $3,%esi
 6:      movb     (%esi),%dl
         movb     %dl,(%edi,%esi,1)
 	subl     $1,%esi
         subl     $1,%ecx
         jnz      6b
         // restore the direction flag to "ascending" before returning
 7:      cld
         popl     %edi
         popl     %esi
         ret

         / Support for void Copy::conjoint_jshorts_atomic(void* from,
         /                                                void* to,
         /                                                size_t count)
         // Arguments are read from the caller's stack: from, to, count, where
         // count is in 2-byte elements (addresses are scaled by 2 below).
         // %esi and %edi are saved and restored; %eax, %ecx, %edx are scratch.
         // Copies ascending (cs_CopyRight) unless 'to' lies in
         // (from, from + count*2 - 2]; then it copies descending (cs_CopyLeft)
         // with the direction flag set and clears it before returning.
         // Data is moved with 16-bit or 32-bit loads/stores only.
         .align   16
 _Copy_conjoint_jshorts_atomic:
         pushl    %esi
         movl     4+12(%esp),%ecx      / count
         pushl    %edi
         movl     8+ 4(%esp),%esi      / from
         movl     8+ 8(%esp),%edi      / to
         cmpl     %esi,%edi
         // leal does not modify flags, so the jbe below still tests to vs. from
         leal     -2(%esi,%ecx,2),%eax / from + count*2 - 2
         jbe      cs_CopyRight
         cmpl     %eax,%edi
         jbe      cs_CopyLeft
         / copy from low to high
 cs_CopyRight:
         / align source address at dword address boundary
         movl     %esi,%eax            / original from
         andl     $3,%eax              / either 0 or 2
         jz       1f                   / no prefix
         / copy prefix
         subl     $1,%ecx
         jl       5f                   / zero count
         movw     (%esi),%dx
         movw     %dx,(%edi)
         addl     %eax,%esi            / %eax == 2
         addl     %eax,%edi
 1:      movl     %ecx,%eax            / word count less prefix
         // one-operand sarl shifts right by one bit: words -> dwords
         sarl     %ecx                 / dword count
         jz       4f                   / no dwords to move
         cmpl     $32,%ecx
         jbe      2f                   / <= 32 dwords
         / copy aligned dwords
         rep;     smovl
         jmp      4f
         / copy aligned dwords
         // %edi becomes (to - from) so (%edi,%esi,1) addresses the destination
 2:      subl     %esi,%edi
         .align   16
 3:      movl     (%esi),%edx
         movl     %edx,(%edi,%esi,1)
         addl     $4,%esi
         subl     $1,%ecx
         jnz      3b
         addl     %esi,%edi
         // low bit of the word count tells whether one 16-bit element remains
 4:      andl     $1,%eax              / suffix count
         jz       5f                   / no suffix
         / copy suffix
         movw     (%esi),%dx
         movw     %dx,(%edi)
 5:      popl     %edi
         popl     %esi
         ret
         / copy from high to low
 cs_CopyLeft:
         // direction flag set: the string move at 3: walks downwards
         std
         leal     -4(%edi,%ecx,2),%edi / to + count*2 - 4
         movl     %eax,%esi            / from + count*2 - 2
         movl     %ecx,%eax
         subl     $2,%esi              / from + count*2 - 4
 1:      sarl     %ecx                 / dword count
         jz       4f                   / no dwords to move
         cmpl     $32,%ecx
         ja       3f                   / > 32 dwords
         subl     %esi,%edi
         .align   16
 2:      movl     (%esi),%edx
         movl     %edx,(%edi,%esi,1)
         subl     $4,%esi
         subl     $1,%ecx
         jnz      2b
         addl     %esi,%edi
         jmp      4f
 3:      rep;     smovl
 4:      andl     $1,%eax              / suffix count
         jz       5f                   / no suffix
         / copy suffix
         // pointers sit one dword below the copied region; the remaining
         // element is 2 bytes above them
         addl     $2,%esi
         addl     $2,%edi
         movw     (%esi),%dx
         movw     %dx,(%edi)
         // restore the direction flag to "ascending" before returning
 5:      cld
         popl     %edi
         popl     %esi
         ret

         / Support for void Copy::arrayof_conjoint_jshorts(void* from,
         /                                                 void* to,
         /                                                 size_t count)
         // Arguments are read from the caller's stack: from, to, count, where
         // count is in 2-byte elements.  Unlike _Copy_conjoint_jshorts_atomic
         // there is no source-alignment prefix step.
         // %esi and %edi are saved and restored; %eax, %ecx, %edx are scratch.
         // Copies ascending (acs_CopyRight) unless 'to' lies in
         // (from, from + count*2 - 2]; then it copies descending (acs_CopyLeft)
         // with the direction flag set and clears it before returning.
         .align   16
 _Copy_arrayof_conjoint_jshorts:
         pushl    %esi
         movl     4+12(%esp),%ecx      / count
         pushl    %edi
         movl     8+ 4(%esp),%esi      / from
         movl     8+ 8(%esp),%edi      / to
         cmpl     %esi,%edi
         // leal does not modify flags, so the jbe below still tests to vs. from
         leal     -2(%esi,%ecx,2),%eax / from + count*2 - 2
         jbe      acs_CopyRight
         cmpl     %eax,%edi
         jbe      acs_CopyLeft
 acs_CopyRight:
         movl     %ecx,%eax            / word count
         // one-operand sarl shifts right by one bit: words -> dwords
         sarl     %ecx                 / dword count
         jz       4f                   / no dwords to move
         cmpl     $32,%ecx
         jbe      2f                   / <= 32 dwords
         / copy aligned dwords
         rep;     smovl
         jmp      4f
         / copy aligned dwords
         // Advances the location counter by 5 bytes.  The gap follows an
         // unconditional jmp, so it is never executed.
         // NOTE(review): presumably layout padding for the loop below - confirm.
         .=.+5
         // %edi becomes (to - from) so (%edi,%esi,1) addresses the destination
 2:      subl     %esi,%edi
         .align   16
 3:      movl     (%esi),%edx
         movl     %edx,(%edi,%esi,1)
         addl     $4,%esi
         subl     $1,%ecx
         jnz      3b
         addl     %esi,%edi
         // low bit of the word count tells whether one 16-bit element remains
 4:      andl     $1,%eax              / suffix count
         jz       5f                   / no suffix
         / copy suffix
         movw     (%esi),%dx
         movw     %dx,(%edi)
 5:      popl     %edi
         popl     %esi
         ret
 acs_CopyLeft:
         // direction flag set: the string move at 3: walks downwards
         std
         leal     -4(%edi,%ecx,2),%edi / to + count*2 - 4
         movl     %eax,%esi            / from + count*2 - 2
         movl     %ecx,%eax
         subl     $2,%esi              / from + count*2 - 4
         sarl     %ecx                 / dword count
         jz       4f                   / no dwords to move
         cmpl     $32,%ecx
         ja       3f                   / > 32 dwords
         subl     %esi,%edi
         .align   16
 2:      movl     (%esi),%edx
         movl     %edx,(%edi,%esi,1)
         subl     $4,%esi
         subl     $1,%ecx
         jnz      2b
         addl     %esi,%edi
         jmp      4f
 3:      rep;     smovl
 4:      andl     $1,%eax              / suffix count
         jz       5f                   / no suffix
         / copy suffix
         // the remaining element is 2 bytes above the current pointers
         addl     $2,%esi
         addl     $2,%edi
         movw     (%esi),%dx
         movw     %dx,(%edi)
         // restore the direction flag to "ascending" before returning
 5:      cld
         popl     %edi
         popl     %esi
         ret

         / Support for void Copy::conjoint_jints_atomic(void* from,
         /                                              void* to,
         /                                              size_t count)
         / Equivalent to
         /   arrayof_conjoint_jints
         // Two exported labels share this body.  Arguments are read from the
         // caller's stack: from, to, count, where count is in 4-byte elements.
         // %esi and %edi are saved and restored; %eax, %ecx, %edx are scratch.
         // Copies ascending (ci_CopyRight) unless 'to' lies in
         // (from, from + count*4 - 4]; then it copies descending (ci_CopyLeft)
         // with the direction flag set and clears it before returning.
         // Every element is moved with one 32-bit load and one 32-bit store,
         // or by the dword string move.
         .align   16
 _Copy_conjoint_jints_atomic:
 _Copy_arrayof_conjoint_jints:
         pushl    %esi
         movl     4+12(%esp),%ecx      / count
         pushl    %edi
         movl     8+ 4(%esp),%esi      / from
         movl     8+ 8(%esp),%edi      / to
         cmpl     %esi,%edi
         // leal does not modify flags, so the jbe below still tests to vs. from
         leal     -4(%esi,%ecx,4),%eax / from + count*4 - 4
         jbe      ci_CopyRight
         cmpl     %eax,%edi
         jbe      ci_CopyLeft
 ci_CopyRight:
         cmpl     $32,%ecx
         jbe      2f                   / <= 32 dwords
         rep;     smovl
         popl     %edi
         popl     %esi
         ret
         // Advances the location counter by 10 bytes.  The gap follows a ret,
         // so it is never executed.
         // NOTE(review): presumably layout padding for the loop below - confirm.
         .=.+10
         // %edi becomes (to - from); the loop is entered at its decrement so
         // that a count of 0 copies nothing
 2:      subl     %esi,%edi
         jmp      4f
         .align   16
 3:      movl     (%esi),%edx
         movl     %edx,(%edi,%esi,1)
         addl     $4,%esi
 4:      subl     $1,%ecx
         jge      3b
         popl     %edi
         popl     %esi
         ret
 ci_CopyLeft:
         // direction flag set: the string move at 4: walks downwards
         std
         leal     -4(%edi,%ecx,4),%edi / to + count*4 - 4
         cmpl     $32,%ecx
         ja       4f                   / > 32 dwords
         // short path walks %eax (last source dword) downwards; %esi is unused
         subl     %eax,%edi            / eax == from + count*4 - 4
         jmp      3f
         .align   16
 2:      movl     (%eax),%edx
         movl     %edx,(%edi,%eax,1)
         subl     $4,%eax
 3:      subl     $1,%ecx
         jge      2b
         cld
         popl     %edi
         popl     %esi
         ret
 4:      movl     %eax,%esi            / from + count*4 - 4
         rep;     smovl
         // restore the direction flag to "ascending" before returning
         cld
         popl     %edi
         popl     %esi
         ret

         / Support for void Copy::conjoint_jlongs_atomic(jlong* from,
         /                                               jlong* to,
         /                                               size_t count)
         /
         / 32-bit
         /
         / count treated as signed
         /
         / if (from > to) {
         /   while (--count >= 0) {
         /     *to++ = *from++;
         /   }
         / } else {
         /   while (--count >= 0) {
         /     to[count] = from[count];
         /   }
         / }
         // Arguments are read from the caller's stack: from, to, count, where
         // count is in 8-byte elements and is tested with signed jge.
         // No registers are saved; %eax, %ecx, %edx and the x87 stack are used.
         // Each element is moved by one 64-bit x87 integer load (fildll) and
         // one 64-bit store-and-pop (fistpll), so the x87 stack depth is
         // unchanged after every element.
         .align   16
 _Copy_conjoint_jlongs_atomic:
         movl     4+8(%esp),%ecx       / count
         movl     4+0(%esp),%eax       / from
         movl     4+4(%esp),%edx       / to
         cmpl     %eax,%edx
         // to >= from: copy from the highest index downwards
         jae      cla_CopyLeft
 cla_CopyRight:
         // %edx becomes (to - from) so (%edx,%eax,1) addresses the destination;
         // the loop is entered at its decrement so a count of 0 copies nothing
         subl     %eax,%edx
         jmp      2f
         .align   16
 1:      fildll   (%eax)
         fistpll  (%edx,%eax,1)
         addl     $8,%eax
 2:      subl     $1,%ecx
         jge      1b
         ret
         .align   16
         // descending loop body; entry is at cla_CopyLeft below, so %ecx is
         // decremented before it is first used as the element index
 3:      fildll   (%eax,%ecx,8)
         fistpll  (%edx,%ecx,8)
 cla_CopyLeft:
         subl     $1,%ecx
         jge      3b
         ret

         / Support for void Copy::arrayof_conjoint_jshorts(void* from,
         /                                                 void* to,
         /                                                 size_t count)
        // Arguments are read from the caller's stack: from, to, count, where
        // count is in 2-byte elements.
        // %esi and %edi are saved and restored; %eax, %ecx, %edx and the MMX
        // registers %mm0-%mm2 are scratch.
        // Ascending copies of 33 or more dwords move 64-byte blocks through
        // MMX registers and execute emms afterwards; everything else uses the
        // same dword loops as the non-MMX routine.  The descending path
        // (mmx_acs_CopyLeft) sets the direction flag and clears it on return.
        .align   16
 _mmx_Copy_arrayof_conjoint_jshorts:
         pushl    %esi
         movl     4+12(%esp),%ecx
         pushl    %edi
         movl     8+ 4(%esp),%esi
         movl     8+ 8(%esp),%edi
         cmpl     %esi,%edi
         // leal does not modify flags, so the jbe below still tests to vs. from
         leal     -2(%esi,%ecx,2),%eax
         jbe      mmx_acs_CopyRight
         cmpl     %eax,%edi
         jbe      mmx_acs_CopyLeft
 mmx_acs_CopyRight:
         // %eax keeps the word count; %ecx becomes the dword count
         movl     %ecx,%eax
         sarl     %ecx
         je       5f
         cmpl     $33,%ecx
         jae      3f
         // dword loop; %edi is (to - from) while it runs
 1:      subl     %esi,%edi
         .align   16
 2:      movl     (%esi),%edx
         movl     %edx,(%edi,%esi,1)
         addl     $4,%esi
         subl     $1,%ecx
         jnz      2b
         addl     %esi,%edi
         jmp      5f
         // single (non-repeated) dword string move; it does not change %ecx,
         // so the count is adjusted by hand on the next line
 3:      smovl / align to 8 bytes, we know we are 4 byte aligned to start
         subl     $1,%ecx
         // NOTE(review): label 4 is defined before the .align, so "jge 4b"
         // lands on the start of any alignment padding - confirm intended.
         // Each iteration moves 64 bytes (16 dwords) and subtracts 16 from %ecx.
 4:      .align   16
         movq     0(%esi),%mm0
         addl     $64,%edi
         movq     8(%esi),%mm1
         subl     $16,%ecx
         movq     16(%esi),%mm2
         movq     %mm0,-64(%edi)
         movq     24(%esi),%mm0
         movq     %mm1,-56(%edi)
         movq     32(%esi),%mm1
         movq     %mm2,-48(%edi)
         movq     40(%esi),%mm2
         movq     %mm0,-40(%edi)
         movq     48(%esi),%mm0
         movq     %mm1,-32(%edi)
         movq     56(%esi),%mm1
         movq     %mm2,-24(%edi)
         movq     %mm0,-16(%edi)
         addl     $64,%esi
         movq     %mm1,-8(%edi)
         cmpl     $16,%ecx
         jge      4b
         // leave MMX state before returning to code that may use the x87 FPU
         emms
         // testl clears CF, so "ja" here is taken exactly when %ecx != 0:
         // the leftover dwords go through the plain loop at 1:
 	testl    %ecx,%ecx
 	ja       1b
         // low bit of the word count tells whether one 16-bit element remains
 5:      andl     $1,%eax
         je       7f
 6:      movw     (%esi),%dx
         movw     %dx,(%edi)
 7:      popl     %edi
         popl     %esi
         ret
 mmx_acs_CopyLeft:
         // direction flag set: the string move at 3: walks downwards
         std
         leal     -4(%edi,%ecx,2),%edi
         movl     %eax,%esi
         movl     %ecx,%eax
         subl     $2,%esi
         sarl     %ecx
         je       4f
         cmpl     $32,%ecx
         ja       3f
         subl     %esi,%edi
         .align   16
 2:      movl     (%esi),%edx
         movl     %edx,(%edi,%esi,1)
         subl     $4,%esi
         subl     $1,%ecx
         jnz      2b
         addl     %esi,%edi
         jmp      4f
 3:      rep;     smovl
 4:      andl     $1,%eax
         je       6f
         // the remaining element is 2 bytes above the current pointers
         addl     $2,%esi
         addl     $2,%edi
 5:      movw     (%esi),%dx
         movw     %dx,(%edi)
         // restore the direction flag to "ascending" before returning
 6:      cld
         popl     %edi
         popl     %esi
         ret


         / Support for jlong Atomic::cmpxchg(jlong exchange_value,
         /                                   volatile jlong* dest,
         /                                   jlong compare_value,
         /                                   bool is_MP)
         / Used only for Solaris/gcc builds
         // 64-bit compare-and-exchange built on cmpxchg8b.
         // Stack arguments: exchange_value (8 bytes), dest (pointer),
         // compare_value (8 bytes), is_MP (tested as a 32-bit word).
         // cmpxchg8b compares %edx:%eax with the 8 bytes at (%edi); on a match
         // it stores %ecx:%ebx there, otherwise it loads the memory value into
         // %edx:%eax.  The routine therefore returns in %edx:%eax the value
         // that was at dest before the operation.
         // %ebx and %edi are saved and restored; %ecx is clobbered.
         .align 16
 _Atomic_cmpxchg_long_gcc:
                                    /  8(%esp) : return PC
         pushl    %ebx              /  4(%esp) : old %ebx
         pushl    %edi              /  0(%esp) : old %edi
         movl     12(%esp), %ebx    / 12(%esp) : exchange_value (low)
         movl     16(%esp), %ecx    / 16(%esp) : exchange_value (high)
         movl     24(%esp), %eax    / 24(%esp) : compare_value (low)
         movl     28(%esp), %edx    / 28(%esp) : compare_value (high)
         movl     20(%esp), %edi    / 20(%esp) : dest
         cmpl     $0, 32(%esp)      / 32(%esp) : is_MP
         // when is_MP == 0, jump past the lock prefix: label 1 sits between
         // the prefix and the instruction it would otherwise apply to
         je       1f
         lock
 1:      cmpxchg8b (%edi)
         popl     %edi
         popl     %ebx
         ret
	//
	// Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
	// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	//
	// This code is free software; you can redistribute it and/or modify it
	// under the terms of the GNU General Public License version 2 only, as
	// published by the Free Software Foundation.
	//
	// This code is distributed in the hope that it will be useful, but WITHOUT
	// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	// version 2 for more details (a copy is included in the LICENSE file that
	// accompanied this code).
	//
	// You should have received a copy of the GNU General Public License version
	// 2 along with this work; if not, write to the Free Software Foundation,
	// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	//
	// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
	// or visit www.oracle.com if you need additional information or have any
	// questions.
	//

	// Symbols exported from this file; each one is defined as a label below.
	.globl fixcw
	.globl sse_check
	.globl sse_unavailable
	.globl gs_load
	.globl gs_thread
	.globl _Atomic_cmpxchg_long_gcc

	// NOTE WELL! The _Copy functions are called directly
	// from server-compiler-generated code via CallLeafNoFP,
	// which means that they *must* either not use floating
	// point or use it in the same manner as does the server
	// compiler.

	.globl _Copy_conjoint_bytes
	.globl _Copy_arrayof_conjoint_bytes
	.globl _Copy_conjoint_jshorts_atomic
	.globl _Copy_arrayof_conjoint_jshorts
	.globl _Copy_conjoint_jints_atomic
	.globl _Copy_arrayof_conjoint_jints
	.globl _Copy_conjoint_jlongs_atomic
	.globl _mmx_Copy_arrayof_conjoint_jshorts

	// Section flags "ax" = allocatable + executable: everything below is code.
	.section .text,"ax"

	/ Support for void os::Solaris::init_thread_fpu_state() in os_solaris_i486.cpp
	/ Set fpu to 53 bit precision. This happens too early to use a stub.
	// Loads the x87 control word with 0x27f (precision-control field = 53-bit
	// significand, all x87 exception mask bits set).  The value is pushed so
	// that fldcw can read it from memory; popl only releases that stack slot
	// (and leaves 0x27f in %eax).
	.align 16
	fixcw:
	pushl $0x27f
	fldcw 0(%esp)
	popl %eax
	ret

	// "rep; nop" assembles to the PAUSE spin-wait hint; the routine then
	// returns 1 in %eax.  Takes no arguments and touches no other register.
	.align 16
	.globl SpinPause
	SpinPause:
	rep
	nop
	movl $1, %eax
	ret


	/ Test SSE availability, used by os_solaris_i486.cpp
	// Returns 1 in %eax when the xorps below executes normally.  The exported
	// label sse_unavailable is a second entry point that returns 0; per the
	// comment on it, a signal handler resumes execution there after the fault.
	// Both paths end in a plain ret, so the stack must be unchanged on resume.
	.align 16
	sse_check:
	/ Fault if SSE not available
	xorps %xmm0,%xmm0
	/ No fault
	movl $1,%eax
	ret
	/ Signal handler continues here if SSE is not available
	sse_unavailable:
	xorl %eax,%eax
	ret

	/ Fast thread accessors, used by threadLS_solaris_i486.cpp
	// Returns in %eax the 32-bit value at %gs:<offset>, where <offset> is the
	// first stack argument.  Clobbers %ecx.
	.align 16
	gs_load:
	movl 4(%esp),%ecx
	movl %gs:(%ecx),%eax
	ret

	// Returns in %eax the 32-bit value stored at %gs:0.  Takes no arguments.
	.align 16
	gs_thread:
	movl %gs:0x0,%eax
	ret

	/ Support for void Copy::conjoint_bytes(void* from,
	/ void* to,
	/ size_t count)
	// Arguments are read from the caller's stack: from, to, count (bytes).
	// %esi and %edi are saved and restored; %eax, %ecx, %edx are scratch.
	// Copies ascending (cb_CopyRight) unless 'to' lies in
	// (from, from + count - 1]; in that case it copies descending
	// (cb_CopyLeft) with the direction flag set and clears it before return.
	// Stack offsets are written as <bytes pushed so far> + <argument offset>.
	.align 16
	_Copy_conjoint_bytes:
	pushl %esi
	movl 4+12(%esp),%ecx / count
	pushl %edi
	movl 8+ 4(%esp),%esi / from
	movl 8+ 8(%esp),%edi / to
	cmpl %esi,%edi
	// leal does not modify flags, so the jbe below still tests to vs. from
	leal -1(%esi,%ecx),%eax / from + count - 1
	jbe cb_CopyRight
	cmpl %eax,%edi
	jbe cb_CopyLeft
	/ copy from low to high
	cb_CopyRight:
	cmpl $3,%ecx
	jbe 5f / <= 3 bytes
	/ align source address at dword address boundary
	movl %ecx,%eax / original count
	movl $4,%ecx
	subl %esi,%ecx
	andl $3,%ecx / prefix byte count
	jz 1f / no prefix
	subl %ecx,%eax / byte count less prefix
	/ copy prefix
	// %edi becomes (to - from) so that (%edi,%esi,1) addresses the
	// destination while only %esi has to be advanced in the loop
	subl %esi,%edi
	0: movb (%esi),%dl
	movb %dl,(%edi,%esi,1)
	addl $1,%esi
	subl $1,%ecx
	jnz 0b
	// turn %edi back into an absolute destination pointer
	addl %esi,%edi
	1: movl %eax,%ecx / byte count less prefix
	shrl $2,%ecx / dword count
	jz 4f / no dwords to move
	cmpl $32,%ecx
	jbe 2f / <= 32 dwords
	/ copy aligned dwords
	// bulk path: repeated string move of %ecx dwords, (%esi) -> (%edi)
	rep; smovl
	jmp 4f
	/ copy aligned dwords
	// short path: explicit loop, again using %edi as (to - from)
	2: subl %esi,%edi
	.align 16
	3: movl (%esi),%edx
	movl %edx,(%edi,%esi,1)
	addl $4,%esi
	subl $1,%ecx
	jnz 3b
	addl %esi,%edi
	4: movl %eax,%ecx / byte count less prefix
	5: andl $3,%ecx / suffix byte count
	jz 7f / no suffix
	/ copy suffix
	// %eax is reused as the byte index for the 1..3 trailing bytes
	xorl %eax,%eax
	6: movb (%esi,%eax,1),%dl
	movb %dl,(%edi,%eax,1)
	addl $1,%eax
	subl $1,%ecx
	jnz 6b
	7: popl %edi
	popl %esi
	ret
	/ copy from high to low
	cb_CopyLeft:
	// direction flag set: the string move at 3: walks downwards
	std
	leal -4(%edi,%ecx),%edi / to + count - 4
	movl %eax,%esi / from + count - 1
	movl %ecx,%eax
	subl $3,%esi / from + count - 4
	cmpl $3,%ecx
	jbe 5f / <= 3 bytes
	1: shrl $2,%ecx / dword count
	jz 4f / no dwords to move
	cmpl $32,%ecx
	ja 3f / > 32 dwords
	/ copy dwords, aligned or not
	subl %esi,%edi
	.align 16
	2: movl (%esi),%edx
	movl %edx,(%edi,%esi,1)
	subl $4,%esi
	subl $1,%ecx
	jnz 2b
	addl %esi,%edi
	jmp 4f
	/ copy dwords, aligned or not
	3: rep; smovl
	4: movl %eax,%ecx / byte count
	5: andl $3,%ecx / suffix byte count
	jz 7f / no suffix
	/ copy suffix
	// %esi is 4 below the end of the uncopied bytes; +3 makes it point at
	// the highest uncopied byte, and the loop then walks down
	subl %esi,%edi
	addl $3,%esi
	6: movb (%esi),%dl
	movb %dl,(%edi,%esi,1)
	subl $1,%esi
	subl $1,%ecx
	jnz 6b
	// restore the direction flag to "ascending" before returning
	7: cld
	popl %edi
	popl %esi
	ret

	/ Support for void Copy::arrayof_conjoint_bytes(void* from,
	/ void* to,
	/ size_t count)
	/
	/ Same as _Copy_conjoint_bytes, except no source alignment check.
	// Arguments are read from the caller's stack: from, to, count (bytes).
	// %esi and %edi are saved and restored; %eax, %ecx, %edx are scratch.
	// Copies ascending (acb_CopyRight) unless 'to' lies in
	// (from, from + count - 1]; then it copies descending (acb_CopyLeft)
	// with the direction flag set and clears it before returning.
	.align 16
	_Copy_arrayof_conjoint_bytes:
	pushl %esi
	movl 4+12(%esp),%ecx / count
	pushl %edi
	movl 8+ 4(%esp),%esi / from
	movl 8+ 8(%esp),%edi / to
	cmpl %esi,%edi
	// leal does not modify flags, so the jbe below still tests to vs. from
	leal -1(%esi,%ecx),%eax / from + count - 1
	jbe acb_CopyRight
	cmpl %eax,%edi
	jbe acb_CopyLeft
	/ copy from low to high
	acb_CopyRight:
	cmpl $3,%ecx
	jbe 5f
	// %eax keeps the byte count; %ecx becomes the dword count
	1: movl %ecx,%eax
	shrl $2,%ecx
	jz 4f
	cmpl $32,%ecx
	ja 3f
	/ copy aligned dwords
	// %edi becomes (to - from) so (%edi,%esi,1) addresses the destination
	subl %esi,%edi
	.align 16
	2: movl (%esi),%edx
	movl %edx,(%edi,%esi,1)
	addl $4,%esi
	subl $1,%ecx
	jnz 2b
	addl %esi,%edi
	jmp 4f
	/ copy aligned dwords
	// more than 32 dwords: repeated string move, (%esi) -> (%edi)
	3: rep; smovl
	4: movl %eax,%ecx
	// 0..3 trailing bytes remain
	5: andl $3,%ecx
	jz 7f
	/ copy suffix
	xorl %eax,%eax
	6: movb (%esi,%eax,1),%dl
	movb %dl,(%edi,%eax,1)
	addl $1,%eax
	subl $1,%ecx
	jnz 6b
	7: popl %edi
	popl %esi
	ret
	acb_CopyLeft:
	// direction flag set: the string move below walks downwards
	std
	leal -4(%edi,%ecx),%edi / to + count - 4
	movl %eax,%esi / from + count - 1
	movl %ecx,%eax
	subl $3,%esi / from + count - 4
	cmpl $3,%ecx
	jbe 5f
	1: shrl $2,%ecx
	jz 4f
	cmpl $32,%ecx
	jbe 2f / <= 32 dwords
	rep; smovl
	jmp 4f
	// Advances the location counter by 8 bytes.  The gap follows an
	// unconditional jmp, so it is never executed.
	// NOTE(review): presumably layout padding for the loop below - confirm.
	.=.+8
	2: subl %esi,%edi
	.align 16
	3: movl (%esi),%edx
	movl %edx,(%edi,%esi,1)
	subl $4,%esi
	subl $1,%ecx
	jnz 3b
	addl %esi,%edi
	4: movl %eax,%ecx
	5: andl $3,%ecx
	jz 7f
	// +3 makes %esi point at the highest uncopied byte; loop walks down
	subl %esi,%edi
	addl $3,%esi
	6: movb (%esi),%dl
	movb %dl,(%edi,%esi,1)
	subl $1,%esi
	subl $1,%ecx
	jnz 6b
	// restore the direction flag to "ascending" before returning
	7: cld
	popl %edi
	popl %esi
	ret

	/ Support for void Copy::conjoint_jshorts_atomic(void* from,
	/ void* to,
	/ size_t count)
	// Arguments are read from the caller's stack: from, to, count, where
	// count is in 2-byte elements (addresses are scaled by 2 below).
	// %esi and %edi are saved and restored; %eax, %ecx, %edx are scratch.
	// Copies ascending (cs_CopyRight) unless 'to' lies in
	// (from, from + count*2 - 2]; then it copies descending (cs_CopyLeft)
	// with the direction flag set and clears it before returning.
	// Data is moved with 16-bit or 32-bit loads/stores only.
	.align 16
	_Copy_conjoint_jshorts_atomic:
	pushl %esi
	movl 4+12(%esp),%ecx / count
	pushl %edi
	movl 8+ 4(%esp),%esi / from
	movl 8+ 8(%esp),%edi / to
	cmpl %esi,%edi
	// leal does not modify flags, so the jbe below still tests to vs. from
	leal -2(%esi,%ecx,2),%eax / from + count*2 - 2
	jbe cs_CopyRight
	cmpl %eax,%edi
	jbe cs_CopyLeft
	/ copy from low to high
	cs_CopyRight:
	/ align source address at dword address boundary
	movl %esi,%eax / original from
	andl $3,%eax / either 0 or 2
	jz 1f / no prefix
	/ copy prefix
	subl $1,%ecx
	jl 5f / zero count
	movw (%esi),%dx
	movw %dx,(%edi)
	addl %eax,%esi / %eax == 2
	addl %eax,%edi
	1: movl %ecx,%eax / word count less prefix
	// one-operand sarl shifts right by one bit: words -> dwords
	sarl %ecx / dword count
	jz 4f / no dwords to move
	cmpl $32,%ecx
	jbe 2f / <= 32 dwords
	/ copy aligned dwords
	rep; smovl
	jmp 4f
	/ copy aligned dwords
	// %edi becomes (to - from) so (%edi,%esi,1) addresses the destination
	2: subl %esi,%edi
	.align 16
	3: movl (%esi),%edx
	movl %edx,(%edi,%esi,1)
	addl $4,%esi
	subl $1,%ecx
	jnz 3b
	addl %esi,%edi
	// low bit of the word count tells whether one 16-bit element remains
	4: andl $1,%eax / suffix count
	jz 5f / no suffix
	/ copy suffix
	movw (%esi),%dx
	movw %dx,(%edi)
	5: popl %edi
	popl %esi
	ret
	/ copy from high to low
	cs_CopyLeft:
	// direction flag set: the string move at 3: walks downwards
	std
	leal -4(%edi,%ecx,2),%edi / to + count*2 - 4
	movl %eax,%esi / from + count*2 - 2
	movl %ecx,%eax
	subl $2,%esi / from + count*2 - 4
	1: sarl %ecx / dword count
	jz 4f / no dwords to move
	cmpl $32,%ecx
	ja 3f / > 32 dwords
	subl %esi,%edi
	.align 16
	2: movl (%esi),%edx
	movl %edx,(%edi,%esi,1)
	subl $4,%esi
	subl $1,%ecx
	jnz 2b
	addl %esi,%edi
	jmp 4f
	3: rep; smovl
	4: andl $1,%eax / suffix count
	jz 5f / no suffix
	/ copy suffix
	// pointers sit one dword below the copied region; the remaining
	// element is 2 bytes above them
	addl $2,%esi
	addl $2,%edi
	movw (%esi),%dx
	movw %dx,(%edi)
	// restore the direction flag to "ascending" before returning
	5: cld
	popl %edi
	popl %esi
	ret

	/ Support for void Copy::arrayof_conjoint_jshorts(void* from,
	/ void* to,
	/ size_t count)
	// Arguments are read from the caller's stack: from, to, count, where
	// count is in 2-byte elements.  Unlike _Copy_conjoint_jshorts_atomic
	// there is no source-alignment prefix step.
	// %esi and %edi are saved and restored; %eax, %ecx, %edx are scratch.
	// Copies ascending (acs_CopyRight) unless 'to' lies in
	// (from, from + count*2 - 2]; then it copies descending (acs_CopyLeft)
	// with the direction flag set and clears it before returning.
	.align 16
	_Copy_arrayof_conjoint_jshorts:
	pushl %esi
	movl 4+12(%esp),%ecx / count
	pushl %edi
	movl 8+ 4(%esp),%esi / from
	movl 8+ 8(%esp),%edi / to
	cmpl %esi,%edi
	// leal does not modify flags, so the jbe below still tests to vs. from
	leal -2(%esi,%ecx,2),%eax / from + count*2 - 2
	jbe acs_CopyRight
	cmpl %eax,%edi
	jbe acs_CopyLeft
	acs_CopyRight:
	movl %ecx,%eax / word count
	// one-operand sarl shifts right by one bit: words -> dwords
	sarl %ecx / dword count
	jz 4f / no dwords to move
	cmpl $32,%ecx
	jbe 2f / <= 32 dwords
	/ copy aligned dwords
	rep; smovl
	jmp 4f
	/ copy aligned dwords
	// Advances the location counter by 5 bytes.  The gap follows an
	// unconditional jmp, so it is never executed.
	// NOTE(review): presumably layout padding for the loop below - confirm.
	.=.+5
	// %edi becomes (to - from) so (%edi,%esi,1) addresses the destination
	2: subl %esi,%edi
	.align 16
	3: movl (%esi),%edx
	movl %edx,(%edi,%esi,1)
	addl $4,%esi
	subl $1,%ecx
	jnz 3b
	addl %esi,%edi
	// low bit of the word count tells whether one 16-bit element remains
	4: andl $1,%eax / suffix count
	jz 5f / no suffix
	/ copy suffix
	movw (%esi),%dx
	movw %dx,(%edi)
	5: popl %edi
	popl %esi
	ret
	acs_CopyLeft:
	// direction flag set: the string move at 3: walks downwards
	std
	leal -4(%edi,%ecx,2),%edi / to + count*2 - 4
	movl %eax,%esi / from + count*2 - 2
	movl %ecx,%eax
	subl $2,%esi / from + count*2 - 4
	sarl %ecx / dword count
	jz 4f / no dwords to move
	cmpl $32,%ecx
	ja 3f / > 32 dwords
	subl %esi,%edi
	.align 16
	2: movl (%esi),%edx
	movl %edx,(%edi,%esi,1)
	subl $4,%esi
	subl $1,%ecx
	jnz 2b
	addl %esi,%edi
	jmp 4f
	3: rep; smovl
	4: andl $1,%eax / suffix count
	jz 5f / no suffix
	/ copy suffix
	// the remaining element is 2 bytes above the current pointers
	addl $2,%esi
	addl $2,%edi
	movw (%esi),%dx
	movw %dx,(%edi)
	// restore the direction flag to "ascending" before returning
	5: cld
	popl %edi
	popl %esi
	ret

	/ Support for void Copy::conjoint_jints_atomic(void* from,
	/ void* to,
	/ size_t count)
	/ Equivalent to
	/ arrayof_conjoint_jints
	// Two exported labels share this body.  Arguments are read from the
	// caller's stack: from, to, count, where count is in 4-byte elements.
	// %esi and %edi are saved and restored; %eax, %ecx, %edx are scratch.
	// Copies ascending (ci_CopyRight) unless 'to' lies in
	// (from, from + count*4 - 4]; then it copies descending (ci_CopyLeft)
	// with the direction flag set and clears it before returning.
	// Every element is moved with one 32-bit load and one 32-bit store,
	// or by the dword string move.
	.align 16
	_Copy_conjoint_jints_atomic:
	_Copy_arrayof_conjoint_jints:
	pushl %esi
	movl 4+12(%esp),%ecx / count
	pushl %edi
	movl 8+ 4(%esp),%esi / from
	movl 8+ 8(%esp),%edi / to
	cmpl %esi,%edi
	// leal does not modify flags, so the jbe below still tests to vs. from
	leal -4(%esi,%ecx,4),%eax / from + count*4 - 4
	jbe ci_CopyRight
	cmpl %eax,%edi
	jbe ci_CopyLeft
	ci_CopyRight:
	cmpl $32,%ecx
	jbe 2f / <= 32 dwords
	rep; smovl
	popl %edi
	popl %esi
	ret
	// Advances the location counter by 10 bytes.  The gap follows a ret,
	// so it is never executed.
	// NOTE(review): presumably layout padding for the loop below - confirm.
	.=.+10
	// %edi becomes (to - from); the loop is entered at its decrement so
	// that a count of 0 copies nothing
	2: subl %esi,%edi
	jmp 4f
	.align 16
	3: movl (%esi),%edx
	movl %edx,(%edi,%esi,1)
	addl $4,%esi
	4: subl $1,%ecx
	jge 3b
	popl %edi
	popl %esi
	ret
	ci_CopyLeft:
	// direction flag set: the string move at 4: walks downwards
	std
	leal -4(%edi,%ecx,4),%edi / to + count*4 - 4
	cmpl $32,%ecx
	ja 4f / > 32 dwords
	// short path walks %eax (last source dword) downwards; %esi is unused
	subl %eax,%edi / eax == from + count*4 - 4
	jmp 3f
	.align 16
	2: movl (%eax),%edx
	movl %edx,(%edi,%eax,1)
	subl $4,%eax
	3: subl $1,%ecx
	jge 2b
	cld
	popl %edi
	popl %esi
	ret
	4: movl %eax,%esi / from + count*4 - 4
	rep; smovl
	// restore the direction flag to "ascending" before returning
	cld
	popl %edi
	popl %esi
	ret

	/ Support for void Copy::conjoint_jlongs_atomic(jlong* from,
	/ jlong* to,
	/ size_t count)
	/
	/ 32-bit
	/
	/ count treated as signed
	/
	/ if (from > to) {
	/ while (--count >= 0) {
	/ *to++ = *from++;
	/ }
	/ } else {
	/ while (--count >= 0) {
	/ to[count] = from[count];
	/ }
	/ }
	// Arguments are read from the caller's stack: from, to, count, where
	// count is in 8-byte elements and is tested with signed jge.
	// No registers are saved; %eax, %ecx, %edx and the x87 stack are used.
	// Each element is moved by one 64-bit x87 integer load (fildll) and
	// one 64-bit store-and-pop (fistpll), so the x87 stack depth is
	// unchanged after every element.
	.align 16
	_Copy_conjoint_jlongs_atomic:
	movl 4+8(%esp),%ecx / count
	movl 4+0(%esp),%eax / from
	movl 4+4(%esp),%edx / to
	cmpl %eax,%edx
	// to >= from: copy from the highest index downwards
	jae cla_CopyLeft
	cla_CopyRight:
	// %edx becomes (to - from) so (%edx,%eax,1) addresses the destination;
	// the loop is entered at its decrement so a count of 0 copies nothing
	subl %eax,%edx
	jmp 2f
	.align 16
	1: fildll (%eax)
	fistpll (%edx,%eax,1)
	addl $8,%eax
	2: subl $1,%ecx
	jge 1b
	ret
	.align 16
	// descending loop body; entry is at cla_CopyLeft below, so %ecx is
	// decremented before it is first used as the element index
	3: fildll (%eax,%ecx,8)
	fistpll (%edx,%ecx,8)
	cla_CopyLeft:
	subl $1,%ecx
	jge 3b
	ret

	/ Support for void Copy::arrayof_conjoint_jshorts(void* from,
	/ void* to,
	/ size_t count)
	// Arguments are read from the caller's stack: from, to, count, where
	// count is in 2-byte elements.
	// %esi and %edi are saved and restored; %eax, %ecx, %edx and the MMX
	// registers %mm0-%mm2 are scratch.
	// Ascending copies of 33 or more dwords move 64-byte blocks through
	// MMX registers and execute emms afterwards; everything else uses the
	// same dword loops as the non-MMX routine.  The descending path
	// (mmx_acs_CopyLeft) sets the direction flag and clears it on return.
	.align 16
	_mmx_Copy_arrayof_conjoint_jshorts:
	pushl %esi
	movl 4+12(%esp),%ecx
	pushl %edi
	movl 8+ 4(%esp),%esi
	movl 8+ 8(%esp),%edi
	cmpl %esi,%edi
	// leal does not modify flags, so the jbe below still tests to vs. from
	leal -2(%esi,%ecx,2),%eax
	jbe mmx_acs_CopyRight
	cmpl %eax,%edi
	jbe mmx_acs_CopyLeft
	mmx_acs_CopyRight:
	// %eax keeps the word count; %ecx becomes the dword count
	movl %ecx,%eax
	sarl %ecx
	je 5f
	cmpl $33,%ecx
	jae 3f
	// dword loop; %edi is (to - from) while it runs
	1: subl %esi,%edi
	.align 16
	2: movl (%esi),%edx
	movl %edx,(%edi,%esi,1)
	addl $4,%esi
	subl $1,%ecx
	jnz 2b
	addl %esi,%edi
	jmp 5f
	// single (non-repeated) dword string move; it does not change %ecx,
	// so the count is adjusted by hand on the next line
	3: smovl / align to 8 bytes, we know we are 4 byte aligned to start
	subl $1,%ecx
	// NOTE(review): label 4 is defined before the .align, so "jge 4b"
	// lands on the start of any alignment padding - confirm intended.
	// Each iteration moves 64 bytes (16 dwords) and subtracts 16 from %ecx.
	4: .align 16
	movq 0(%esi),%mm0
	addl $64,%edi
	movq 8(%esi),%mm1
	subl $16,%ecx
	movq 16(%esi),%mm2
	movq %mm0,-64(%edi)
	movq 24(%esi),%mm0
	movq %mm1,-56(%edi)
	movq 32(%esi),%mm1
	movq %mm2,-48(%edi)
	movq 40(%esi),%mm2
	movq %mm0,-40(%edi)
	movq 48(%esi),%mm0
	movq %mm1,-32(%edi)
	movq 56(%esi),%mm1
	movq %mm2,-24(%edi)
	movq %mm0,-16(%edi)
	addl $64,%esi
	movq %mm1,-8(%edi)
	cmpl $16,%ecx
	jge 4b
	// leave MMX state before returning to code that may use the x87 FPU
	emms
	// testl clears CF, so "ja" here is taken exactly when %ecx != 0:
	// the leftover dwords go through the plain loop at 1:
	testl %ecx,%ecx
	ja 1b
	// low bit of the word count tells whether one 16-bit element remains
	5: andl $1,%eax
	je 7f
	6: movw (%esi),%dx
	movw %dx,(%edi)
	7: popl %edi
	popl %esi
	ret
	mmx_acs_CopyLeft:
	// direction flag set: the string move at 3: walks downwards
	std
	leal -4(%edi,%ecx,2),%edi
	movl %eax,%esi
	movl %ecx,%eax
	subl $2,%esi
	sarl %ecx
	je 4f
	cmpl $32,%ecx
	ja 3f
	subl %esi,%edi
	.align 16
	2: movl (%esi),%edx
	movl %edx,(%edi,%esi,1)
	subl $4,%esi
	subl $1,%ecx
	jnz 2b
	addl %esi,%edi
	jmp 4f
	3: rep; smovl
	4: andl $1,%eax
	je 6f
	// the remaining element is 2 bytes above the current pointers
	addl $2,%esi
	addl $2,%edi
	5: movw (%esi),%dx
	movw %dx,(%edi)
	// restore the direction flag to "ascending" before returning
	6: cld
	popl %edi
	popl %esi
	ret


	/ Support for jlong Atomic::cmpxchg(jlong exchange_value,
	/ volatile jlong* dest,
	/ jlong compare_value,
	/ bool is_MP)
	/ Used only for Solaris/gcc builds
	// 64-bit compare-and-exchange built on cmpxchg8b.
	// Stack arguments: exchange_value (8 bytes), dest (pointer),
	// compare_value (8 bytes), is_MP (tested as a 32-bit word).
	// cmpxchg8b compares %edx:%eax with the 8 bytes at (%edi); on a match
	// it stores %ecx:%ebx there, otherwise it loads the memory value into
	// %edx:%eax.  The routine therefore returns in %edx:%eax the value
	// that was at dest before the operation.
	// %ebx and %edi are saved and restored; %ecx is clobbered.
	.align 16
	_Atomic_cmpxchg_long_gcc:
	/ 8(%esp) : return PC
	pushl %ebx / 4(%esp) : old %ebx
	pushl %edi / 0(%esp) : old %edi
	movl 12(%esp), %ebx / 12(%esp) : exchange_value (low)
	movl 16(%esp), %ecx / 16(%esp) : exchange_value (high)
	movl 24(%esp), %eax / 24(%esp) : compare_value (low)
	movl 28(%esp), %edx / 28(%esp) : compare_value (high)
	movl 20(%esp), %edi / 20(%esp) : dest
	cmpl $0, 32(%esp) / 32(%esp) : is_MP
	// when is_MP == 0, jump past the lock prefix: label 1 sits between
	// the prefix and the instruction it would otherwise apply to
	je 1f
	lock
	1: cmpxchg8b (%edi)
	popl %edi
	popl %ebx
	ret