blob: 8261781c3ebb583f6094331fd998d9e5b214437f [file] [log] [blame]
!
!
!
! Copyright 2000 Sun Microsystems, Inc. All Rights Reserved.
! DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
!
! This code is free software; you can redistribute it and/or modify it
! under the terms of the GNU General Public License version 2 only, as
! published by the Free Software Foundation. Oracle designates this
! particular file as subject to the "Classpath" exception as provided
! by Oracle in the LICENSE file that accompanied this code.
!
! This code is distributed in the hope that it will be useful, but WITHOUT
! ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
! FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
! version 2 for more details (a copy is included in the LICENSE file that
! accompanied this code).
!
! You should have received a copy of the GNU General Public License version
! 2 along with this work; if not, write to the Free Software Foundation,
! Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
!
! Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
! or visit www.oracle.com if you need additional information or have any
! questions.
!
! FUNCTION
! mlib_v_ImageCopy_blk - Copy an image into another
! (with Block Load/Store)
!
! SYNOPSIS
! void mlib_v_ImageCopy_blk(void *src,
! void *dst,
! int size);
!
! ARGUMENT
! src source image data
! dst destination image data
! size image size in bytes
!
! NOTES
! src and dst must point to 64-byte aligned addresses
! size must be multiple of 64
!
! DESCRIPTION
! dst = src
!
#include "vis_asi.h"
! Minimum size of stack frame according to SPARC ABI
#define MINFRAME 96
! ENTRY provides the standard procedure entry code
#define ENTRY(x) \
.align 4; \
.global x; \
x:
! SET_SIZE trails a function and sets the size for the ELF symbol table
#define SET_SIZE(x) \
.size x, (.-x)
! SPARC have four integer register groups. i-registers %i0 to %i7
! hold input data. o-registers %o0 to %o7 hold output data. l-registers
! %l0 to %l7 hold local data. g-registers %g0 to %g7 hold global data.
! Note that %g0 is alway zero, write to it has no program-visible effect.
! When calling an assembly function, the first 6 arguments are stored
! in i-registers from %i0 to %i5. The rest arguments are stored in stack.
! Note that %i6 is reserved for stack pointer and %i7 for return address.
! Only the first 32 f-registers can be used as 32-bit registers.
! The last 32 f-registers can only be used as 16 64-bit registers.
#define src %i0
#define dst %i1
#define sz %i2
!frame pointer %i6
!return addr %i7
!stack pointer %o6
!call link %o7
#define sa %l0
#define da %l1
#define se %l2
#define ns %l3
#define O0 %f16
#define O1 %f18
#define O2 %f20
#define O3 %f22
#define O4 %f24
#define O5 %f26
#define O6 %f28
#define O7 %f30
#define A0 %f32
#define A1 %f34
#define A2 %f36
#define A3 %f38
#define A4 %f40
#define A5 %f42
#define A6 %f44
#define A7 %f46
#define B0 %f48
#define B1 %f50
#define B2 %f52
#define B3 %f54
#define B4 %f56
#define B5 %f58
#define B6 %f60
#define B7 %f62
#define USE_BLD
#define USE_BST
#define MEMBAR_BEFORE_BLD membar #StoreLoad
#define MEMBAR_AFTER_BLD membar #StoreLoad
#ifdef USE_BLD
#define BLD_A0 \
ldda [sa]ASI_BLK_P,A0; \
cmp sa,se; \
blu,pt %icc,1f; \
inc 64,sa; \
dec 64,sa; \
1:
#else
#define BLD_A0 \
ldd [sa + 0],A0; \
ldd [sa + 8],A1; \
ldd [sa + 16],A2; \
ldd [sa + 24],A3; \
ldd [sa + 32],A4; \
ldd [sa + 40],A5; \
ldd [sa + 48],A6; \
ldd [sa + 56],A7; \
cmp sa,se; \
blu,pt %icc,1f; \
inc 64,sa; \
dec 64,sa; \
1:
#endif
#ifdef USE_BLD
#define BLD_B0 \
ldda [sa]ASI_BLK_P,B0; \
cmp sa,se; \
blu,pt %icc,1f; \
inc 64,sa; \
dec 64,sa; \
1:
#else
#define BLD_B0 \
ldd [sa + 0],B0; \
ldd [sa + 8],B1; \
ldd [sa + 16],B2; \
ldd [sa + 24],B3; \
ldd [sa + 32],B4; \
ldd [sa + 40],B5; \
ldd [sa + 48],B6; \
ldd [sa + 56],B7; \
cmp sa,se; \
blu,pt %icc,1f; \
inc 64,sa; \
dec 64,sa; \
1:
#endif
#ifdef USE_BST
#define BST \
stda O0,[da]ASI_BLK_P; \
inc 64,da; \
deccc ns; \
ble,pn %icc,mlib_v_ImageCopy_end; \
nop
#else
#define BST \
std O0,[da + 0]; \
std O1,[da + 8]; \
std O2,[da + 16]; \
std O3,[da + 24]; \
std O4,[da + 32]; \
std O5,[da + 40]; \
std O6,[da + 48]; \
std O7,[da + 56]; \
inc 64,da; \
deccc ns; \
ble,pn %icc,mlib_v_ImageCopy_end; \
nop
#endif
#define COPY_A0 \
fmovd A0, O0; \
fmovd A1, O1; \
fmovd A2, O2; \
fmovd A3, O3; \
fmovd A4, O4; \
fmovd A5, O5; \
fmovd A6, O6; \
fmovd A7, O7;
#define COPY_B0 \
fmovd B0, O0; \
fmovd B1, O1; \
fmovd B2, O2; \
fmovd B3, O3; \
fmovd B4, O4; \
fmovd B5, O5; \
fmovd B6, O6; \
fmovd B7, O7;
.section ".text",#alloc,#execinstr
ENTRY(mlib_v_ImageCopy_blk) ! function name
save %sp,-MINFRAME,%sp ! reserve space for stack
! and adjust register window
! do some error checking
tst sz ! size > 0
ble,pn %icc,mlib_v_ImageCopy_ret
! calculate loop count
sra sz,6,ns ! 64 bytes per loop
add src,sz,se ! end address of source
mov src,sa
mov dst,da
! issue memory barrier instruction
MEMBAR_BEFORE_BLD ! to ensure all previous memory load
! and store has completed
BLD_A0
BLD_B0 ! issue the 2nd block load instruction
! to synchronize with returning data
mlib_v_ImageCopy_bgn:
COPY_A0 ! process data returned by BLD_A0
BLD_A0 ! block load and sync data from BLD_B0
BST ! block store data from BLD_A0
COPY_B0 ! process data returned by BLD_B0
BLD_B0 ! block load and sync data from BLD_A0
BST ! block store data from BLD_B0
bg,pt %icc,mlib_v_ImageCopy_bgn
mlib_v_ImageCopy_end:
! issue memory barrier instruction
MEMBAR_AFTER_BLD ! to ensure all previous memory load
! and store has completed.
mlib_v_ImageCopy_ret:
ret ! return
restore ! restore register window
SET_SIZE(mlib_v_ImageCopy_blk)