Handle special case of div and rem by 2 in the ART interpreter
4% gain in division and 3.1% gain in modulus
operations on Intel(R) Atom architecture.
Test: 411-checker-hdiv-hrem-pow2
Change-Id: I338a51f2a867ed7f7cb1caf851b8fc8c9fa62d10
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
diff --git a/runtime/interpreter/mterp/x86_64/arithmetic.S b/runtime/interpreter/mterp/x86_64/arithmetic.S
index ff64b53..0ef7a83 100644
--- a/runtime/interpreter/mterp/x86_64/arithmetic.S
+++ b/runtime/interpreter/mterp/x86_64/arithmetic.S
@@ -1,4 +1,4 @@
-%def bindiv(result="", second="", wide="", suffix="", rem="0", ext="cdq"):
+%def bindiv(result="", second="", tmp="", wide="", suffix="", rem="0", ext="cdq"):
/*
* 32-bit binary div/rem operation. Handles special case of op1=-1.
*/
@@ -16,6 +16,8 @@
jz common_errDivideByZero
cmp${suffix} $$-1, $second
je 2f
+ cmp${suffix} $$2, $second
+ je 3f
$ext # rdx:rax <- sign-extended of rax
idiv${suffix} $second
1:
@@ -32,8 +34,31 @@
neg${suffix} $result
.endif
jmp 1b
+3: # divisor is 2: compute the result with shifts, skipping idiv
+ .if $rem
+ mov${suffix} $tmp, $result # result <- x; assumes tmp holds the dividend x (rem callers pass the eax or rax view) - TODO confirm against the elided operand load
+ .if $wide
+ shr${suffix} $$63, $result # logical shift leaves only the sign bit: 1 if x is negative, else 0
+ .else
+ shr${suffix} $$31, $result # logical shift leaves only the sign bit: 1 if x is negative, else 0
+ .endif
+ add${suffix} $tmp, $result # result <- x + sign bit (bias so truncation goes toward zero)
+ and${suffix} $$-2, $result # clear bit 0: result <- twice the truncated quotient
+ sub${suffix} $result, $tmp # tmp <- x minus twice the quotient, i.e. the remainder (keeps the sign of x)
+ mov${suffix} $tmp, $result # move the remainder into result for the code at label 1
+ .else
+ mov${suffix} $result, $tmp # tmp <- x; assumes result holds the dividend x (div callers pass the eax or rax view) - TODO confirm against the elided operand load
+ .if $wide
+ shr${suffix} $$63, $tmp # tmp <- sign bit of x: 1 if negative, else 0
+ .else
+ shr${suffix} $$31, $tmp # tmp <- sign bit of x: 1 if negative, else 0
+ .endif
+ add${suffix} $tmp, $result # result <- x + sign bit, so the shift below rounds toward zero
+ sar${suffix} $result # one-operand form shifts by 1: result <- quotient of x by 2
+ .endif
+ jmp 1b # rejoin at label 1, the same point reached after idiv
-%def bindiv2addr(result="", second="", wide="", suffix="", rem="0", ext="cdq"):
+%def bindiv2addr(result="", second="", tmp="", wide="", suffix="", rem="0", ext="cdq"):
/*
* 32-bit binary div/rem operation. Handles special case of op1=-1.
*/
@@ -52,6 +77,8 @@
jz common_errDivideByZero
cmp${suffix} $$-1, $second
je 2f
+ cmp${suffix} $$2, $second
+ je 3f
$ext # rdx:rax <- sign-extended of rax
idiv${suffix} $second
1:
@@ -68,6 +95,29 @@
neg${suffix} $result
.endif
jmp 1b
+3: # divisor is 2: compute the result with shifts, skipping idiv
+ .if $rem
+ mov${suffix} $tmp, $result # result <- x; assumes tmp holds the dividend x (rem callers pass the eax or rax view) - TODO confirm against the elided operand load
+ .if $wide
+ shr${suffix} $$63, $result # logical shift leaves only the sign bit: 1 if x is negative, else 0
+ .else
+ shr${suffix} $$31, $result # logical shift leaves only the sign bit: 1 if x is negative, else 0
+ .endif
+ add${suffix} $tmp, $result # result <- x + sign bit (bias so truncation goes toward zero)
+ and${suffix} $$-2, $result # clear bit 0: result <- twice the truncated quotient
+ sub${suffix} $result, $tmp # tmp <- x minus twice the quotient, i.e. the remainder (keeps the sign of x)
+ mov${suffix} $tmp, $result # move the remainder into result for the code at label 1
+ .else
+ mov${suffix} $result, $tmp # tmp <- x; assumes result holds the dividend x (div callers pass the eax or rax view) - TODO confirm against the elided operand load
+ .if $wide
+ shr${suffix} $$63, $tmp # tmp <- sign bit of x: 1 if negative, else 0
+ .else
+ shr${suffix} $$31, $tmp # tmp <- sign bit of x: 1 if negative, else 0
+ .endif
+ add${suffix} $tmp, $result # result <- x + sign bit, so the shift below rounds toward zero
+ sar${suffix} $result # one-operand form shifts by 1: result <- quotient of x by 2
+ .endif
+ jmp 1b # rejoin at label 1, the same point reached after idiv
%def bindivLit16(result="", rem="0"):
/*
@@ -372,10 +422,10 @@
ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
%def op_div_int():
-% bindiv(result="%eax", second="%ecx", wide="0", suffix="l")
+% bindiv(result="%eax", second="%ecx", tmp="%edx", wide="0", suffix="l")
%def op_div_int_2addr():
-% bindiv2addr(result="%eax", second="%ecx", wide="0", suffix="l")
+% bindiv2addr(result="%eax", second="%ecx", tmp="%edx", wide="0", suffix="l")
%def op_div_int_lit16():
% bindivLit16(result="%eax")
@@ -384,10 +434,10 @@
% bindivLit8(result="%eax")
%def op_div_long():
-% bindiv(result="%rax", second="%rcx", wide="1", suffix="q", ext="cqo")
+% bindiv(result="%rax", second="%rcx", tmp="%rdx", wide="1", suffix="q", ext="cqo")
%def op_div_long_2addr():
-% bindiv2addr(result="%rax", second="%rcx", wide="1", suffix="q", ext="cqo")
+% bindiv2addr(result="%rax", second="%rcx", tmp="%rdx", wide="1", suffix="q", ext="cqo")
%def op_int_to_byte():
% unop(instr="movsbl %al, %eax")
@@ -475,10 +525,10 @@
% binopWide2addr(instr="orq")
%def op_rem_int():
-% bindiv(result="%edx", second="%ecx", wide="0", suffix="l", rem="1")
+% bindiv(result="%edx", second="%ecx", tmp="%eax", wide="0", suffix="l", rem="1")
%def op_rem_int_2addr():
-% bindiv2addr(result="%edx", second="%ecx", wide="0", suffix="l", rem="1")
+% bindiv2addr(result="%edx", second="%ecx", tmp="%eax", wide="0", suffix="l", rem="1")
%def op_rem_int_lit16():
% bindivLit16(result="%edx", rem="1")
@@ -487,10 +537,10 @@
% bindivLit8(result="%edx", rem="1")
%def op_rem_long():
-% bindiv(result="%rdx", second="%rcx", wide="1", suffix="q", ext="cqo", rem="1")
+% bindiv(result="%rdx", second="%rcx", tmp="%rax", wide="1", suffix="q", ext="cqo", rem="1")
%def op_rem_long_2addr():
-% bindiv2addr(result="%rdx", second="%rcx", wide="1", suffix="q", rem="1", ext="cqo")
+% bindiv2addr(result="%rdx", second="%rcx", tmp="%rax", wide="1", suffix="q", rem="1", ext="cqo")
%def op_rsub_int():
/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */