JIT compiler update: sync with a newer sljit

Summary of the imported sljit changes:
- SLJIT_INT_REGISTER is renamed to SLJIT_GP_REGISTER (same semantics:
  a negative index from sljit_get_register_index() marks a virtual
  register).
- The x86-only SLJIT_HAS_SSE2 feature is replaced by SLJIT_HAS_AVX and
  SLJIT_HAS_AVX2; the pcre2 SIMD fast paths are now gated on
  SLJIT_HAS_FPU.
- The Apple executable allocator supports PPC and falls back cleanly
  when MAP_JIT is unavailable on non-macOS targets.
- SLJIT_HAS_F64_AS_F32_PAIR and SLJIT_F64_SECOND expose the second
  32 bit half of 64 bit float registers on ARM32/MIPS32.
- New sljit_emit_simd_extend() and sljit_emit_simd_sign() entry points,
  plus SLJIT_SIMD_LANE_ZERO/SLJIT_SIMD_LANE_SIGNED lane options and
  256/512 bit alignment options.
- Atomic operations are now guarded by the SLJIT_HAS_ATOMIC feature.
- SLJIT_ARG_TYPE_VOID becomes SLJIT_ARG_TYPE_RET_VOID, with SLJIT_ARGSnV
  helper macros for void-returning signatures.
- SLJIT_IMM is now a distinct operand value (0x7f) rather than a flag
  bit, so immediates are tested with == instead of a bit mask.
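
For reference, a minimal sketch (not part of the patch) of the renamed
register-index query; only the type constant changes, the negative-index
convention for virtual registers stays the same:

    /* Sketch: nonzero when `reg` is backed by a real machine register
       on the current target; a negative index means it is virtual. */
    static int is_hw_reg(sljit_s32 reg)
    {
        return sljit_get_register_index(SLJIT_GP_REGISTER, reg) >= 0;
    }
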
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index cf3e6e4..538e7b0 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -2272,7 +2272,7 @@
for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
{
SLJIT_ASSERT(status->tmp_regs[i] >= 0);
- SLJIT_ASSERT(sljit_get_register_index(SLJIT_INT_REGISTER, status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);
+ SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);
status->store_bases[i] = -1;
}
@@ -2292,7 +2292,7 @@
if (status->store_bases[next_tmp_reg] == -1)
{
/* Preserve virtual registers. */
- if (sljit_get_register_index(SLJIT_INT_REGISTER, status->saved_tmp_regs[next_tmp_reg]) < 0)
+ if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[next_tmp_reg]) < 0)
OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0);
}
else
@@ -2321,7 +2321,7 @@
OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
/* Restore virtual registers. */
- if (sljit_get_register_index(SLJIT_INT_REGISTER, saved_tmp_reg) < 0)
+ if (sljit_get_register_index(SLJIT_GP_REGISTER, saved_tmp_reg) < 0)
OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0);
}
@@ -3250,7 +3250,7 @@
return;
}
-if (sljit_get_register_index(SLJIT_INT_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
+if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
{
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
src = TMP3;
@@ -13739,9 +13739,9 @@
SLJIT_ASSERT(tables);
#if HAS_VIRTUAL_REGISTERS == 1
-SLJIT_ASSERT(sljit_get_register_index(SLJIT_INT_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_INT_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_INT_REGISTER, RETURN_ADDR) < 0);
+SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);
#elif HAS_VIRTUAL_REGISTERS == 0
-SLJIT_ASSERT(sljit_get_register_index(SLJIT_INT_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_INT_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_INT_REGISTER, RETURN_ADDR) >= 0);
+SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);
#else
#error "Invalid value for HAS_VIRTUAL_REGISTERS"
#endif
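
The indices returned by sljit_get_register_index() exist to feed
hand-encoded instructions through sljit_emit_op_custom(), as the s390x
code below does. A hypothetical sketch of that pattern (the opcode byte
and field layout are placeholders, not a real ISA encoding; `compiler`
is an existing struct sljit_compiler *):

    sljit_u8 ins[4];
    sljit_s32 r0 = sljit_get_register_index(SLJIT_GP_REGISTER, SLJIT_R0);

    ins[0] = 0xe3;            /* placeholder opcode byte */
    ins[1] = (sljit_u8)r0;    /* machine index of SLJIT_R0 */
    ins[2] = 0;
    ins[3] = 0;
    sljit_emit_op_custom(compiler, ins, 4);
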
diff --git a/src/pcre2_jit_simd_inc.h b/src/pcre2_jit_simd_inc.h
index 93353cf..355962d 100644
--- a/src/pcre2_jit_simd_inc.h
+++ b/src/pcre2_jit_simd_inc.h
@@ -201,7 +201,7 @@
}
}
-#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
+#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_FPU))
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
@@ -214,8 +214,8 @@
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
vector_compare_type compare_type = vector_compare_match1;
-sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, TMP1);
-sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, STR_PTR);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
@@ -365,7 +365,7 @@
#endif
}
-#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
+#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_FPU))
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
{
@@ -375,8 +375,8 @@
struct sljit_jump *quit;
jump_list *not_found = NULL;
vector_compare_type compare_type = vector_compare_match1;
-sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, TMP1);
-sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, STR_PTR);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
@@ -497,7 +497,7 @@
#ifndef _WIN64
-#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
+#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_FPU))
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
@@ -509,9 +509,9 @@
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
-sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, TMP1);
-sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, TMP2);
-sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, STR_PTR);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
+sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp1_ind = 2;
@@ -1183,7 +1183,7 @@
OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr);
/* VLVG */
- instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(SLJIT_INT_REGISTER, tmp_general_reg));
+ instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(SLJIT_GP_REGISTER, tmp_general_reg));
instruction[1] = 0;
instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22);
sljit_emit_op_custom(compiler, instruction, 6);
@@ -1262,8 +1262,8 @@
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
vector_compare_type compare_type = vector_compare_match1;
-sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, TMP1);
-sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, STR_PTR);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
@@ -1460,8 +1460,8 @@
struct sljit_jump *quit;
jump_list *not_found = NULL;
vector_compare_type compare_type = vector_compare_match1;
-sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, TMP1);
-sljit_s32 tmp3_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, TMP3);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
+sljit_s32 tmp3_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP3);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
@@ -1630,9 +1630,9 @@
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
sljit_s32 diff = IN_UCHARS(offs2 - offs1);
-sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, TMP1);
-sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, TMP2);
-sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, STR_PTR);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
+sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp1_ind = 2;
@@ -1949,8 +1949,8 @@
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
vector_compare_type compare_type = vector_compare_match1;
-sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, TMP1);
-sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, STR_PTR);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
@@ -2082,8 +2082,8 @@
struct sljit_jump *quit;
jump_list *not_found = NULL;
vector_compare_type compare_type = vector_compare_match1;
-sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, TMP1);
-sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, STR_PTR);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
@@ -2183,9 +2183,9 @@
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
-sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, TMP1);
-sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, TMP2);
-sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_INT_REGISTER, STR_PTR);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
+sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp1_ind = 2;
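
With the JIT_HAS_*_SIMD macros now testing SLJIT_HAS_FPU instead of the
removed SLJIT_HAS_SSE2, callers keep the usual probe-then-dispatch
shape. A sketch (scalar_fallback is a stand-in name, not a real pcre2
helper):

    if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
        fast_forward_char_simd(common, char1, char2, offset);
    else
        scalar_fallback(common, char1, char2, offset); /* stand-in */
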
diff --git a/src/sljit/allocator_src/sljitExecAllocatorApple.c b/src/sljit/allocator_src/sljitExecAllocatorApple.c
index 87a0420..95b9842 100644
--- a/src/sljit/allocator_src/sljitExecAllocatorApple.c
+++ b/src/sljit/allocator_src/sljitExecAllocatorApple.c
@@ -33,15 +33,18 @@
On non-macOS systems, returns MAP_JIT if it is defined.
*/
#include <TargetConditionals.h>
-#if TARGET_OS_OSX
-#if defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86
+
+#if (defined(TARGET_OS_OSX) && TARGET_OS_OSX) || (TARGET_OS_MAC && !TARGET_OS_IPHONE)
+
+#if defined(SLJIT_CONFIG_X86) && SLJIT_CONFIG_X86
+
#include <sys/utsname.h>
#include <stdlib.h>
#define SLJIT_MAP_JIT (get_map_jit_flag())
#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
-static SLJIT_INLINE int get_map_jit_flag()
+static SLJIT_INLINE int get_map_jit_flag(void)
{
size_t page_size;
void *ptr;
@@ -67,10 +70,8 @@
}
return map_jit_flag;
}
-#else /* !SLJIT_CONFIG_X86 */
-#if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM)
-#error "Unsupported architecture"
-#endif /* SLJIT_CONFIG_ARM */
+
+#elif defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM
#include <AvailabilityMacros.h>
#include <pthread.h>
@@ -86,9 +87,24 @@
#endif /* BigSur */
pthread_jit_write_protect_np(enable_exec);
}
-#endif /* SLJIT_CONFIG_X86 */
+
+#elif defined(SLJIT_CONFIG_PPC) && SLJIT_CONFIG_PPC
+
+#define SLJIT_MAP_JIT (0)
+#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
+
+#else
+#error "Unsupported architecture"
+#endif /* SLJIT_CONFIG */
+
#else /* !TARGET_OS_OSX */
+
+#ifdef MAP_JIT
#define SLJIT_MAP_JIT (MAP_JIT)
+#else
+#define SLJIT_MAP_JIT (0)
+#endif
+
#endif /* TARGET_OS_OSX */
static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
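
For context, alloc_chunk() on this platform reduces to roughly the
following (a sketch assuming a POSIX mmap() target; SLJIT_MAP_JIT
expands to MAP_JIT, 0, or the get_map_jit_flag() call depending on the
branch selected above):

    #include <sys/mman.h>

    static void *alloc_chunk_sketch(sljit_uw size)
    {
        void *p = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                       MAP_PRIVATE | MAP_ANON | SLJIT_MAP_JIT, -1, 0);
        return (p != MAP_FAILED) ? p : NULL;
    }
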
diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h
index 0f58aa8..3d0e3da 100644
--- a/src/sljit/sljitConfigInternal.h
+++ b/src/sljit/sljitConfigInternal.h
@@ -85,6 +85,8 @@
Other macros:
SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT
SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (platform independent helper)
+ SLJIT_F64_SECOND(reg) : provides the register index of the second 32 bit part of a 64 bit
+ floating point register when SLJIT_HAS_F64_AS_F32_PAIR returns non-zero
*/
/***********************************************************/
@@ -201,6 +203,10 @@
/* Instruction cache flush. */
/****************************/
+#ifdef __APPLE__
+#include <AvailabilityMacros.h>
+#endif
+
/*
* TODO:
*
@@ -241,7 +247,7 @@
/* Not required to implement on archs with unified caches. */
#define SLJIT_CACHE_FLUSH(from, to)
-#elif defined __APPLE__
+#elif defined(__APPLE__) && MAC_OS_X_VERSION_MIN_REQUIRED >= 1050
/* Supported by all macs since Mac OS 10.5.
However, it does not work on non-jailbroken iOS devices,
@@ -393,9 +399,10 @@
/* Auto detecting mips revision. */
#if (defined __mips_isa_rev) && (__mips_isa_rev >= 6)
#define SLJIT_MIPS_REV 6
-#elif (defined __mips_isa_rev && __mips_isa_rev >= 1) \
- || (defined __clang__ && defined _MIPS_ARCH_OCTEON) \
- || (defined __clang__ && defined _MIPS_ARCH_P5600)
+#elif defined(__mips_isa_rev) && __mips_isa_rev >= 1
+#define SLJIT_MIPS_REV __mips_isa_rev
+#elif defined(__clang__) \
+ && (defined(_MIPS_ARCH_OCTEON) || defined(_MIPS_ARCH_P5600))
/* clang either forgets to define (clang-7) __mips_isa_rev at all
* or sets it to zero (clang-8,-9) for -march=octeon (MIPS64 R2+)
* and -march=p5600 (MIPS32 R5).
@@ -676,6 +683,19 @@
#define SLJIT_HAS_STATUS_FLAGS_STATE 1
#endif
+/***************************************/
+/* Floating point register management. */
+/***************************************/
+
+#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \
+ || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define SLJIT_F64_SECOND(reg) \
+ ((reg) + SLJIT_FS0)
+#else /* !SLJIT_CONFIG_ARM_32 && !SLJIT_CONFIG_MIPS_32 */
+#define SLJIT_F64_SECOND(reg) \
+ (reg)
+#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */
+
/*************************************/
/* Debug and verbose related macros. */
/*************************************/
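
A usage sketch for the new macro (assumes an initialized struct
sljit_compiler *compiler and a target where SLJIT_HAS_F64_AS_F32_PAIR
reports nonzero, e.g. ARM32; elsewhere SLJIT_F64_SECOND(reg) is just
reg):

    /* Copy the second 32 bit half of SLJIT_FR0 into SLJIT_FR1
       with a single precision move. */
    if (sljit_has_cpu_feature(SLJIT_HAS_F64_AS_F32_PAIR))
        sljit_emit_fop1(compiler, SLJIT_MOV_F32,
            SLJIT_FR1, 0, SLJIT_F64_SECOND(SLJIT_FR0), 0);
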
diff --git a/src/sljit/sljitLir.c b/src/sljit/sljitLir.c
index 9278c06..564eb73 100644
--- a/src/sljit/sljitLir.c
+++ b/src/sljit/sljitLir.c
@@ -123,29 +123,29 @@
#endif
/* Parameter parsing. */
-#define REG_MASK 0x3f
+#define REG_MASK 0x7f
#define OFFS_REG(reg) (((reg) >> 8) & REG_MASK)
#define OFFS_REG_MASK (REG_MASK << 8)
#define TO_OFFS_REG(reg) ((reg) << 8)
-/* When reg cannot be unused. */
-#define FAST_IS_REG(reg) ((reg) <= REG_MASK)
+#define FAST_IS_REG(reg) ((reg) < REG_MASK)
/* Mask for argument types. */
#define SLJIT_ARG_MASK 0x7
#define SLJIT_ARG_FULL_MASK (SLJIT_ARG_MASK | SLJIT_ARG_TYPE_SCRATCH_REG)
-/* Mask for sljit_emit_mem. */
-#define REG_PAIR_MASK 0xff00
-#define REG_PAIR_FIRST(reg) ((reg) & 0xff)
+/* Mask for register pairs. */
+#define REG_PAIR_MASK 0x7f00
+#define REG_PAIR_FIRST(reg) ((reg) & 0x7f)
#define REG_PAIR_SECOND(reg) ((reg) >> 8)
/* Mask for sljit_emit_enter. */
#define SLJIT_KEPT_SAVEDS_COUNT(options) ((options) & 0x3)
/* Getters for simd operations, which returns with log2(size). */
-#define SLJIT_SIMD_GET_REG_SIZE(type) (((type) >> 12) & 0x3f)
-#define SLJIT_SIMD_GET_ELEM_SIZE(type) (((type) >> 18) & 0x3f)
-#define SLJIT_SIMD_GET_ALIGNMENT(type) (((type) >> 24) & 0x3f)
+#define SLJIT_SIMD_GET_REG_SIZE(type) (((type) >> 12) & 0x3f)
+#define SLJIT_SIMD_GET_ELEM_SIZE(type) (((type) >> 18) & 0x3f)
+#define SLJIT_SIMD_GET_ALIGNMENT(type) (((type) >> 24) & 0x3f)
+#define SLJIT_SIMD_GET_ELEM2_SIZE(type) (((type) >> 24) & 0x3f)
/* Jump flags. */
#define JUMP_LABEL 0x1
@@ -846,10 +846,6 @@
(((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) \
|| ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0))
-#define FUNCTION_CHECK_IS_FREG(fr) \
- (((fr) >= SLJIT_FR0 && (fr) < (SLJIT_FR0 + compiler->fscratches)) \
- || ((fr) > (SLJIT_FS0 - compiler->fsaveds) && (fr) <= SLJIT_FS0))
-
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define CHECK_IF_VIRTUAL_REGISTER(p) ((p) <= SLJIT_S3 && (p) >= SLJIT_S8)
#else
@@ -858,7 +854,7 @@
static sljit_s32 function_check_src_mem(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
- if (compiler->scratches == -1 || compiler->saveds == -1)
+ if (compiler->scratches == -1)
return 0;
if (!(p & SLJIT_MEM))
@@ -895,7 +891,7 @@
static sljit_s32 function_check_src(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
- if (compiler->scratches == -1 || compiler->saveds == -1)
+ if (compiler->scratches == -1)
return 0;
if (FUNCTION_CHECK_IS_REG(p))
@@ -912,7 +908,7 @@
static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
- if (compiler->scratches == -1 || compiler->saveds == -1)
+ if (compiler->scratches == -1)
return 0;
if (FUNCTION_CHECK_IS_REG(p))
@@ -924,19 +920,57 @@
#define FUNCTION_CHECK_DST(p, i) \
CHECK_ARGUMENT(function_check_dst(compiler, p, i));
-static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
+#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \
+ || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+
+#define FUNCTION_CHECK_IS_FREG(fr, is_32) \
+ function_check_is_freg(compiler, (fr), (is_32))
+
+static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32);
+
+#define FUNCTION_FCHECK(p, i, is_32) \
+ CHECK_ARGUMENT(function_fcheck(compiler, (p), (i), (is_32)));
+
+static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i, sljit_s32 is_32)
{
- if (compiler->scratches == -1 || compiler->saveds == -1)
+ if (compiler->scratches == -1)
return 0;
- if (FUNCTION_CHECK_IS_FREG(p))
+ if (FUNCTION_CHECK_IS_FREG(p, is_32))
return (i == 0);
return function_check_src_mem(compiler, p, i);
}
-#define FUNCTION_FCHECK(p, i) \
- CHECK_ARGUMENT(function_fcheck(compiler, p, i));
+#else /* !SLJIT_CONFIG_ARM_32 && !SLJIT_CONFIG_MIPS_32 */
+#define FUNCTION_CHECK_IS_FREG(fr, is_32) \
+ function_check_is_freg(compiler, (fr))
+
+static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr)
+{
+ if (compiler->scratches == -1)
+ return 0;
+
+ return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
+ || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0);
+}
+
+#define FUNCTION_FCHECK(p, i, is_32) \
+ CHECK_ARGUMENT(function_fcheck(compiler, (p), (i)));
+
+static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
+{
+ if (compiler->scratches == -1)
+ return 0;
+
+ if ((p >= SLJIT_FR0 && p < (SLJIT_FR0 + compiler->fscratches))
+ || (p > (SLJIT_FS0 - compiler->fsaveds) && p <= SLJIT_FS0))
+ return (i == 0);
+
+ return function_check_src_mem(compiler, p, i);
+}
+
+#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */
#endif /* SLJIT_ARGUMENT_CHECKS */
@@ -973,6 +1007,14 @@
static void sljit_verbose_freg(struct sljit_compiler *compiler, sljit_s32 r)
{
+#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \
+ || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+ if (r >= (SLJIT_FS0 + SLJIT_FR0) && r <= (SLJIT_FS0 + SLJIT_FS0)) {
+ fprintf(compiler->verbose, "^");
+ r -= SLJIT_FS0;
+ }
+#endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */
+
if (r < (SLJIT_FR0 + compiler->fscratches))
fprintf(compiler->verbose, "fr%d", r - SLJIT_FR0);
else
@@ -981,7 +1023,7 @@
static void sljit_verbose_param(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i)
{
- if ((p) & SLJIT_IMM)
+ if ((p) == SLJIT_IMM)
fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i));
else if ((p) & SLJIT_MEM) {
if ((p) & REG_MASK) {
@@ -1259,7 +1301,7 @@
}
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT(compiler->last_return == SLJIT_ARG_TYPE_VOID);
+ CHECK_ARGUMENT(compiler->last_return == SLJIT_ARG_TYPE_RET_VOID);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
@@ -1302,7 +1344,7 @@
if (GET_OPCODE(op) < SLJIT_MOV_F64) {
FUNCTION_CHECK_SRC(src, srcw);
} else {
- FUNCTION_FCHECK(src, srcw);
+ FUNCTION_FCHECK(src, srcw, op & SLJIT_32);
}
compiler->last_flags = 0;
#endif
@@ -1417,6 +1459,7 @@
}
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_ATOMIC));
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOV_P);
CHECK_ARGUMENT(GET_OPCODE(op) != SLJIT_MOV_S8 && GET_OPCODE(op) != SLJIT_MOV_S16 && GET_OPCODE(op) != SLJIT_MOV_S32);
@@ -1458,6 +1501,7 @@
}
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_ATOMIC));
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOV_P);
CHECK_ARGUMENT(GET_OPCODE(op) != SLJIT_MOV_S8 && GET_OPCODE(op) != SLJIT_MOV_S16 && GET_OPCODE(op) != SLJIT_MOV_S32);
@@ -1469,7 +1513,7 @@
CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) || GET_FLAG_TYPE(op) == SLJIT_ATOMIC_STORED);
if (GET_OPCODE(op) == SLJIT_MOV_U8 || GET_OPCODE(op) == SLJIT_MOV_U16) {
- /* Only SLJIT_32, SLJIT_ATOMIC_STORED is allowed. */
+ /* Only SLJIT_32, SLJIT_ATOMIC_STORED are allowed. */
CHECK_ARGUMENT(!(op & SLJIT_SET_Z));
} else {
/* Only SLJIT_ATOMIC_STORED is allowed. */
@@ -1660,7 +1704,7 @@
SLJIT_UNUSED_ARG(type);
SLJIT_UNUSED_ARG(reg);
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- if (type == SLJIT_INT_REGISTER) {
+ if (type == SLJIT_GP_REGISTER) {
CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS);
} else {
CHECK_ARGUMENT(type == SLJIT_FLOAT_REGISTER || ((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6)));
@@ -1719,8 +1763,8 @@
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV_F64 && GET_OPCODE(op) <= SLJIT_ABS_F64);
CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
- FUNCTION_FCHECK(src, srcw);
- FUNCTION_FCHECK(dst, dstw);
+ FUNCTION_FCHECK(src, srcw, op & SLJIT_32);
+ FUNCTION_FCHECK(dst, dstw, op & SLJIT_32);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1759,8 +1803,8 @@
CHECK_ARGUMENT(!(op & SLJIT_SET_Z));
CHECK_ARGUMENT((op & VARIABLE_FLAG_MASK)
|| (GET_FLAG_TYPE(op) >= SLJIT_F_EQUAL && GET_FLAG_TYPE(op) <= SLJIT_ORDERED_LESS_EQUAL));
- FUNCTION_FCHECK(src1, src1w);
- FUNCTION_FCHECK(src2, src2w);
+ FUNCTION_FCHECK(src1, src1w, op & SLJIT_32);
+ FUNCTION_FCHECK(src2, src2w, op & SLJIT_32);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1790,7 +1834,7 @@
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
- FUNCTION_FCHECK(src, srcw);
+ FUNCTION_FCHECK(src, srcw, op & SLJIT_32);
FUNCTION_CHECK_DST(dst, dstw);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
@@ -1820,7 +1864,7 @@
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
FUNCTION_CHECK_SRC(src, srcw);
- FUNCTION_FCHECK(dst, dstw);
+ FUNCTION_FCHECK(dst, dstw, op & SLJIT_32);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1850,9 +1894,9 @@
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD_F64 && GET_OPCODE(op) <= SLJIT_DIV_F64);
CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
- FUNCTION_FCHECK(src1, src1w);
- FUNCTION_FCHECK(src2, src2w);
- FUNCTION_FCHECK(dst, dstw);
+ FUNCTION_FCHECK(src1, src1w, op & SLJIT_32);
+ FUNCTION_FCHECK(src2, src2w, op & SLJIT_32);
+ FUNCTION_FCHECK(dst, dstw, op & SLJIT_32);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1876,9 +1920,9 @@
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_COPYSIGN_F64);
- FUNCTION_FCHECK(src1, src1w);
- FUNCTION_FCHECK(src2, src2w);
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg));
+ FUNCTION_FCHECK(src1, src1w, op & SLJIT_32);
+ FUNCTION_FCHECK(src2, src2w, op & SLJIT_32);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg, op & SLJIT_32));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1906,7 +1950,7 @@
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 1));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1930,7 +1974,7 @@
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1949,7 +1993,7 @@
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_COPY_TO_F64 && GET_OPCODE(op) <= SLJIT_COPY_FROM_F64);
CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, op & SLJIT_32));
#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg));
@@ -2134,8 +2178,8 @@
CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32)));
CHECK_ARGUMENT((type & 0xff) >= SLJIT_F_EQUAL && (type & 0xff) <= SLJIT_ORDERED_LESS_EQUAL);
- FUNCTION_FCHECK(src1, src1w);
- FUNCTION_FCHECK(src2, src2w);
+ FUNCTION_FCHECK(src1, src1w, type & SLJIT_32);
+ FUNCTION_FCHECK(src2, src2w, type & SLJIT_32);
compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
@@ -2302,9 +2346,9 @@
CHECK_ARGUMENT(cond >= SLJIT_EQUAL && cond <= SLJIT_ORDERED_LESS_EQUAL);
CHECK_ARGUMENT(compiler->fscratches != -1 && compiler->fsaveds != -1);
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg));
- FUNCTION_FCHECK(src1, src1w);
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src2_freg));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_freg, type & SLJIT_32));
+ FUNCTION_FCHECK(src1, src1w, type & SLJIT_32);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src2_freg, type & SLJIT_32));
if (cond <= SLJIT_NOT_ZERO)
CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
@@ -2335,14 +2379,16 @@
sljit_s32 reg,
sljit_s32 mem, sljit_sw memw)
{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ sljit_s32 allowed_flags;
+#endif /* SLJIT_ARGUMENT_CHECKS */
+
if (SLJIT_UNLIKELY(compiler->skip_checks)) {
compiler->skip_checks = 0;
CHECK_RETURN_OK;
}
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- sljit_s32 allowed_flags;
-
if (type & SLJIT_MEM_UNALIGNED) {
CHECK_ARGUMENT(!(type & (SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)));
} else if (type & SLJIT_MEM_ALIGNED_16) {
@@ -2354,14 +2400,14 @@
allowed_flags = SLJIT_MEM_UNALIGNED;
switch (type & 0xff) {
+ case SLJIT_MOV_P:
+ case SLJIT_MOV:
+ allowed_flags |= SLJIT_MEM_ALIGNED_32;
+ /* fallthrough */
case SLJIT_MOV_U32:
case SLJIT_MOV_S32:
case SLJIT_MOV32:
- allowed_flags = SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16;
- break;
- case SLJIT_MOV:
- case SLJIT_MOV_P:
- allowed_flags = SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32;
+ allowed_flags |= SLJIT_MEM_ALIGNED_16;
break;
}
@@ -2465,6 +2511,7 @@
sljit_s32 mem, sljit_sw memw)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64);
if (type & SLJIT_MEM_UNALIGNED) {
@@ -2477,7 +2524,7 @@
}
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)));
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, type & SLJIT_32));
FUNCTION_CHECK_SRC_MEM(mem, memw);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
@@ -2508,10 +2555,11 @@
sljit_s32 mem, sljit_sw memw)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU));
CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64);
CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_POST)) == 0);
FUNCTION_CHECK_SRC_MEM(mem, memw);
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, type & SLJIT_32));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -2541,12 +2589,13 @@
sljit_s32 srcdst, sljit_sw srcdstw)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD));
CHECK_ARGUMENT((type & (sljit_s32)(0xc0000fff - (SLJIT_SIMD_STORE | SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST))) == 0);
CHECK_ARGUMENT((type & 0x3f000) >= SLJIT_SIMD_REG_64 && (type & 0x3f000) <= SLJIT_SIMD_REG_512);
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) <= SLJIT_SIMD_GET_REG_SIZE(type));
CHECK_ARGUMENT(SLJIT_SIMD_GET_ALIGNMENT(type) <= (srcdst & SLJIT_MEM) ? SLJIT_SIMD_GET_REG_SIZE(type) : 0);
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg));
- FUNCTION_FCHECK(srcdst, srcdstw);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
+ FUNCTION_FCHECK(srcdst, srcdstw, 0);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -2582,16 +2631,17 @@
sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD));
CHECK_ARGUMENT((type & (sljit_s32)(0xff000fff - (SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST))) == 0);
CHECK_ARGUMENT((type & 0x3f000) >= SLJIT_SIMD_REG_64 && (type & 0x3f000) <= SLJIT_SIMD_REG_512);
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
if (type & SLJIT_SIMD_FLOAT) {
if (src == SLJIT_IMM) {
CHECK_ARGUMENT(srcw == 0);
} else {
- FUNCTION_FCHECK(src, srcw);
+ FUNCTION_FCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2);
}
} else if (src != SLJIT_IMM) {
FUNCTION_CHECK_DST(src, srcw);
@@ -2628,15 +2678,20 @@
sljit_s32 srcdst, sljit_sw srcdstw)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- CHECK_ARGUMENT((type & (sljit_s32)(0xff000fff - (SLJIT_SIMD_STORE | SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST))) == 0);
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD));
+ CHECK_ARGUMENT((type & (sljit_s32)(0xff000fff - (SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_ZERO | SLJIT_SIMD_LANE_SIGNED | SLJIT_32 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST))) == 0);
+ CHECK_ARGUMENT((type & (SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_ZERO)) != (SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_ZERO));
+ CHECK_ARGUMENT((type & (SLJIT_SIMD_STORE | SLJIT_SIMD_LANE_SIGNED)) != SLJIT_SIMD_LANE_SIGNED);
+ CHECK_ARGUMENT(!(type & SLJIT_SIMD_FLOAT) || !(type & (SLJIT_SIMD_LANE_SIGNED | SLJIT_32)));
CHECK_ARGUMENT((type & 0x3f000) >= SLJIT_SIMD_REG_64 && (type & 0x3f000) <= SLJIT_SIMD_REG_512);
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg));
+ CHECK_ARGUMENT(!(type & SLJIT_32) || SLJIT_SIMD_GET_ELEM_SIZE(type) <= 2);
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
CHECK_ARGUMENT(lane_index >= 0 && lane_index < (1 << (SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type))));
if (type & SLJIT_SIMD_FLOAT) {
- FUNCTION_FCHECK(srcdst, srcdstw);
- } else if ((type & SLJIT_SIMD_STORE) || (srcdst != SLJIT_IMM)) {
+ FUNCTION_FCHECK(srcdst, srcdstw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2);
+ } else if ((type & SLJIT_SIMD_STORE) || srcdst != SLJIT_IMM) {
FUNCTION_CHECK_DST(srcdst, srcdstw);
}
#endif
@@ -2649,8 +2704,11 @@
CHECK_RETURN_OK;
}
- fprintf(compiler->verbose, " simd_%s_lane.%d.%s%d ",
+ fprintf(compiler->verbose, " simd_%s_lane%s%s%s.%d.%s%d ",
(type & SLJIT_SIMD_STORE) ? "store" : "load",
+ (type & SLJIT_32) ? "32" : "",
+ (type & SLJIT_SIMD_LANE_ZERO) ? "_z" : "",
+ (type & SLJIT_SIMD_LANE_SIGNED) ? "_s" : "",
(8 << SLJIT_SIMD_GET_REG_SIZE(type)),
(type & SLJIT_SIMD_FLOAT) ? "f" : "",
(8 << SLJIT_SIMD_GET_ELEM_SIZE(type)));
@@ -2672,11 +2730,12 @@
sljit_s32 src, sljit_s32 src_lane_index)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD));
CHECK_ARGUMENT((type & (sljit_s32)(0xff000fff - (SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST))) == 0);
CHECK_ARGUMENT((type & 0x3f000) >= SLJIT_SIMD_REG_64 && (type & 0x3f000) <= SLJIT_SIMD_REG_512);
CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg));
- CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src, 0));
CHECK_ARGUMENT(src_lane_index >= 0 && src_lane_index < (1 << (SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type))));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
@@ -2702,6 +2761,82 @@
CHECK_RETURN_OK;
}
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD));
+ CHECK_ARGUMENT((type & (sljit_s32)(0xc0000fff - (SLJIT_SIMD_EXTEND_SIGNED | SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST))) == 0);
+ CHECK_ARGUMENT((type & (SLJIT_SIMD_EXTEND_SIGNED | SLJIT_SIMD_FLOAT)) != (SLJIT_SIMD_EXTEND_SIGNED | SLJIT_SIMD_FLOAT));
+ CHECK_ARGUMENT((type & 0x3f000) >= SLJIT_SIMD_REG_64 && (type & 0x3f000) <= SLJIT_SIMD_REG_512);
+ CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
+ CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_ELEM2_SIZE(type));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
+ FUNCTION_FCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ if (type & SLJIT_SIMD_TEST)
+ CHECK_RETURN_OK;
+ if (sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_TEST, freg, src, srcw) == SLJIT_ERR_UNSUPPORTED) {
+ fprintf(compiler->verbose, " # simd_extend: unsupported form, no instructions are emitted\n");
+ CHECK_RETURN_OK;
+ }
+
+ fprintf(compiler->verbose, " simd_load_extend%s.%d.%s%d.%s%d ",
+ (type & SLJIT_SIMD_EXTEND_SIGNED) ? "_s" : "",
+ (8 << SLJIT_SIMD_GET_REG_SIZE(type)),
+ (type & SLJIT_SIMD_FLOAT) ? "f" : "",
+ (8 << SLJIT_SIMD_GET_ELEM2_SIZE(type)),
+ (type & SLJIT_SIMD_FLOAT) ? "f" : "",
+ (8 << SLJIT_SIMD_GET_ELEM_SIZE(type)));
+
+ sljit_verbose_freg(compiler, freg);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_fparam(compiler, src, srcw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_SIMD));
+ CHECK_ARGUMENT((type & (sljit_s32)(0xff000fff - (SLJIT_SIMD_FLOAT | SLJIT_SIMD_TEST | SLJIT_32))) == SLJIT_SIMD_STORE);
+ CHECK_ARGUMENT((type & 0x3f000) >= SLJIT_SIMD_REG_64 && (type & 0x3f000) <= SLJIT_SIMD_REG_512);
+ CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type));
+ CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg, 0));
+ FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ if (type & SLJIT_SIMD_TEST)
+ CHECK_RETURN_OK;
+ if (sljit_emit_simd_sign(compiler, type | SLJIT_SIMD_TEST, freg, dst, dstw) == SLJIT_ERR_UNSUPPORTED) {
+ fprintf(compiler->verbose, " # simd_sign: unsupported form, no instructions are emitted\n");
+ CHECK_RETURN_OK;
+ }
+
+ fprintf(compiler->verbose, " simd_store_sign%s.%d.%s%d ",
+ (type & SLJIT_32) ? "32" : "",
+ (8 << SLJIT_SIMD_GET_REG_SIZE(type)),
+ (type & SLJIT_SIMD_FLOAT) ? "f" : "",
+ (8 << SLJIT_SIMD_GET_ELEM_SIZE(type)));
+
+ sljit_verbose_freg(compiler, freg);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_param(compiler, dst, dstw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+ CHECK_RETURN_OK;
+}
+
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
/* Any offset is allowed. */
@@ -2913,7 +3048,8 @@
}
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
- && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
+ && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
+ && !(defined(SLJIT_CONFIG_LOONGARCH_64) && SLJIT_CONFIG_LOONGARCH_64)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst_freg,
@@ -2929,7 +3065,7 @@
return sljit_emit_fop2(compiler, op, dst_freg, 0, src1, src1w, src2, src2w);
}
-#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_S390X */
+#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_S390X && !SLJIT_CONFIG_LOONGARCH_64 */
#if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \
&& !(defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
@@ -2949,18 +3085,18 @@
condition = type & 0xff;
#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
if ((condition == SLJIT_EQUAL || condition == SLJIT_NOT_EQUAL)) {
- if ((src1 & SLJIT_IMM) && !src1w) {
+ if (src1 == SLJIT_IMM && !src1w) {
src1 = src2;
src1w = src2w;
src2 = SLJIT_IMM;
src2w = 0;
}
- if ((src2 & SLJIT_IMM) && !src2w)
+ if (src2 == SLJIT_IMM && !src2w)
return emit_cmp_to0(compiler, type, src1, src1w);
}
#endif
- if (SLJIT_UNLIKELY((src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM))) {
+ if (SLJIT_UNLIKELY(src1 == SLJIT_IMM && src2 != SLJIT_IMM)) {
/* Immediate is preferred as second argument by most architectures. */
switch (condition) {
case SLJIT_LESS:
@@ -3104,6 +3240,11 @@
{
CHECK_ERROR();
CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(freg);
+ SLJIT_UNUSED_ARG(srcdst);
+ SLJIT_UNUSED_ARG(srcdstw);
return SLJIT_ERR_UNSUPPORTED;
}
@@ -3114,6 +3255,11 @@
{
CHECK_ERROR();
CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(freg);
+ SLJIT_UNUSED_ARG(src);
+ SLJIT_UNUSED_ARG(srcw);
return SLJIT_ERR_UNSUPPORTED;
}
@@ -3124,6 +3270,12 @@
{
CHECK_ERROR();
CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(freg);
+ SLJIT_UNUSED_ARG(lane_index);
+ SLJIT_UNUSED_ARG(srcdst);
+ SLJIT_UNUSED_ARG(srcdstw);
return SLJIT_ERR_UNSUPPORTED;
}
@@ -3134,12 +3286,88 @@
{
CHECK_ERROR();
CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(freg);
+ SLJIT_UNUSED_ARG(src);
+ SLJIT_UNUSED_ARG(src_lane_index);
+
+ return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(freg);
+ SLJIT_UNUSED_ARG(src);
+ SLJIT_UNUSED_ARG(srcw);
+
+ return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(type);
+ SLJIT_UNUSED_ARG(freg);
+ SLJIT_UNUSED_ARG(dst);
+ SLJIT_UNUSED_ARG(dstw);
return SLJIT_ERR_UNSUPPORTED;
}
#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_ARM */
+#if !(defined(SLJIT_CONFIG_X86) && SLJIT_CONFIG_X86) \
+ && !(defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM) \
+ && !(defined(SLJIT_CONFIG_S390X) && SLJIT_CONFIG_S390X) \
+ && !(defined(SLJIT_CONFIG_LOONGARCH) && SLJIT_CONFIG_LOONGARCH)
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler,
+ sljit_s32 op,
+ sljit_s32 dst_reg,
+ sljit_s32 mem_reg)
+{
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(op);
+ SLJIT_UNUSED_ARG(dst_reg);
+ SLJIT_UNUSED_ARG(mem_reg);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
+
+ return SLJIT_ERR_UNSUPPORTED;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler,
+ sljit_s32 op,
+ sljit_s32 src_reg,
+ sljit_s32 mem_reg,
+ sljit_s32 temp_reg)
+{
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(op);
+ SLJIT_UNUSED_ARG(src_reg);
+ SLJIT_UNUSED_ARG(mem_reg);
+ SLJIT_UNUSED_ARG(temp_reg);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
+
+ return SLJIT_ERR_UNSUPPORTED;
+}
+
+#endif /* !SLJIT_CONFIG_X86 && !SLJIT_CONFIG_ARM && !SLJIT_CONFIG_S390X && !SLJIT_CONFIG_LOONGARCH */
+
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
&& !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
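
Because SLJIT_IMM is now a distinct operand value (0x7f) rather than a
flag bit, the hunks above replace mask tests with equality tests. The
resulting operand classification, as a sketch:

    static int operand_kind(sljit_s32 p)
    {
        if (p == SLJIT_IMM)
            return 0;  /* immediate: compare for equality, do not mask */
        if (p & SLJIT_MEM)
            return 1;  /* memory form */
        return 2;      /* plain register */
    }
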
diff --git a/src/sljit/sljitLir.h b/src/sljit/sljitLir.h
index ae68737..95d59e4 100644
--- a/src/sljit/sljitLir.h
+++ b/src/sljit/sljitLir.h
@@ -322,20 +322,24 @@
arg_d must be placed in SLJIT_FR1
Examples for argument processing by sljit_emit_enter:
- SLJIT_ARGS4(VOID, P, 32_R, F32, W)
+ SLJIT_ARGS4V(P, 32_R, F32, W)
Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1
+ The type of the result is void.
- SLJIT_ARGS4(VOID, W, W_R, W, W_R)
+ SLJIT_ARGS4(F32, W, W_R, W, W_R)
Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3
+ The type of the result is sljit_f32.
- SLJIT_ARGS4(VOID, F64, W, F32, P_R)
+ SLJIT_ARGS4(P, W, F32, P_R)
Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1
+ The type of the result is pointer.
Note: it is recommended to pass the scratch arguments first
followed by the saved arguments:
- SLJIT_ARGS4(VOID, W_R, W_R, W, W)
+ SLJIT_ARGS4(W, W_R, W_R, W, W)
Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1
+ The type of the result is sljit_sw / sljit_uw.
*/
/* The following flag is only allowed for the integer arguments of
@@ -343,21 +347,21 @@
stored in a scratch register instead of a saved register. */
#define SLJIT_ARG_TYPE_SCRATCH_REG 0x8
-/* Void result, can only be used by SLJIT_ARG_RETURN. */
-#define SLJIT_ARG_TYPE_VOID 0
+/* No return value, only supported by SLJIT_ARG_RETURN. */
+#define SLJIT_ARG_TYPE_RET_VOID 0
/* Machine word sized integer argument or result. */
-#define SLJIT_ARG_TYPE_W 1
+#define SLJIT_ARG_TYPE_W 1
#define SLJIT_ARG_TYPE_W_R (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG)
/* 32 bit integer argument or result. */
-#define SLJIT_ARG_TYPE_32 2
+#define SLJIT_ARG_TYPE_32 2
#define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG)
/* Pointer sized integer argument or result. */
-#define SLJIT_ARG_TYPE_P 3
+#define SLJIT_ARG_TYPE_P 3
#define SLJIT_ARG_TYPE_P_R (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG)
/* 64 bit floating point argument or result. */
-#define SLJIT_ARG_TYPE_F64 4
+#define SLJIT_ARG_TYPE_F64 4
/* 32 bit floating point argument or result. */
-#define SLJIT_ARG_TYPE_F32 5
+#define SLJIT_ARG_TYPE_F32 5
#define SLJIT_ARG_SHIFT 4
#define SLJIT_ARG_RETURN(type) (type)
@@ -370,24 +374,40 @@
can be shortened to:
SLJIT_ARGS1(W, F32)
+
+ Another example where no value is returned:
+ SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_RET_VOID) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W_R, 1)
+
+ can be shortened to:
+ SLJIT_ARGS1V(W_R)
*/
#define SLJIT_ARG_TO_TYPE(type) SLJIT_ARG_TYPE_ ## type
#define SLJIT_ARGS0(ret) \
SLJIT_ARG_RETURN(SLJIT_ARG_TO_TYPE(ret))
+#define SLJIT_ARGS0V() \
+ SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_RET_VOID)
#define SLJIT_ARGS1(ret, arg1) \
(SLJIT_ARGS0(ret) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1))
+#define SLJIT_ARGS1V(arg1) \
+ (SLJIT_ARGS0V() | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1))
#define SLJIT_ARGS2(ret, arg1, arg2) \
(SLJIT_ARGS1(ret, arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2))
+#define SLJIT_ARGS2V(arg1, arg2) \
+ (SLJIT_ARGS1V(arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2))
#define SLJIT_ARGS3(ret, arg1, arg2, arg3) \
(SLJIT_ARGS2(ret, arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3))
+#define SLJIT_ARGS3V(arg1, arg2, arg3) \
+ (SLJIT_ARGS2V(arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3))
#define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4) \
(SLJIT_ARGS3(ret, arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4))
+#define SLJIT_ARGS4V(arg1, arg2, arg3, arg4) \
+ (SLJIT_ARGS3V(arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4))
/* --------------------------------------------------------------------- */
/* Main structures and functions */
@@ -667,17 +687,25 @@
#define SLJIT_HAS_COPY_F32 9
/* [Emulated] Copy from/to f64 operation is available (see sljit_emit_fcopy). */
#define SLJIT_HAS_COPY_F64 10
+/* [Not emulated] The 64 bit floating point registers can be used as
+ two separate 32 bit floating point registers (e.g. ARM32). The
+ second 32 bit part can be accessed by SLJIT_F64_SECOND. */
+#define SLJIT_HAS_F64_AS_F32_PAIR 11
/* [Not emulated] Some SIMD operations are supported by the compiler. */
-#define SLJIT_HAS_SIMD 11
+#define SLJIT_HAS_SIMD 12
/* [Not emulated] SIMD registers are mapped to a pair of double precision
floating point registers. E.g. passing either SLJIT_FR0 or SLJIT_FR1 to
a simd operation represents the same 128 bit register, and both SLJIT_FR0
and SLJIT_FR1 are overwritten. */
-#define SLJIT_SIMD_REGS_ARE_PAIRS 12
+#define SLJIT_SIMD_REGS_ARE_PAIRS 13
+/* [Not emulated] Atomic support is available (fine-grained). */
+#define SLJIT_HAS_ATOMIC 14
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
-/* [Not emulated] SSE2 support is available on x86. */
-#define SLJIT_HAS_SSE2 100
+/* [Not emulated] AVX support is available on x86. */
+#define SLJIT_HAS_AVX 100
+/* [Not emulated] AVX2 support is available on x86. */
+#define SLJIT_HAS_AVX2 101
#endif
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type);
@@ -873,17 +901,17 @@
#define SLJIT_MEM0() (SLJIT_MEM)
#define SLJIT_MEM1(r1) (SLJIT_MEM | (r1))
#define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 8))
-#define SLJIT_IMM 0x40
+#define SLJIT_IMM 0x7f
#define SLJIT_REG_PAIR(r1, r2) ((r1) | ((r2) << 8))
/* Macros for checking operand types (only for valid arguments). */
#define SLJIT_IS_REG(arg) ((arg) > 0 && (arg) < SLJIT_IMM)
#define SLJIT_IS_MEM(arg) ((arg) & SLJIT_MEM)
#define SLJIT_IS_MEM0(arg) ((arg) == SLJIT_MEM)
-#define SLJIT_IS_MEM1(arg) ((arg) > SLJIT_MEM && (arg) < (SLJIT_MEM + SLJIT_IMM))
+#define SLJIT_IS_MEM1(arg) ((arg) > SLJIT_MEM && (arg) < (SLJIT_MEM << 1))
#define SLJIT_IS_MEM2(arg) (((arg) & SLJIT_MEM) && (arg) >= (SLJIT_MEM << 1))
#define SLJIT_IS_IMM(arg) ((arg) == SLJIT_IMM)
-#define SLJIT_IS_REG_PAIR(arg) (((arg) >> 8) != 0)
+#define SLJIT_IS_REG_PAIR(arg) (!((arg) & SLJIT_MEM) && (arg) >= (SLJIT_MEM << 1))
/* Sets 32 bit operation mode on 64 bit CPUs. This option is ignored on
32 bit CPUs. When this option is set for an arithmetic operation, only
@@ -1813,7 +1841,7 @@
sljit_s32 freg,
sljit_s32 mem, sljit_sw memw);
-/* The following flags are used by several simd operations. */
+/* The following options are used by several simd operations. */
/* Load data into a simd register, this is the default */
#define SLJIT_SIMD_LOAD 0x000000
@@ -1842,7 +1870,7 @@
/* Element size is 128 bit long */
#define SLJIT_SIMD_ELEM_128 (4 << 18)
-/* The following flags are used by sljit_emit_simd_mem(). */
+/* The following options are used by sljit_emit_simd_mov(). */
/* Memory address is unaligned (this is the default) */
#define SLJIT_SIMD_MEM_UNALIGNED (0 << 24)
@@ -1854,6 +1882,10 @@
#define SLJIT_SIMD_MEM_ALIGNED_64 (3 << 24)
/* Memory address is 128 bit aligned */
#define SLJIT_SIMD_MEM_ALIGNED_128 (4 << 24)
+/* Memory address is 256 bit aligned */
+#define SLJIT_SIMD_MEM_ALIGNED_256 (5 << 24)
+/* Memory address is 512 bit aligned */
+#define SLJIT_SIMD_MEM_ALIGNED_512 (6 << 24)
/* Moves data between a simd register and memory.
@@ -1862,7 +1894,7 @@
it does not emit any instructions.
type must be a combination of SLJIT_SIMD_* and
- SLJIT_SIMD_MEM_* flags
+ SLJIT_SIMD_MEM_* options
freg is the source or destination simd register
of the operation
srcdst must be a memory operand or a simd register
@@ -1884,14 +1916,14 @@
SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
it does not emit any instructions.
- type must be a combination of SLJIT_SIMD_* flags except
- SLJIT_SIMD_STORE.
+ type must be a combination of SLJIT_SIMD_* options
+ except SLJIT_SIMD_STORE.
freg is the destination simd register of the operation
src is the value which is replicated
Note:
- When SLJIT_SIMD_FLOAT is specified, the
- SLJIT_IMM, 0 can be passed as src/srcw arguments.
+ The src == SLJIT_IMM and srcw == 0 can be used to
+ clear a register even when SLJIT_SIMD_FLOAT is set.
Flags: - (does not modify flags) */
@@ -1899,6 +1931,13 @@
sljit_s32 freg,
sljit_s32 src, sljit_sw srcw);
+/* The following options are used by sljit_emit_simd_lane_mov(). */
+
+/* Clear all bits of the simd register before loading the lane. */
+#define SLJIT_SIMD_LANE_ZERO 0x000002
+/* Sign extend the integer value stored from the lane. */
+#define SLJIT_SIMD_LANE_SIGNED 0x000004
+
/* Moves data between a simd register lane and a register or
memory. If the srcdst argument is a register, it must be
a floating point register when SLJIT_SIMD_FLOAT is specified,
@@ -1908,7 +1947,13 @@
SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
it does not emit any instructions.
- type must be a combination of SLJIT_SIMD_* flags
+ type must be a combination of SLJIT_SIMD_* options
+ Further options:
+ SLJIT_32 - when SLJIT_SIMD_FLOAT is not set
+ SLJIT_SIMD_LANE_SIGNED - when SLJIT_SIMD_STORE
+ is set and SLJIT_SIMD_FLOAT is not set
+ SLJIT_SIMD_LANE_ZERO - when SLJIT_SIMD_LOAD
+ is specified
freg is the source or destination simd register
of the operation
lane_index is the index of the lane
@@ -1931,8 +1976,8 @@
SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
it does not emit any instructions.
- type must be a combination of SLJIT_SIMD_* flags except
- SLJIT_SIMD_STORE.
+ type must be a combination of SLJIT_SIMD_* options
+ except SLJIT_SIMD_STORE.
freg is the destination simd register of the operation
src is the simd register which lane is replicated
src_lane_index is the lane index of the src register
@@ -1943,6 +1988,57 @@
sljit_s32 freg,
sljit_s32 src, sljit_s32 src_lane_index);
+/* The following options are used by sljit_emit_simd_extend(). */
+
+/* Sign extend the integer elements */
+#define SLJIT_SIMD_EXTEND_SIGNED 0x000002
+/* Extend data to 16 bit */
+#define SLJIT_SIMD_EXTEND_16 (1 << 24)
+/* Extend data to 32 bit */
+#define SLJIT_SIMD_EXTEND_32 (2 << 24)
+/* Extend data to 64 bit */
+#define SLJIT_SIMD_EXTEND_64 (3 << 24)
+
+/* Extends elements and stores them in a simd register.
+ The extension operation increases the size of the
+ elements (e.g. from 16 bit to 64 bit). For integer
+ values, the extension can be signed or unsigned.
+
+ If the operation is not supported, it returns with
+ SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
+ it does not emit any instructions.
+
+ type must be a combination of SLJIT_SIMD_*, and
+ SLJIT_SIMD_EXTEND_* options except SLJIT_SIMD_STORE
+ freg is the destination simd register of the operation
+ src must be a memory operand or a simd register.
+ In the latter case, the source elements are stored
+ in the lower half of the register.
+
+ Flags: - (does not modify flags) */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw);
+
+/* Extracts the highest bit (usually the sign bit) from
+   each element of a vector.
+
+ If the operation is not supported, it returns with
+ SLJIT_ERR_UNSUPPORTED. If SLJIT_SIMD_TEST is passed,
+ it does not emit any instructions.
+
+ type must be a combination of SLJIT_SIMD_* and SLJIT_32
+ options except SLJIT_SIMD_LOAD
+ freg is the source simd register of the operation
+ dst is the destination operand
+
+ Flags: - (does not modify flags) */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw);
+
/* The sljit_emit_atomic_load and sljit_emit_atomic_store operation pair
can perform an atomic read-modify-write operation. First, an unsigned
value must be loaded from memory using sljit_emit_atomic_load. Then,
@@ -2039,15 +2135,15 @@
/* Types for sljit_get_register_index */
-/* Integer registers. */
-#define SLJIT_INT_REGISTER 0
+/* General purpose (integer) registers. */
+#define SLJIT_GP_REGISTER 0
/* Floating point registers. */
#define SLJIT_FLOAT_REGISTER 1
/* The following function is a helper function for sljit_emit_op_custom.
It returns with the real machine register index ( >=0 ) of any registers.
- When type is SLJIT_INT_REGISTER:
+ When type is SLJIT_GP_REGISTER:
reg must be an SLJIT_R(i), SLJIT_S(i), or SLJIT_SP register
When type is SLJIT_FLOAT_REGISTER:
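
A combined usage sketch for the two new SIMD entry points (assumes an
initialized `compiler`, SLJIT_R0 holding the source address, and a
target with 128 bit vector registers; SLJIT_SIMD_TEST probes support
without emitting code):

    sljit_s32 type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8
        | SLJIT_SIMD_EXTEND_32 | SLJIT_SIMD_EXTEND_SIGNED;

    /* Sign extend 8 bit elements into 32 bit lanes of FR0, then
       store the per-lane sign bits into SLJIT_R1. */
    if (sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_TEST,
            SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 0) != SLJIT_ERR_UNSUPPORTED) {
        sljit_emit_simd_extend(compiler, type,
            SLJIT_FR0, SLJIT_MEM1(SLJIT_R0), 0);
        sljit_emit_simd_sign(compiler,
            SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32,
            SLJIT_FR0, SLJIT_R1, 0);
    }
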
diff --git a/src/sljit/sljitNativeARM_32.c b/src/sljit/sljitNativeARM_32.c
index 4a54436..55c62b7 100644
--- a/src/sljit/sljitNativeARM_32.c
+++ b/src/sljit/sljitNativeARM_32.c
@@ -49,8 +49,8 @@
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
-#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
+#define TMP_FREG1 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 1)
+#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 2)
/* In ARM instruction words.
Cache lines are usually 32 byte aligned. */
@@ -67,8 +67,18 @@
0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
- 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7
+static const sljit_u8 freg_map[(SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 3] = {
+ 0,
+ 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
+ 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
+ 6, 7
+};
+
+static const sljit_u8 freg_ebit_map[(SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 3] = {
+ 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0
};
#define RM(rm) ((sljit_ins)reg_map[rm])
@@ -76,9 +86,9 @@
#define RD(rd) ((sljit_ins)reg_map[rd] << 12)
#define RN(rn) ((sljit_ins)reg_map[rn] << 16)
-#define VM(rm) ((sljit_ins)freg_map[rm])
-#define VD(rd) ((sljit_ins)freg_map[rd] << 12)
-#define VN(rn) ((sljit_ins)freg_map[rn] << 16)
+#define VM(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
+#define VD(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
+#define VN(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))
/* --------------------------------------------------------------------- */
/* Instruction forms */
@@ -95,13 +105,13 @@
#define AND 0xe0000000
#define B 0xea000000
#define BIC 0xe1c00000
+#define BKPT 0xe1200070
#define BL 0xeb000000
#define BLX 0xe12fff30
#define BX 0xe12fff10
#define CLZ 0xe16f0f10
#define CMN 0xe1600000
#define CMP 0xe1400000
-#define BKPT 0xe1200070
#define EOR 0xe0200000
#define LDR 0xe5100000
#define LDR_POST 0xe4100000
@@ -115,7 +125,6 @@
#define ORR 0xe1800000
#define PUSH 0xe92d0000
#define POP 0xe8bd0000
-#define RBIT 0xe6ff0f30
#define REV 0xe6bf0f30
#define REV16 0xe6bf0fb0
#define RSB 0xe0600000
@@ -152,12 +161,16 @@
#define VMOV2 0xec400a10
#define VMOV_i 0xf2800010
#define VMOV_s 0xee000b10
+#define VMOVN 0xf3b20200
#define VMRS 0xeef1fa10
#define VMUL_F32 0xee200a00
#define VNEG_F32 0xeeb10a40
#define VORR 0xf2200110
#define VPOP 0xecbd0b00
#define VPUSH 0xed2d0b00
+#define VSHLL 0xf2800a10
+#define VSHR 0xf2800010
+#define VSRA 0xf2800110
#define VST1 0xf4000000
#define VST1_s 0xf4800000
#define VSTR_F32 0xed000a00
@@ -167,8 +180,25 @@
/* Arm v7 specific instructions. */
#define MOVT 0xe3400000
#define MOVW 0xe3000000
+#define RBIT 0xe6ff0f30
#endif
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+
+static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
+{
+ if (compiler->scratches == -1)
+ return 0;
+
+ if (is_32 && fr >= (SLJIT_FS0 + SLJIT_FR0) && fr <= (SLJIT_FS0 + SLJIT_FS0))
+ fr -= SLJIT_FS0;
+
+ return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
+ || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0);
+}
+
+#endif /* SLJIT_ARGUMENT_CHECKS */
+
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
static sljit_s32 push_cpool(struct sljit_compiler *compiler)
@@ -973,25 +1003,43 @@
{
switch (feature_type) {
case SLJIT_HAS_FPU:
- case SLJIT_HAS_SIMD:
- case SLJIT_SIMD_REGS_ARE_PAIRS:
+ case SLJIT_HAS_F64_AS_F32_PAIR:
#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
/* Available by default. */
return 1;
-#endif
+#endif /* SLJIT_IS_FPU_AVAILABLE */
+ case SLJIT_HAS_SIMD:
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
+ return 0;
+#else
+#ifdef SLJIT_IS_FPU_AVAILABLE
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
+#else
+ /* Available by default. */
+ return 1;
+#endif /* SLJIT_IS_FPU_AVAILABLE */
+#endif /* SLJIT_CONFIG_ARM_V6 */
+ case SLJIT_SIMD_REGS_ARE_PAIRS:
case SLJIT_HAS_CLZ:
- case SLJIT_HAS_CTZ:
case SLJIT_HAS_ROT:
case SLJIT_HAS_CMOV:
case SLJIT_HAS_REV:
case SLJIT_HAS_PREFETCH:
case SLJIT_HAS_COPY_F32:
case SLJIT_HAS_COPY_F64:
+ case SLJIT_HAS_ATOMIC:
return 1;
+ case SLJIT_HAS_CTZ:
+#if defined(SLJIT_CONFIG_ARM_V6) && SLJIT_CONFIG_ARM_V6
+ return 2;
+#else
+ return 1;
+#endif /* SLJIT_CONFIG_ARM_V6 */
+
default:
return 0;
}
@@ -1491,8 +1539,16 @@
case SLJIT_CTZ:
SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
+ FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG1) | RN(src2) | 0));
+ FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | RM(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32));
+ return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f);
+#else /* !SLJIT_CONFIG_ARM_V6 */
FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2)));
return push_inst(compiler, CLZ | RD(dst) | RM(dst));
+#endif /* SLJIT_CONFIG_ARM_V6 */
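ARMv6 has CLZ but no RBIT, so this fallback derives count-trailing-zeros from count-leading-zeros of the isolated lowest set bit; the conditional EOR (the NE condition encoded by EOR ^ 0xf0000000) is skipped when the input was zero and CLZ already produced 32. A scalar model of the five-instruction sequence, as a sketch:

    #include <stdint.h>

    /* Model of RSB/AND/CLZ/CMP/EOR; __builtin_clz is a GCC/Clang
       builtin and is undefined for 0, hence the guard (hardware CLZ
       returns 32 there). */
    static uint32_t ctz32_model(uint32_t x)
    {
        uint32_t lowest = x & (0u - x);   /* RSB + AND: lowest set bit */
        uint32_t n = lowest ? (uint32_t)__builtin_clz(lowest) : 32;
        return n == 32 ? 32 : (n ^ 0x1f); /* n <= 31, so n ^ 0x1f == 31 - n */
    }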
case SLJIT_REV:
case SLJIT_REV_U32:
@@ -1951,10 +2007,11 @@
if (!(inp_flags & ALLOW_IMM))
break;
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
src2_reg = (sljit_s32)get_imm((sljit_uw)src2w);
if (src2_reg)
break;
+
if (inp_flags & ALLOW_INV_IMM) {
src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w);
if (src2_reg) {
@@ -1962,8 +2019,9 @@
break;
}
}
+
if (neg_op != 0) {
- src2_reg = (sljit_s32)get_imm((sljit_uw)-src2w);
+ src2_reg = (sljit_s32)get_imm((neg_op == SLJIT_ADD || neg_op == SLJIT_SUB) ? (sljit_uw)-src2w : ~(sljit_uw)src2w);
if (src2_reg) {
op = neg_op | GET_ALL_FLAGS(op);
break;
@@ -1971,7 +2029,7 @@
}
}
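The generalized neg_op retry above deserves a remark: for ADD/SUB the alternate constant is the arithmetic negation, but for the carry variants it is the bitwise complement, because ARM's borrow convention gives SBC a, b = a - b - (1 - C) = a + ~b + C, making ADC with imm and SBC with ~imm the same operation. A small check of the identity:

    /* ADC(a, imm, c) == SBC(a, ~imm, c), all arithmetic mod 2^32. */
    static int adc_sbc_identity(sljit_uw a, sljit_uw imm, sljit_uw c)
    {
        sljit_uw adc = a + imm + c;
        sljit_uw sbc = a - ~imm - (1 - c); /* -~imm == imm + 1 */
        return adc == sbc;                 /* always 1 */
    }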
- if (src1 & SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
src2_reg = (sljit_s32)get_imm((sljit_uw)src1w);
if (src2_reg) {
flags |= ARGS_SWAPPED;
@@ -1979,6 +2037,7 @@
src1w = src2w;
break;
}
+
if (inp_flags & ALLOW_INV_IMM) {
src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w);
if (src2_reg) {
@@ -1988,8 +2047,11 @@
break;
}
}
+
if (neg_op >= SLJIT_SUB) {
/* Note: additive operation (commutative). */
+ SLJIT_ASSERT(op == SLJIT_ADD || op == SLJIT_ADDC);
+
src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w);
if (src2_reg) {
src1 = src2;
@@ -2211,16 +2273,16 @@
return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
case SLJIT_MOV_U8:
- return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
case SLJIT_MOV_S8:
- return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
case SLJIT_MOV_U16:
- return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
case SLJIT_MOV_S16:
- return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
case SLJIT_CLZ:
case SLJIT_CTZ:
@@ -2262,7 +2324,7 @@
case SLJIT_XOR:
inp_flags = ALLOW_IMM | ALLOW_DOUBLE_IMM;
- if (((src1 & SLJIT_IMM) && src1w == -1) || ((src2 & SLJIT_IMM) && src2w == -1)) {
+ if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1)) {
inp_flags |= ALLOW_INV_IMM;
}
return emit_op(compiler, op, inp_flags, dst, dstw, src1, src1w, src2, src2w);
@@ -2281,7 +2343,7 @@
case SLJIT_MASHR:
case SLJIT_ROTL:
case SLJIT_ROTR:
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
compiler->shift_imm = src2w & 0x1f;
return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
} else {
@@ -2326,7 +2388,7 @@
ADJUST_LOCAL_OFFSET(src3, src3w);
/* Shift type of ROR is 3. */
- if (src3 & SLJIT_IMM) {
+ if (src3 == SLJIT_IMM) {
src3w &= 0x1f;
if (src3w == 0)
@@ -2427,7 +2489,7 @@
{
CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
- if (type == SLJIT_INT_REGISTER)
+ if (type == SLJIT_GP_REGISTER)
return reg_map[reg];
if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64)
@@ -2689,8 +2751,10 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
sljit_s32 freg, sljit_f32 value)
{
+#if defined(__ARM_NEON) && __ARM_NEON
sljit_u32 exp;
sljit_ins ins;
+#endif /* NEON */
union {
sljit_u32 imm;
sljit_f32 value;
@@ -2701,6 +2765,7 @@
u.value = value;
+#if defined(__ARM_NEON) && __ARM_NEON
if ((u.imm << (32 - 19)) == 0) {
exp = (u.imm >> (23 + 2)) & 0x3f;
@@ -2709,6 +2774,7 @@
return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
}
}
+#endif /* NEON */
FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG1));
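The NEON-guarded fast path uses the VFP "modified immediate" form of VMOV.F32: only floats with a 4-bit mantissa (all fraction bits below bit 19 zero) and an exponent in a narrow band around the bias are encodable in the instruction itself; anything else builds the bit pattern in TMP_REG1. A sketch of the test, with the exponent band (elided by this hunk) written out as an assumption:

    /* Assumed band: the top six exponent bits are 0x20 or 0x1f. */
    static int vmov_f32_imm_encodable(sljit_u32 bits)
    {
        sljit_u32 exp;
        if ((bits << (32 - 19)) != 0)
            return 0;                   /* more than 4 mantissa bits */
        exp = (bits >> (23 + 2)) & 0x3f;
        return exp == 0x20 || exp == 0x1f;
    }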
@@ -2717,8 +2783,10 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
sljit_s32 freg, sljit_f64 value)
{
+#if defined(__ARM_NEON) && __ARM_NEON
sljit_u32 exp;
sljit_ins ins;
+#endif /* NEON */
union {
sljit_u32 imm[2];
sljit_f64 value;
@@ -2729,6 +2797,7 @@
u.value = value;
+#if defined(__ARM_NEON) && __ARM_NEON
if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;
@@ -2737,6 +2806,7 @@
return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
}
}
+#endif /* NEON */
FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
if (u.imm[0] == u.imm[1])
@@ -3169,7 +3239,7 @@
SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
if (FAST_IS_REG(src)) {
SLJIT_ASSERT(reg_map[src] != 14);
return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
@@ -3365,7 +3435,7 @@
cc = get_cc(compiler, type & ~SLJIT_32);
- if (SLJIT_UNLIKELY(src1 & SLJIT_IMM)) {
+ if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
tmp = get_imm((sljit_uw)src1w);
if (tmp)
return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc);
@@ -3686,6 +3756,20 @@
return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(mem));
}
+static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
+{
+ freg += freg & 0x1;
+
+ SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));
+
+ if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
+ freg--;
+
+ return freg;
+}
+
+#define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
+
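These two helpers encode the quad-register layout: a 128-bit value occupies two consecutive d-registers, simd_get_quad_reg_index canonicalizes a float register so that freg_map[freg] is the even (low) half of the pair, and SLJIT_QUAD_OTHER_HALF is the +1/-1 step to the partner register. The step is an involution, so the emitters can hop between halves and always get back:

    /* Applying the other-half step twice returns the input. */
    static int quad_other_half_roundtrip(int freg)
    {
        int other = freg + ((((freg) & 0x1) << 1) - 1);
        return other + ((((other) & 0x1) << 1) - 1); /* == freg */
    }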
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 freg,
sljit_s32 srcdst, sljit_sw srcdstw)
@@ -3710,9 +3794,12 @@
return SLJIT_SUCCESS;
if (reg_size == 4)
- freg -= 1 - (freg & 0x1);
+ freg = simd_get_quad_reg_index(freg);
if (!(srcdst & SLJIT_MEM)) {
+ if (reg_size == 4)
+ srcdst = simd_get_quad_reg_index(srcdst);
+
if (type & SLJIT_SIMD_STORE)
ins = VD(srcdst) | VN(freg) | VM(freg);
else
@@ -3859,9 +3946,9 @@
return SLJIT_SUCCESS;
if (reg_size == 4)
- freg -= 1 - (freg & 0x1);
+ freg = simd_get_quad_reg_index(freg);
- if ((src & SLJIT_IMM) && srcw == 0)
+ if (src == SLJIT_IMM && srcw == 0)
return push_inst(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD(freg));
if (SLJIT_UNLIKELY(elem_size == 3)) {
@@ -3873,7 +3960,7 @@
} else if (freg != src)
FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)));
- freg++;
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
if (freg != src)
return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src));
@@ -3892,15 +3979,16 @@
}
if (type & SLJIT_SIMD_FLOAT) {
- ins = ((sljit_ins)1 << (16 + elem_size));
+ SLJIT_ASSERT(elem_size == 2);
+ ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));
if (reg_size == 4)
ins |= 1 << 6;
- return push_inst(compiler, VDUP_s | ins | VD(freg) | VM(src));
+ return push_inst(compiler, VDUP_s | ins | VD(freg) | (sljit_ins)freg_map[src]);
}
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
if (elem_size < 2)
srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
@@ -3957,13 +4045,37 @@
if (type & SLJIT_SIMD_TEST)
return SLJIT_SUCCESS;
- if (reg_size == 4) {
- freg -= 1 - (freg & 0x1);
+ if (reg_size == 4)
+ freg = simd_get_quad_reg_index(freg);
- if (lane_index >= (0x8 >> elem_size)) {
- lane_index -= (0x8 >> elem_size);
- freg++;
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
+ if (lane_index == 1)
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+
+ if (srcdst != freg)
+ FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(srcdst) | VM(srcdst)));
+
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ return push_inst(compiler, VMOV_i | VD(freg));
+ }
+
+ if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) {
+ FAIL_IF(push_inst(compiler, VORR | ins | VD(TMP_FREG1) | VN(freg) | VM(freg)));
+ srcdst = TMP_FREG1;
+ srcdstw = 0;
+ }
}
+
+ FAIL_IF(push_inst(compiler, VMOV_i | ins | VD(freg)));
+ }
+
+ if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
+ lane_index -= (0x8 >> elem_size);
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
}
if (srcdst & SLJIT_MEM) {
@@ -3985,16 +4097,22 @@
}
if (type & SLJIT_SIMD_STORE) {
- if (lane_index == 0)
- return push_inst(compiler, VMOV_F32 | SLJIT_32 | VD(srcdst) | VM(freg));
- return push_inst(compiler, VDUP_s | 0xc0000 | VD(srcdst) | VM(freg));
+ if (freg_ebit_map[freg] == 0) {
+ if (lane_index == 1)
+ freg = SLJIT_F64_SECOND(freg);
+
+ return push_inst(compiler, VMOV_F32 | VD(srcdst) | VM(freg));
+ }
+
+ FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1)));
+ return push_inst(compiler, VMOV | VN(srcdst) | RD(TMP_REG1));
}
FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(srcdst) | RD(TMP_REG1)));
return push_inst(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1));
}
- if (srcdst & SLJIT_IMM) {
+ if (srcdst == SLJIT_IMM) {
if (elem_size < 2)
srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
@@ -4013,10 +4131,10 @@
ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));
if (type & SLJIT_SIMD_STORE) {
- ins |= 0x100000;
+ ins |= (1 << 20);
- if (elem_size < 2)
- ins |= 0x800000;
+ if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
+ ins |= (1 << 23);
}
return push_inst(compiler, VMOV_s | ins | VN(freg) | RD(srcdst));
@@ -4043,12 +4161,12 @@
return SLJIT_SUCCESS;
if (reg_size == 4) {
- freg -= 1 - (freg & 0x1);
- src -= 1 - (src & 0x1);
+ freg = simd_get_quad_reg_index(freg);
+ src = simd_get_quad_reg_index(src);
if (src_lane_index >= (0x8 >> elem_size)) {
src_lane_index -= (0x8 >> elem_size);
- src++;
+ src += SLJIT_QUAD_OTHER_HALF(src);
}
}
@@ -4056,7 +4174,7 @@
if (freg != src)
FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)));
- freg++;
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
if (freg != src)
return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src));
@@ -4071,6 +4189,148 @@
return push_inst(compiler, VDUP_s | ins | VD(freg) | VM(src));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+ sljit_s32 dst_reg;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ freg = simd_get_quad_reg_index(freg);
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
+ if (reg_size == 4 && elem2_size - elem_size == 1)
+ FAIL_IF(push_inst(compiler, VLD1 | (0x7 << 8) | VD(freg) | RN(src) | 0xf));
+ else
+ FAIL_IF(push_inst(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD(freg) | RN(src) | 0xf));
+ src = freg;
+ } else if (reg_size == 4)
+ src = simd_get_quad_reg_index(src);
+
+ if (!(type & SLJIT_SIMD_FLOAT)) {
+ dst_reg = (reg_size == 4) ? freg : TMP_FREG1;
+
+ do {
+ FAIL_IF(push_inst(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 24))
+ | ((sljit_ins)1 << (19 + elem_size)) | VD(dst_reg) | VM(src)));
+ src = dst_reg;
+ } while (++elem_size < elem2_size);
+
+ if (dst_reg == TMP_FREG1)
+ return push_inst(compiler, VORR | VD(freg) | VN(TMP_FREG1) | VM(TMP_FREG1));
+ return SLJIT_SUCCESS;
+ }
+
+ /* No SIMD variant, must use VFP instead. */
+ SLJIT_ASSERT(reg_size == 4);
+
+ if (freg == src) {
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20));
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src));
+ }
+
+ FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src)));
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20);
+}
+
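The integer path of sljit_emit_simd_extend above relies on one VSHLL property: a shift equal to the source element width widens every lane to twice its size, so the do/while emits one VSHLL per doubling step until elem2_size is reached (u8 to u32 takes two). A scalar model of the loop shape:

    /* Count the VSHLL steps between two element sizes (log2 widths). */
    static int extend_steps(int elem_size, int elem2_size)
    {
        int steps = 0;
        do {
            steps++;              /* one VSHLL, lane width doubles */
        } while (++elem_size < elem2_size);
        return steps;             /* extend_steps(0, 2) == 2 */
    }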
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins, imms;
+ sljit_s32 dst_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
+
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ switch (elem_size) {
+ case 0:
+ imms = 0x243219;
+ ins = VSHR | (1 << 24) | (0x9 << 16);
+ break;
+ case 1:
+ imms = (reg_size == 4) ? 0x243219 : 0x2231;
+ ins = VSHR | (1 << 24) | (0x11 << 16);
+ break;
+ case 2:
+ imms = (reg_size == 4) ? 0x2231 : 0x21;
+ ins = VSHR | (1 << 24) | (0x21 << 16);
+ break;
+ default:
+ imms = 0x21;
+ ins = VSHR | (1 << 24) | (0x1 << 16) | (1 << 7);
+ break;
+ }
+
+ if (reg_size == 4) {
+ freg = simd_get_quad_reg_index(freg);
+ ins |= (1 << 6);
+ }
+
+ SLJIT_ASSERT((freg_map[TMP_FREG1] & 0x1) == 0);
+ FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG1) | VM(freg)));
+
+ if (reg_size == 4 && elem_size > 0)
+ FAIL_IF(push_inst(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD(TMP_FREG1) | VM(TMP_FREG1)));
+
+ ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;
+
+ while (imms >= 0x100) {
+ FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | ((imms & 0xff) << 16) | VD(TMP_FREG1) | VM(TMP_FREG1)));
+ imms >>= 8;
+ }
+
+ FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | (1 << 7) | (imms << 16) | VD(TMP_FREG1) | VM(TMP_FREG1)));
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(dst_r) | VN(TMP_FREG1)));
+
+ if (reg_size == 4 && elem_size == 0) {
+ SLJIT_ASSERT(freg_map[TMP_FREG1] + 1 == freg_map[TMP_FREG2]);
+ FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(TMP_REG2) | VN(TMP_FREG2)));
+ FAIL_IF(push_inst(compiler, ORR | RD(dst_r) | RN(dst_r) | RM(TMP_REG2) | (0x8 << 7)));
+ }
+
+ if (dst_r == TMP_REG1)
+ return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
+
+ return SLJIT_SUCCESS;
+}
+
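sljit_emit_simd_sign packs the top bit of each lane into a general-purpose register: the initial VSHR reduces every lane to its sign bit, the VSRA chain (per-step shift encodings packed one per byte of imms) shift-accumulates neighbouring lanes together until the mask sits in the low element, and VMOV_s extracts it; the 16-byte byte-element case needs the extra VMOV_s/ORR pair because its 16-bit mask spans two 8-bit lanes. The scalar result being computed, shown for 16 x u8:

    /* Bit i of the result is the sign (top) bit of lane i. */
    static sljit_u32 sign_mask_16x8(const sljit_u8 lanes[16])
    {
        sljit_u32 mask = 0;
        int i;
        for (i = 0; i < 16; i++)
            mask |= (sljit_u32)(lanes[i] >> 7) << i;
        return mask;
    }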
#undef FPU_LOAD
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
@@ -4104,6 +4364,9 @@
{
sljit_u32 ins;
+ /* temp_reg == mem_reg is undefined, so use another temp register. */
+ SLJIT_UNUSED_ARG(temp_reg);
+
CHECK_ERROR();
CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
@@ -4120,7 +4383,10 @@
}
FAIL_IF(push_inst(compiler, ins | RN(mem_reg) | RD(TMP_REG1) | RM(src_reg)));
- return push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(TMP_REG1) | 0);
+ if (op & SLJIT_SET_ATOMIC_STORED)
+ return push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(TMP_REG1));
+
+ return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
diff --git a/src/sljit/sljitNativeARM_64.c b/src/sljit/sljitNativeARM_64.c
index f9c64d7..8194670 100644
--- a/src/sljit/sljitNativeARM_64.c
+++ b/src/sljit/sljitNativeARM_64.c
@@ -98,6 +98,7 @@
#define FCMP 0x1e602000
#define FCSEL 0x1e600c00
#define FCVT 0x1e224000
+#define FCVTL 0x0e217800
#define FCVTZS 0x9e780000
#define FDIV 0x1e601800
#define FMOV 0x1e604000
@@ -143,7 +144,9 @@
#define SCVTF 0x9e620000
#define SDIV 0x9ac00c00
#define SMADDL 0x9b200000
+#define SMOV 0x0e002c00
#define SMULH 0x9b403c00
+#define SSHLL 0x0f00a400
#define ST1 0x0c007000
#define ST1_s 0x0d000000
#define STP 0xa9000000
@@ -169,12 +172,15 @@
#define UDIV 0x9ac00800
#define UMOV 0x0e003c00
#define UMULH 0x9bc03c00
+#define USHLL 0x2f00a400
+#define USHR 0x2f000400
+#define USRA 0x2f001400
+#define XTN 0x0e212800
#define CSET (CSINC | RM(TMP_ZERO) | RN(TMP_ZERO))
#define LDR (STRI | (1 << 22))
#define LDRB (STRBI | (1 << 22))
#define LDRH (LDRB | (1 << 30))
-#define LDRSW ((LDRI ^ (1 << 30)) ^ (0x3 << 22))
#define MOV (ORR | RN(TMP_ZERO))
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
@@ -423,7 +429,7 @@
case SLJIT_HAS_FPU:
case SLJIT_HAS_SIMD:
#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
/* Available by default. */
return 1;
@@ -437,6 +443,7 @@
case SLJIT_HAS_PREFETCH:
case SLJIT_HAS_COPY_F32:
case SLJIT_HAS_COPY_F64:
+ case SLJIT_HAS_ATOMIC:
return 1;
default:
@@ -1423,33 +1430,33 @@
break;
case SLJIT_MOV_U8:
mem_flags = BYTE_SIZE;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u8)srcw;
break;
case SLJIT_MOV_S8:
mem_flags = BYTE_SIZE | SIGNED;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s8)srcw;
break;
case SLJIT_MOV_U16:
mem_flags = HALF_SIZE;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u16)srcw;
break;
case SLJIT_MOV_S16:
mem_flags = HALF_SIZE | SIGNED;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s16)srcw;
break;
case SLJIT_MOV_U32:
mem_flags = INT_SIZE;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u32)srcw;
break;
case SLJIT_MOV_S32:
case SLJIT_MOV32:
mem_flags = INT_SIZE | SIGNED;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s32)srcw;
break;
default:
@@ -1458,7 +1465,7 @@
break;
}
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
else if (!(src & SLJIT_MEM))
dst_r = src;
@@ -1538,12 +1545,12 @@
src2 = TMP_REG2;
}
- if (src1 & SLJIT_IMM)
+ if (src1 == SLJIT_IMM)
flags |= ARG1_IMM;
else
src1w = src1;
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
flags |= ARG2_IMM;
else
src2w = src2;
@@ -1590,7 +1597,7 @@
inv_bits = (op & SLJIT_32) ? W_OP : 0;
- if (src3 & SLJIT_IMM) {
+ if (src3 == SLJIT_IMM) {
mask = inv_bits ? 0x1f : 0x3f;
src3w &= mask;
@@ -1705,7 +1712,7 @@
{
CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
- if (type == SLJIT_INT_REGISTER)
+ if (type == SLJIT_GP_REGISTER)
return reg_map[reg];
if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_64 && type != SLJIT_SIMD_REG_128)
@@ -1808,7 +1815,7 @@
if (src & SLJIT_MEM) {
emit_op_mem(compiler, (ins & W_OP) ? WORD_SIZE : INT_SIZE, TMP_REG1, src, srcw, TMP_REG1);
src = TMP_REG1;
- } else if (src & SLJIT_IMM) {
+ } else if (src == SLJIT_IMM) {
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
}
@@ -1829,7 +1836,7 @@
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
inv_bits |= W_OP;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s32)srcw;
}
@@ -1845,7 +1852,7 @@
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) {
inv_bits |= W_OP;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u32)srcw;
}
@@ -2216,7 +2223,7 @@
PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
}
- else if (src & SLJIT_IMM) {
+ else if (src == SLJIT_IMM) {
PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
}
@@ -2240,7 +2247,7 @@
CHECK_ERROR();
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
@@ -2349,7 +2356,7 @@
ADJUST_LOCAL_OFFSET(src1, src1w);
- if (src1 & SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
if (type & SLJIT_32)
src1w = (sljit_s32)src1w;
FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
@@ -2775,13 +2782,13 @@
ins |= (sljit_ins)1 << 30;
if (type & SLJIT_SIMD_FLOAT) {
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
return push_inst(compiler, MOVI | (ins & ((sljit_ins)1 << 30)) | VD(freg));
return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src));
}
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
if (elem_size < 3)
srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
@@ -2822,6 +2829,18 @@
if (type & SLJIT_SIMD_TEST)
return SLJIT_SUCCESS;
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 30);
+
+ if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
+ FAIL_IF(push_inst(compiler, ORR_v | ins | VD(TMP_FREG1) | VN(freg) | VM(freg)));
+ srcdst = TMP_FREG1;
+ srcdstw = 0;
+ }
+
+ FAIL_IF(push_inst(compiler, MOVI | ins | VD(freg)));
+ }
+
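One aliasing subtlety in the new SLJIT_SIMD_LANE_ZERO path above: the register is cleared with MOVI before the lane is inserted, so when srcdst is freg itself the value about to be inserted would be destroyed; it is therefore copied out to TMP_FREG1 first. In scalar form:

    /* Lane-zero insert with the alias handled: read src before
       clearing, since src may point into dst. */
    static void lane_zero_insert(sljit_f32 dst[4], const sljit_f32 *src, int lane)
    {
        sljit_f32 saved = *src;
        int i;
        for (i = 0; i < 4; i++)
            dst[i] = 0;
        dst[lane] = saved;
    }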
if (srcdst & SLJIT_MEM) {
FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
@@ -2840,14 +2859,14 @@
if (type & SLJIT_SIMD_FLOAT) {
if (type & SLJIT_SIMD_STORE)
- ins = INS_e | ((sljit_ins)1 << (16 + elem_size)) | ((sljit_ins)lane_index << (11 + elem_size)) | RD(srcdst) | VN(freg);
+ ins = INS_e | ((sljit_ins)1 << (16 + elem_size)) | ((sljit_ins)lane_index << (11 + elem_size)) | VD(srcdst) | VN(freg);
else
- ins = INS_e | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)) | VD(freg) | RN(srcdst);
+ ins = INS_e | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)) | VD(freg) | VN(srcdst);
return push_inst(compiler, ins);
}
- if (srcdst & SLJIT_IMM) {
+ if (srcdst == SLJIT_IMM) {
if (elem_size < 3)
srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
@@ -2855,9 +2874,17 @@
srcdst = TMP_REG1;
}
- if (type & SLJIT_SIMD_STORE)
- ins = UMOV | RD(srcdst) | VN(freg);
- else
+ if (type & SLJIT_SIMD_STORE) {
+ ins = RD(srcdst) | VN(freg);
+
+ if ((type & SLJIT_SIMD_LANE_SIGNED) && (elem_size < 2 || (elem_size == 2 && !(type & SLJIT_32)))) {
+ ins |= SMOV;
+
+ if (!(type & SLJIT_32))
+ ins |= (sljit_ins)1 << 30;
+ } else
+ ins |= UMOV;
+ } else
ins = INS | VD(freg) | RN(srcdst);
if (elem_size == 3)
@@ -2894,6 +2921,129 @@
return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
+
+ if (reg_size == 4 && elem2_size - elem_size == 1)
+ FAIL_IF(push_inst(compiler, LD1 | ((sljit_ins)elem_size << 10) | RN(src) | VT(freg)));
+ else
+ FAIL_IF(push_inst(compiler, LD1_s | ((sljit_ins)0x2000 << (reg_size - elem2_size + elem_size)) | RN(src) | VT(freg)));
+ src = freg;
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ SLJIT_ASSERT(reg_size == 4);
+ return push_inst(compiler, FCVTL | (1 << 22) | VD(freg) | VN(src));
+ }
+
+ do {
+ FAIL_IF(push_inst(compiler, ((type & SLJIT_SIMD_EXTEND_SIGNED) ? SSHLL : USHLL)
+ | ((sljit_ins)1 << (19 + elem_size)) | VD(freg) | VN(src)));
+ src = freg;
+ } while (++elem_size < elem2_size);
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins, imms;
+ sljit_s32 dst_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
+
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ switch (elem_size) {
+ case 0:
+ imms = 0x643219;
+ ins = USHR | (0x9 << 16);
+ break;
+ case 1:
+ imms = (reg_size == 4) ? 0x643219 : 0x6231;
+ ins = USHR | (0x11 << 16);
+ break;
+ case 2:
+ imms = (reg_size == 4) ? 0x6231 : 0x61;
+ ins = USHR | (0x21 << 16);
+ break;
+ default:
+ imms = 0x61;
+ ins = USHR | (0x41 << 16);
+ break;
+ }
+
+ if (reg_size == 4)
+ ins |= (1 << 30);
+
+ FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG1) | VN(freg)));
+
+ if (reg_size == 4 && elem_size > 0)
+ FAIL_IF(push_inst(compiler, XTN | ((sljit_ins)(elem_size - 1) << 22) | VD(TMP_FREG1) | VN(TMP_FREG1)));
+
+ if (imms >= 0x100) {
+ ins = (reg_size == 4 && elem_size == 0) ? (1 << 30) : 0;
+
+ do {
+ FAIL_IF(push_inst(compiler, USRA | ins | ((imms & 0xff) << 16) | VD(TMP_FREG1) | VN(TMP_FREG1)));
+ imms >>= 8;
+ } while (imms >= 0x100);
+ }
+
+ FAIL_IF(push_inst(compiler, USRA | (1 << 30) | (imms << 16) | VD(TMP_FREG1) | VN(TMP_FREG1)));
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ ins = (0x1 << 16);
+
+ if (reg_size == 4 && elem_size == 0) {
+ FAIL_IF(push_inst(compiler, INS_e | (0x3 << 16) | (0x8 << 11) | VD(TMP_FREG1) | VN(TMP_FREG1)));
+ ins = (0x2 << 16);
+ }
+
+ FAIL_IF(push_inst(compiler, UMOV | ins | RD(dst_r) | VN(TMP_FREG1)));
+
+ if (dst_r == TMP_REG1)
+ return emit_op_mem(compiler, STORE | ((type & SLJIT_32) ? INT_SIZE : WORD_SIZE), TMP_REG1, dst, dstw, TMP_REG2);
+
+ return SLJIT_SUCCESS;
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst_reg,
sljit_s32 mem_reg)
@@ -2906,10 +3056,8 @@
#ifdef __ARM_FEATURE_ATOMICS
switch (GET_OPCODE(op)) {
case SLJIT_MOV32:
- ins = LDR ^ (1 << 30);
- break;
case SLJIT_MOV_U32:
- ins = LDRSW;
+ ins = LDR ^ (1 << 30);
break;
case SLJIT_MOV_U16:
ins = LDRH;
@@ -2978,7 +3126,7 @@
}
if (cmp) {
- FAIL_IF(push_inst(compiler, MOV ^ inv_bits | RM(temp_reg) | RD(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RM(temp_reg) | RD(TMP_REG1)));
tmp = TMP_REG1;
}
FAIL_IF(push_inst(compiler, ins | RM(tmp) | RN(mem_reg) | RD(src_reg)));
@@ -2986,7 +3134,7 @@
return SLJIT_SUCCESS;
FAIL_IF(push_inst(compiler, cmp | RM(tmp) | RN(temp_reg)));
- FAIL_IF(push_inst(compiler, CSET ^ inv_bits | RD(tmp)));
+ FAIL_IF(push_inst(compiler, (CSET ^ inv_bits) | RD(tmp)));
return push_inst(compiler, cmp | RM(tmp) | RN(TMP_ZERO));
#else /* !__ARM_FEATURE_ATOMICS */
SLJIT_UNUSED_ARG(tmp);
diff --git a/src/sljit/sljitNativeARM_T2_32.c b/src/sljit/sljitNativeARM_T2_32.c
index 8d0b769..f914eb7 100644
--- a/src/sljit/sljitNativeARM_T2_32.c
+++ b/src/sljit/sljitNativeARM_T2_32.c
@@ -41,16 +41,26 @@
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
-#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
+#define TMP_FREG1 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 1)
+#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 2)
/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
- 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7
+static const sljit_u8 freg_map[(SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 3] = {
+ 0,
+ 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
+ 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
+ 6, 7
+};
+
+static const sljit_u8 freg_ebit_map[(SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 3] = {
+ 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0
};
#define COPY_BITS(src, from, to, bits) \
@@ -79,9 +89,11 @@
#define RD4(rd) ((sljit_ins)reg_map[rd] << 8)
#define RT4(rt) ((sljit_ins)reg_map[rt] << 12)
#define RN4(rn) ((sljit_ins)reg_map[rn] << 16)
-#define VM4(dm) ((sljit_ins)freg_map[dm])
-#define VD4(dd) ((sljit_ins)freg_map[dd] << 12)
-#define VN4(dn) ((sljit_ins)freg_map[dn] << 16)
+
+#define VM4(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
+#define VD4(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
+#define VN4(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))
+
#define IMM5(imm) \
(COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)imm & 0x3) << 6))
#define IMM12(imm) \
@@ -128,12 +140,12 @@
#define EORS 0x4040
#define EOR_W 0xea800000
#define IT 0xbf00
-#define LDAEX 0xe8d00fef
-#define LDAEXB 0xe8d00fcf
-#define LDAEXH 0xe8d00fdf
#define LDR 0xf8d00000
#define LDR_SP 0x9800
#define LDRD 0xe9500000
+#define LDREX 0xe8500f00
+#define LDREXB 0xe8d00f4f
+#define LDREXH 0xe8d00f5f
#define LDRI 0xf8500800
#define LSLS 0x4080
#define LSLSI 0x0000
@@ -180,9 +192,9 @@
#define SMULL 0xfb800000
#define STR_SP 0x9000
#define STRD 0xe9400000
-#define STREX 0xe8c00fe0
-#define STREXB 0xe8c00fc0
-#define STREXH 0xe8c00fd0
+#define STREX 0xe8400000
+#define STREXB 0xe8c00f40
+#define STREXH 0xe8c00f50
#define SUBS 0x1a00
#define SUBSI3 0x1e00
#define SUBSI8 0x3800
@@ -222,17 +234,37 @@
#define VMOV2 0xec400a10
#define VMOV_i 0xef800010
#define VMOV_s 0xee000b10
+#define VMOVN 0xffb20200
#define VMRS 0xeef1fa10
#define VMUL_F32 0xee200a00
#define VNEG_F32 0xeeb10a40
#define VORR 0xef200110
#define VPOP 0xecbd0b00
#define VPUSH 0xed2d0b00
+#define VSHLL 0xef800a10
+#define VSHR 0xef800010
+#define VSRA 0xef800110
#define VST1 0xf9000000
#define VST1_s 0xf9800000
#define VSTR_F32 0xed000a00
#define VSUB_F32 0xee300a40
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+
+static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
+{
+ if (compiler->scratches == -1)
+ return 0;
+
+ if (is_32 && fr >= (SLJIT_FS0 + SLJIT_FR0) && fr <= (SLJIT_FS0 + SLJIT_FS0))
+ fr -= SLJIT_FS0;
+
+ return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
+ || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0);
+}
+
+#endif /* SLJIT_ARGUMENT_CHECKS */
+
static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
{
sljit_u16 *ptr;
@@ -509,15 +541,16 @@
{
switch (feature_type) {
case SLJIT_HAS_FPU:
+ case SLJIT_HAS_F64_AS_F32_PAIR:
case SLJIT_HAS_SIMD:
- case SLJIT_SIMD_REGS_ARE_PAIRS:
#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
/* Available by default. */
return 1;
#endif
+ case SLJIT_SIMD_REGS_ARE_PAIRS:
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CTZ:
case SLJIT_HAS_REV:
@@ -526,6 +559,7 @@
case SLJIT_HAS_PREFETCH:
case SLJIT_HAS_COPY_F32:
case SLJIT_HAS_COPY_F64:
+ case SLJIT_HAS_ATOMIC:
return 1;
default:
@@ -682,9 +716,14 @@
break;
case SLJIT_ADDC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
- imm = get_imm(imm);
- if (imm != INVALID_IMM)
- return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+ imm2 = get_imm(imm);
+ if (imm2 != INVALID_IMM)
+ return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
+ if (flags & ARG2_IMM) {
+ imm = get_imm(~imm);
+ if (imm != INVALID_IMM)
+ return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+ }
break;
case SLJIT_SUB:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
@@ -737,9 +776,12 @@
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
if (flags & ARG1_IMM)
break;
- imm = get_imm(imm);
+ imm2 = get_imm(imm);
+ if (imm2 != INVALID_IMM)
+ return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
+ imm = get_imm(~imm);
if (imm != INVALID_IMM)
- return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
+ return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
break;
case SLJIT_AND:
imm2 = get_imm(imm);
@@ -818,8 +860,7 @@
imm = arg2;
arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
FAIL_IF(load_immediate(compiler, (sljit_s32)arg2, imm));
- }
- else {
+ } else {
imm = arg1;
arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
FAIL_IF(load_immediate(compiler, (sljit_s32)arg1, imm));
@@ -1754,22 +1795,22 @@
break;
case SLJIT_MOV_U8:
flags = BYTE_SIZE;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u8)srcw;
break;
case SLJIT_MOV_S8:
flags = BYTE_SIZE | SIGNED;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s8)srcw;
break;
case SLJIT_MOV_U16:
flags = HALF_SIZE;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u16)srcw;
break;
case SLJIT_MOV_S16:
flags = HALF_SIZE | SIGNED;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s16)srcw;
break;
default:
@@ -1778,7 +1819,7 @@
break;
}
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw));
else if (src & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1));
@@ -1831,7 +1872,7 @@
if (dst == TMP_REG1)
flags |= UNUSED_RETURN;
- if (src1 & SLJIT_IMM)
+ if (src1 == SLJIT_IMM)
flags |= ARG1_IMM;
else if (src1 & SLJIT_MEM) {
emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1);
@@ -1840,7 +1881,7 @@
else
src1w = src1;
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
flags |= ARG2_IMM;
else if (src2 & SLJIT_MEM) {
src2_reg = (!(flags & ARG1_IMM) && (src1w == TMP_REG1)) ? TMP_REG2 : TMP_REG1;
@@ -1889,7 +1930,7 @@
ADJUST_LOCAL_OFFSET(src3, src3w);
- if (src3 & SLJIT_IMM) {
+ if (src3 == SLJIT_IMM) {
src3w &= 0x1f;
if (src3w == 0)
@@ -1998,7 +2039,7 @@
{
CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
- if (type == SLJIT_INT_REGISTER)
+ if (type == SLJIT_GP_REGISTER)
return reg_map[reg];
if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64)
@@ -2259,8 +2300,10 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
sljit_s32 freg, sljit_f32 value)
{
+#if defined(__ARM_NEON) && __ARM_NEON
sljit_u32 exp;
sljit_ins ins;
+#endif /* NEON */
union {
sljit_u32 imm;
sljit_f32 value;
@@ -2271,6 +2314,7 @@
u.value = value;
+#if defined(__ARM_NEON) && __ARM_NEON
if ((u.imm << (32 - 19)) == 0) {
exp = (u.imm >> (23 + 2)) & 0x3f;
@@ -2279,6 +2323,7 @@
return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
}
}
+#endif /* NEON */
FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG1));
@@ -2287,8 +2332,10 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
sljit_s32 freg, sljit_f64 value)
{
+#if defined(__ARM_NEON) && __ARM_NEON
sljit_u32 exp;
sljit_ins ins;
+#endif /* NEON */
union {
sljit_u32 imm[2];
sljit_f64 value;
@@ -2299,6 +2346,7 @@
u.value = value;
+#if defined(__ARM_NEON) && __ARM_NEON
if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;
@@ -2307,6 +2355,7 @@
return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
}
}
+#endif /* NEON */
FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
if (u.imm[0] == u.imm[1])
@@ -2729,7 +2778,7 @@
SLJIT_ASSERT(reg_map[TMP_REG1] != 14);
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
if (FAST_IS_REG(src)) {
SLJIT_ASSERT(reg_map[src] != 14);
return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
@@ -2929,7 +2978,7 @@
cc = get_cc(compiler, type & ~SLJIT_32);
- if (!(src1 & SLJIT_IMM)) {
+ if (src1 != SLJIT_IMM) {
FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src1));
}
@@ -3018,7 +3067,7 @@
imm = get_imm((sljit_uw)(memw & ~0xfff));
if (imm != INVALID_IMM)
- memw &= 0xff;
+ memw &= 0xfff;
}
if (imm == INVALID_IMM) {
@@ -3357,6 +3406,20 @@
return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem));
}
+static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
+{
+ freg += freg & 0x1;
+
+ SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));
+
+ if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
+ freg--;
+
+ return freg;
+}
+
+#define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 freg,
sljit_s32 srcdst, sljit_sw srcdstw)
@@ -3381,9 +3444,12 @@
return SLJIT_SUCCESS;
if (reg_size == 4)
- freg -= 1 - (freg & 0x1);
+ freg = simd_get_quad_reg_index(freg);
if (!(srcdst & SLJIT_MEM)) {
+ if (reg_size == 4)
+ srcdst = simd_get_quad_reg_index(srcdst);
+
if (type & SLJIT_SIMD_STORE)
ins = VD4(srcdst) | VN4(freg) | VM4(freg);
else
@@ -3530,9 +3596,9 @@
return SLJIT_SUCCESS;
if (reg_size == 4)
- freg -= 1 - (freg & 0x1);
+ freg = simd_get_quad_reg_index(freg);
- if ((src & SLJIT_IMM) && srcw == 0)
+ if (src == SLJIT_IMM && srcw == 0)
return push_inst32(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD4(freg));
if (SLJIT_UNLIKELY(elem_size == 3)) {
@@ -3544,7 +3610,7 @@
} else if (freg != src)
FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)));
- freg++;
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
if (freg != src)
return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src));
@@ -3563,15 +3629,16 @@
}
if (type & SLJIT_SIMD_FLOAT) {
- ins = ((sljit_ins)1 << (16 + elem_size));
+ SLJIT_ASSERT(elem_size == 2);
+ ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));
if (reg_size == 4)
ins |= 1 << 6;
- return push_inst32(compiler, VDUP_s | ins | VD4(freg) | VM4(src));
+ return push_inst32(compiler, VDUP_s | ins | VD4(freg) | (sljit_ins)freg_map[src]);
}
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
if (elem_size < 2)
srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
@@ -3628,13 +3695,37 @@
if (type & SLJIT_SIMD_TEST)
return SLJIT_SUCCESS;
- if (reg_size == 4) {
- freg -= 1 - (freg & 0x1);
+ if (reg_size == 4)
+ freg = simd_get_quad_reg_index(freg);
- if (lane_index >= (0x8 >> elem_size)) {
- lane_index -= (0x8 >> elem_size);
- freg++;
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
+ if (lane_index == 1)
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+
+ if (srcdst != freg)
+ FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(srcdst) | VM4(srcdst)));
+
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ return push_inst32(compiler, VMOV_i | VD4(freg));
+ }
+
+ if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) {
+ FAIL_IF(push_inst32(compiler, VORR | ins | VD4(TMP_FREG1) | VN4(freg) | VM4(freg)));
+ srcdst = TMP_FREG1;
+ srcdstw = 0;
+ }
}
+
+ FAIL_IF(push_inst32(compiler, VMOV_i | ins | VD4(freg)));
+ }
+
+ if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
+ lane_index -= (0x8 >> elem_size);
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
}
if (srcdst & SLJIT_MEM) {
@@ -3656,16 +3747,22 @@
}
if (type & SLJIT_SIMD_STORE) {
- if (lane_index == 0)
- return push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(srcdst) | VM4(freg));
- return push_inst32(compiler, VDUP_s | 0xc0000 | VD4(srcdst) | VM4(freg));
+ if (freg_ebit_map[freg] == 0) {
+ if (lane_index == 1)
+ freg = SLJIT_F64_SECOND(freg);
+
+ return push_inst32(compiler, VMOV_F32 | VD4(srcdst) | VM4(freg));
+ }
+
+ FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1)));
+ return push_inst32(compiler, VMOV | VN4(srcdst) | RT4(TMP_REG1));
}
FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(srcdst) | RT4(TMP_REG1)));
return push_inst32(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1));
}
- if (srcdst & SLJIT_IMM) {
+ if (srcdst == SLJIT_IMM) {
if (elem_size < 2)
srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
@@ -3684,10 +3781,10 @@
ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));
if (type & SLJIT_SIMD_STORE) {
- ins |= 0x100000;
+ ins |= (1 << 20);
- if (elem_size < 2)
- ins |= 0x800000;
+ if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
+ ins |= (1 << 23);
}
return push_inst32(compiler, VMOV_s | ins | VN4(freg) | RT4(srcdst));
@@ -3714,12 +3811,12 @@
return SLJIT_SUCCESS;
if (reg_size == 4) {
- freg -= 1 - (freg & 0x1);
- src -= 1 - (src & 0x1);
+ freg = simd_get_quad_reg_index(freg);
+ src = simd_get_quad_reg_index(src);
if (src_lane_index >= (0x8 >> elem_size)) {
src_lane_index -= (0x8 >> elem_size);
- src++;
+ src += SLJIT_QUAD_OTHER_HALF(src);
}
}
@@ -3727,7 +3824,7 @@
if (freg != src)
FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)));
- freg++;
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
if (freg != src)
return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src));
@@ -3742,26 +3839,168 @@
return push_inst32(compiler, VDUP_s | ins | VD4(freg) | VM4(src));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+ sljit_s32 dst_reg;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ freg = simd_get_quad_reg_index(freg);
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
+ if (reg_size == 4 && elem2_size - elem_size == 1)
+ FAIL_IF(push_inst32(compiler, VLD1 | (0x7 << 8) | VD4(freg) | RN4(src) | 0xf));
+ else
+ FAIL_IF(push_inst32(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD4(freg) | RN4(src) | 0xf));
+ src = freg;
+ } else if (reg_size == 4)
+ src = simd_get_quad_reg_index(src);
+
+ if (!(type & SLJIT_SIMD_FLOAT)) {
+ dst_reg = (reg_size == 4) ? freg : TMP_FREG1;
+
+ do {
+ FAIL_IF(push_inst32(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 28))
+ | ((sljit_ins)1 << (19 + elem_size)) | VD4(dst_reg) | VM4(src)));
+ src = dst_reg;
+ } while (++elem_size < elem2_size);
+
+ if (dst_reg == TMP_FREG1)
+ return push_inst32(compiler, VORR | VD4(freg) | VN4(TMP_FREG1) | VM4(TMP_FREG1));
+ return SLJIT_SUCCESS;
+ }
+
+ /* No SIMD variant, must use VFP instead. */
+ SLJIT_ASSERT(reg_size == 4);
+
+ if (freg == src) {
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20));
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src));
+ }
+
+ FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src)));
+ freg += SLJIT_QUAD_OTHER_HALF(freg);
+ return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins, imms;
+ sljit_s32 dst_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
+
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ switch (elem_size) {
+ case 0:
+ imms = 0x243219;
+ ins = VSHR | (1 << 28) | (0x9 << 16);
+ break;
+ case 1:
+ imms = (reg_size == 4) ? 0x243219 : 0x2231;
+ ins = VSHR | (1 << 28) | (0x11 << 16);
+ break;
+ case 2:
+ imms = (reg_size == 4) ? 0x2231 : 0x21;
+ ins = VSHR | (1 << 28) | (0x21 << 16);
+ break;
+ default:
+ imms = 0x21;
+ ins = VSHR | (1 << 28) | (0x1 << 16) | (1 << 7);
+ break;
+ }
+
+ if (reg_size == 4) {
+ freg = simd_get_quad_reg_index(freg);
+ ins |= (1 << 6);
+ }
+
+ SLJIT_ASSERT((freg_map[TMP_FREG1] & 0x1) == 0);
+ FAIL_IF(push_inst32(compiler, ins | VD4(TMP_FREG1) | VM4(freg)));
+
+ if (reg_size == 4 && elem_size > 0)
+ FAIL_IF(push_inst32(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD4(TMP_FREG1) | VM4(TMP_FREG1)));
+
+ ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;
+
+ while (imms >= 0x100) {
+ FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | ((imms & 0xff) << 16) | VD4(TMP_FREG1) | VM4(TMP_FREG1)));
+ imms >>= 8;
+ }
+
+ FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | (1 << 7) | (imms << 16) | VD4(TMP_FREG1) | VM4(TMP_FREG1)));
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(dst_r) | VN4(TMP_FREG1)));
+
+ if (reg_size == 4 && elem_size == 0) {
+ SLJIT_ASSERT(freg_map[TMP_FREG1] + 1 == freg_map[TMP_FREG2]);
+ FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(TMP_REG2) | VN4(TMP_FREG2)));
+ FAIL_IF(push_inst32(compiler, ORR_W | RD4(dst_r) | RN4(dst_r) | RM4(TMP_REG2) | (0x2 << 12)));
+ }
+
+ if (dst_r == TMP_REG1)
+ return emit_op_mem(compiler, STORE | WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
+
+ return SLJIT_SUCCESS;
+}
+
#undef FPU_LOAD
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst_reg,
sljit_s32 mem_reg)
{
- sljit_ins ins = 0;
+ sljit_ins ins;
CHECK_ERROR();
CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
switch (GET_OPCODE(op)) {
case SLJIT_MOV_U8:
- ins = LDAEXB;
+ ins = LDREXB;
break;
case SLJIT_MOV_U16:
- ins = LDAEXH;
+ ins = LDREXH;
break;
default:
- ins = LDAEX;
+ ins = LDREX;
break;
}
@@ -3773,25 +4012,31 @@
sljit_s32 mem_reg,
sljit_s32 temp_reg)
{
- sljit_ins ins = 0;
+ sljit_ins ins;
+
+ /* temp_reg == mem_reg is undefined, so use another temp register. */
+ SLJIT_UNUSED_ARG(temp_reg);
CHECK_ERROR();
CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
switch (GET_OPCODE(op)) {
case SLJIT_MOV_U8:
- ins = STREXB;
+ ins = STREXB | RM4(TMP_REG1);
break;
case SLJIT_MOV_U16:
- ins = STREXH;
+ ins = STREXH | RM4(TMP_REG1);
break;
default:
- ins = STREX;
+ ins = STREX | RD4(TMP_REG1);
break;
}
- FAIL_IF(push_inst32(compiler, ins | RN4(mem_reg) | RT4(src_reg)| RM4(TMP_REG1)));
- return push_inst32(compiler, CMPI_W | RN4(TMP_REG1) | 0);
+ FAIL_IF(push_inst32(compiler, ins | RN4(mem_reg) | RT4(src_reg)));
+ if (op & SLJIT_SET_ATOMIC_STORED)
+ return push_inst32(compiler, CMPI_W | RN4(TMP_REG1));
+
+ return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
diff --git a/src/sljit/sljitNativeLOONGARCH_64.c b/src/sljit/sljitNativeLOONGARCH_64.c
index 79e8202..fe6fad4 100644
--- a/src/sljit/sljitNativeLOONGARCH_64.c
+++ b/src/sljit/sljitNativeLOONGARCH_64.c
@@ -46,7 +46,6 @@
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
-
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
0, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 22, 31, 30, 29, 28, 27, 26, 25, 24, 23, 3, 13, 1, 14, 12, 15
};
@@ -245,6 +244,12 @@
#define LL_D OPC_2RI14(0x22)
#define SC_D OPC_2RI14(0x23)
+/* LoongArch V1.10 Instructions */
+#define AMCAS_B OPC_3R(0x70B0)
+#define AMCAS_H OPC_3R(0x70B1)
+#define AMCAS_W OPC_3R(0x70B2)
+#define AMCAS_D OPC_3R(0x70B3)
+
/* Other instructions */
#define BREAK OPC_3R(0x54)
#define DBGCALL OPC_3R(0x55)
@@ -332,6 +337,19 @@
#define INST(inst, type) ((sljit_ins)((type & SLJIT_32) ? inst##_W : inst##_D))
+/* LoongArch CPUCFG register for feature detection */
+#define LOONGARCH_CFG2 0x02
+#define LOONGARCH_FEATURE_LAMCAS (1 << 28)
+
+sljit_u32 cpu_feature_list = 0;
+
+static SLJIT_INLINE sljit_u32 get_cpu_features(void)
+{
+ if (cpu_feature_list == 0)
+ __asm__ ("cpucfg %0, %1" : "+&r"(cpu_feature_list) : "r"(LOONGARCH_CFG2));
+ return cpu_feature_list;
+}
+
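Atomic support on LoongArch is now gated on the CPU actually having the 1.10 LAMCAS extension: CPUCFG word 2 is read once, cached in cpu_feature_list, and bit 28 feeds SLJIT_HAS_ATOMIC. Callers are expected to probe before emitting, along these lines:

    /* On pre-1.10 cores the atomic emitters return
       SLJIT_ERR_UNSUPPORTED, so check the feature first. */
    if (sljit_has_cpu_feature(SLJIT_HAS_ATOMIC)) {
        /* sljit_emit_atomic_load / sljit_emit_atomic_store usable */
    }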
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
@@ -629,12 +647,15 @@
{
case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
/* Available by default. */
return 1;
#endif
+ case SLJIT_HAS_ATOMIC:
+ return (LOONGARCH_FEATURE_LAMCAS & get_cpu_features());
+
case SLJIT_HAS_CLZ:
case SLJIT_HAS_CTZ:
case SLJIT_HAS_REV:
@@ -1603,11 +1624,11 @@
flags |= SLOW_DEST;
if (flags & IMM_OP) {
- if ((src2 & SLJIT_IMM) && src2w != 0 && src2w <= I12_MAX && src2w >= I12_MIN) {
+ if (src2 == SLJIT_IMM && src2w != 0 && src2w <= I12_MAX && src2w >= I12_MIN) {
flags |= SRC2_IMM;
src2_r = src2w;
}
- else if ((flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) {
+ else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) {
flags |= SRC2_IMM;
src2_r = src1w;
@@ -1624,7 +1645,7 @@
src1_r = src1;
flags |= REG1_SOURCE;
}
- else if (src1 & SLJIT_IMM) {
+ else if (src1 == SLJIT_IMM) {
if (src1w) {
FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
src1_r = TMP_REG1;
@@ -1647,7 +1668,7 @@
if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
dst_r = (sljit_s32)src2_r;
}
- else if (src2 & SLJIT_IMM) {
+ else if (src2 == SLJIT_IMM) {
if (!(flags & SRC2_IMM)) {
if (src2w) {
FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
@@ -1761,24 +1782,24 @@
return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw);
case SLJIT_MOV_U32:
- return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);
case SLJIT_MOV_S32:
/* Logical operators have no W variant, so sign extended input is necessary for them. */
case SLJIT_MOV32:
- return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);
case SLJIT_MOV_U8:
- return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
+ return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
case SLJIT_MOV_S8:
- return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
+ return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
case SLJIT_MOV_U16:
- return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
+ return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
case SLJIT_MOV_S16:
- return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
+ return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
case SLJIT_CLZ:
case SLJIT_CTZ:
@@ -1813,9 +1834,9 @@
if (op & SLJIT_32) {
flags |= INT_DATA | SIGNED_DATA;
- if (src1 & SLJIT_IMM)
+ if (src1 == SLJIT_IMM)
src1w = (sljit_s32)src1w;
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
src2w = (sljit_s32)src2w;
}
@@ -1848,7 +1869,7 @@
case SLJIT_MASHR:
case SLJIT_ROTL:
case SLJIT_ROTR:
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
if (op & SLJIT_32)
src2w &= 0x1f;
else
@@ -1897,7 +1918,7 @@
ADJUST_LOCAL_OFFSET(src3, src3w);
- if (src3 & SLJIT_IMM) {
+ if (src3 == SLJIT_IMM) {
src3w &= bit_length - 1;
if (src3w == 0)
@@ -2022,7 +2043,7 @@
{
CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
- if (type == SLJIT_INT_REGISTER)
+ if (type == SLJIT_GP_REGISTER)
return reg_map[reg];
if (type != SLJIT_FLOAT_REGISTER)
@@ -2126,7 +2147,7 @@
if (src & SLJIT_MEM) {
FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
src = TMP_REG1;
- } else if (src & SLJIT_IMM) {
+ } else if (src == SLJIT_IMM) {
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
srcw = (sljit_s32)srcw;
@@ -2173,7 +2194,7 @@
if (src & SLJIT_MEM) {
FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
src = TMP_REG1;
- } else if (src & SLJIT_IMM) {
+ } else if (src == SLJIT_IMM) {
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
srcw = (sljit_u32)srcw;
@@ -2363,8 +2384,6 @@
case SLJIT_DIV_F64:
FAIL_IF(push_inst(compiler, FINST(FDIV, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
break;
- case SLJIT_COPYSIGN_F64:
- return push_inst(compiler, FINST(FCOPYSIGN, op) | FRD(dst_r) | FRJ(src1) | FRK(src2));
}
if (dst_r == TMP_FREG2)
@@ -2372,6 +2391,32 @@
return SLJIT_SUCCESS;
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w)
+{
+ sljit_s32 reg;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fop2r(compiler, op, dst, src1, src1w, src2, src2w));
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+ ADJUST_LOCAL_OFFSET(src2, src2w);
+
+ if (src2 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src2, src2w, 0, 0));
+ src2 = TMP_FREG1;
+ }
+
+ if (src1 & SLJIT_MEM) {
+ reg = (dst == src2) ? TMP_FREG1 : dst;
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, reg, src1, src1w, 0, 0));
+ src1 = reg;
+ }
+
+ return push_inst(compiler, FINST(FCOPYSIGN, op) | FRD(dst) | FRJ(src1) | FRK(src2));
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
sljit_s32 freg, sljit_f32 value)
{
@@ -2451,8 +2496,10 @@
{
switch (type) {
case SLJIT_EQUAL:
+ case SLJIT_ATOMIC_NOT_STORED:
return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
case SLJIT_NOT_EQUAL:
+ case SLJIT_ATOMIC_STORED:
return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
case SLJIT_LESS:
case SLJIT_GREATER:
@@ -2576,7 +2623,7 @@
src2 = TMP_REG2;
}
- if (src1 & SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
if (src1w != 0) {
PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
src1 = TMP_REG1;
@@ -2585,7 +2632,7 @@
src1 = TMP_ZERO;
}
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
if (src2w != 0) {
PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w));
src2 = TMP_REG2;
@@ -2653,7 +2700,7 @@
CHECK_ERROR();
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
@@ -2736,6 +2783,12 @@
FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
src_r = dst_r;
break;
+ case SLJIT_ATOMIC_STORED:
+ case SLJIT_ATOMIC_NOT_STORED:
+ FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
+ src_r = dst_r;
+ invert ^= 0x1;
+ break;
case SLJIT_OVERFLOW:
case SLJIT_NOT_OVERFLOW:
if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) {
@@ -2829,7 +2882,7 @@
if (src1 & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w));
- } else if (src1 & SLJIT_IMM) {
+ } else if (src1 == SLJIT_IMM) {
if (type & SLJIT_32)
src1w = (sljit_s32)src1w;
FAIL_IF(load_immediate(compiler, dst_reg, src1w));
@@ -2935,15 +2988,33 @@
sljit_s32 dst_reg,
sljit_s32 mem_reg)
{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(op);
- SLJIT_UNUSED_ARG(dst_reg);
- SLJIT_UNUSED_ARG(mem_reg);
+ sljit_ins ins;
CHECK_ERROR();
CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
- return SLJIT_ERR_UNSUPPORTED;
+ if (!(LOONGARCH_FEATURE_LAMCAS & get_cpu_features()))
+ return SLJIT_ERR_UNSUPPORTED;
+
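+	/* A plain load is sufficient here: the compare-and-swap emitted by
+	   the matching atomic_store detects any intervening modification. */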
+	switch (GET_OPCODE(op)) {
+ case SLJIT_MOV_U8:
+ ins = LD_BU;
+ break;
+ case SLJIT_MOV_U16:
+ ins = LD_HU;
+ break;
+ case SLJIT_MOV32:
+ ins = LD_W;
+ break;
+ case SLJIT_MOV_U32:
+ ins = LD_WU;
+ break;
+ default:
+ ins = LD_D;
+ break;
+ }
+
+ return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg) | IMM_I12(0));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler,
@@ -2952,16 +3023,50 @@
sljit_s32 mem_reg,
sljit_s32 temp_reg)
{
- SLJIT_UNUSED_ARG(compiler);
- SLJIT_UNUSED_ARG(op);
- SLJIT_UNUSED_ARG(src_reg);
- SLJIT_UNUSED_ARG(mem_reg);
- SLJIT_UNUSED_ARG(temp_reg);
+ sljit_ins ins = 0;
+ sljit_ins unsign = 0;
+ sljit_s32 tmp = temp_reg;
CHECK_ERROR();
CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
- return SLJIT_ERR_UNSUPPORTED;
+ if (!(LOONGARCH_FEATURE_LAMCAS & get_cpu_features()))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ switch (GET_OPCODE(op)) {
+ case SLJIT_MOV_U8:
+ ins = AMCAS_B;
+ unsign = BSTRPICK_D | (7 << 16);
+ break;
+ case SLJIT_MOV_U16:
+ ins = AMCAS_H;
+ unsign = BSTRPICK_D | (15 << 16);
+ break;
+ case SLJIT_MOV32:
+ ins = AMCAS_W;
+ break;
+ case SLJIT_MOV_U32:
+ ins = AMCAS_W;
+ unsign = BSTRPICK_D | (31 << 16);
+ break;
+ default:
+ ins = AMCAS_D;
+ break;
+ }
+
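+	/* AMCAS writes the value it observed in memory back into its RD
+	   operand. When the stored flag is requested, keep the caller's
+	   temp_reg intact by running the CAS on a copy in TMP_REG1 (the XOR
+	   with the zero register is a register move). */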
+ if (op & SLJIT_SET_ATOMIC_STORED) {
+ FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(temp_reg) | RK(TMP_ZERO)));
+ tmp = TMP_REG1;
+ }
+ FAIL_IF(push_inst(compiler, ins | RD(tmp) | RJ(mem_reg) | RK(src_reg)));
+ if (!(op & SLJIT_SET_ATOMIC_STORED))
+ return SLJIT_SUCCESS;
+
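+	/* tmp now holds the value AMCAS observed in memory. Re-narrow it for
+	   the sub-word variants so the comparison with temp_reg is
+	   width-exact, then compute EQUAL_FLAG = (observed == expected):
+	   XOR yields zero on a match and SLTUI rd, rj, 1 turns zero into 1. */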
+ if (unsign)
+ FAIL_IF(push_inst(compiler, unsign | RD(tmp) | RJ(tmp)));
+
+ FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(tmp) | RK(temp_reg)));
+ return push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(EQUAL_FLAG) | IMM_I12(1));
}
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
diff --git a/src/sljit/sljitNativeMIPS_32.c b/src/sljit/sljitNativeMIPS_32.c
index f22346b..9620b94 100644
--- a/src/sljit/sljitNativeMIPS_32.c
+++ b/src/sljit/sljitNativeMIPS_32.c
@@ -26,6 +26,49 @@
/* mips 32-bit arch dependent functions. */
+static sljit_s32 emit_copysign(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_sw src1, sljit_sw src2, sljit_sw dst)
+{
+ int is_32 = (op & SLJIT_32);
+ sljit_ins mfhc = MFC1, mthc = MTC1;
+ sljit_ins src1_r = FS(src1), src2_r = FS(src2), dst_r = FS(dst);
+
+ if (!is_32) {
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ mfhc = MFHC1;
+ mthc = MTHC1;
+ break;
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ src1_r |= (1 << 11);
+ src2_r |= (1 << 11);
+ dst_r |= (1 << 11);
+ break;
+ }
+ }
+
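+	/* Patch only the word that carries the sign bit: read the high words
+	   of src1 and src2, isolate the sign difference (XOR, then the two
+	   31-bit shifts), flip src1's sign with it and store the result into
+	   dst; for doubles, MOV.S below fills in dst's unpatched low word. */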
+ FAIL_IF(push_inst(compiler, mfhc | T(TMP_REG1) | src1_r, DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, mfhc | T(TMP_REG2) | src2_r, DR(TMP_REG2)));
+ if (!is_32 && src1 != dst)
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(src1) | FD(dst), MOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ else
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
+ FAIL_IF(push_inst(compiler, XOR | T(TMP_REG1) | D(TMP_REG2) | S(TMP_REG2), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, SRL | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, SLL | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, XOR | T(TMP_REG2) | D(TMP_REG1) | S(TMP_REG1), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, mthc | T(TMP_REG1) | dst_r, MOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ if (mthc == MTC1)
+ return push_inst(compiler, NOP, UNMOVABLE_INS);
+#endif /* MIPS III */
+ return SLJIT_SUCCESS;
+}
+
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm)
{
if (!(imm & ~0xffff))
@@ -48,7 +91,15 @@
sljit_s32 freg, sljit_f64 value)
{
union {
- sljit_s32 imm[2];
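+	/* Name the two 32-bit halves explicitly so the endian-dependent
+	   word order is spelled out in one place instead of via indexing. */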
+ struct {
+#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN
+ sljit_s32 lo;
+ sljit_s32 hi;
+#else /* !SLJIT_LITTLE_ENDIAN */
+ sljit_s32 hi;
+ sljit_s32 lo;
+#endif /* SLJIT_LITTLE_ENDIAN */
+ } bin;
sljit_f64 value;
} u;
@@ -57,42 +108,85 @@
u.value = value;
- if (u.imm[0] != 0)
- FAIL_IF(load_immediate(compiler, DR(TMP_REG1), u.imm[0]));
- if (u.imm[1] != 0)
- FAIL_IF(load_immediate(compiler, DR(TMP_REG2), u.imm[1]));
+ if (u.bin.lo != 0)
+ FAIL_IF(load_immediate(compiler, DR(TMP_REG1), u.bin.lo));
+ if (u.bin.hi != 0)
+ FAIL_IF(load_immediate(compiler, DR(TMP_REG2), u.bin.hi));
- FAIL_IF(push_inst(compiler, MTC1 | (u.imm[0] != 0 ? T(TMP_REG1) : TA(0)) | FS(freg), MOVABLE_INS));
- return push_inst(compiler, MTC1 | (u.imm[1] != 0 ? T(TMP_REG2) : TA(0)) | FS(freg) | (1 << 11), MOVABLE_INS);
+ FAIL_IF(push_inst(compiler, MTC1 | (u.bin.lo != 0 ? T(TMP_REG1) : TA(0)) | FS(freg), MOVABLE_INS));
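+	/* With FR=1 the high word of the double must go through MTHC1; the
+	   FR=0 path can still write the odd sibling register directly by
+	   bumping the FS field (FS(freg) | (1 << 11)). */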
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ return push_inst(compiler, MTHC1 | (u.bin.hi != 0 ? T(TMP_REG2) : TA(0)) | FS(freg), MOVABLE_INS);
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ FAIL_IF(push_inst(compiler, MTC1 | (u.bin.hi != 0 ? T(TMP_REG2) : TA(0)) | FS(freg) | (1 << 11), MOVABLE_INS));
+ break;
+ }
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
+ return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 freg, sljit_s32 reg)
{
- sljit_s32 reg2;
- sljit_ins inst;
+ sljit_s32 reg2 = 0;
+ sljit_ins inst = FS(freg);
+ sljit_ins mthc = MTC1, mfhc = MFC1;
+ int is_32 = (op & SLJIT_32);
CHECK_ERROR();
CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+ op = GET_OPCODE(op);
if (reg & REG_PAIR_MASK) {
reg2 = REG_PAIR_SECOND(reg);
reg = REG_PAIR_FIRST(reg);
- inst = T(reg2) | FS(freg) | (1 << 11);
+ inst |= T(reg2);
if (op == SLJIT_COPY_TO_F64)
FAIL_IF(push_inst(compiler, MTC1 | inst, MOVABLE_INS));
else
FAIL_IF(push_inst(compiler, MFC1 | inst, DR(reg2)));
+
+ inst = FS(freg) | (1 << 11);
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ if (cpu_feature_list & CPU_FEATURE_FR) {
+ mthc = MTHC1;
+ mfhc = MFHC1;
+ inst = FS(freg);
+ }
+#endif /* SLJIT_MIPS_REV >= 2 */
}
- inst = T(reg) | FS(freg);
+ inst |= T(reg);
+ if (!is_32 && !reg2) {
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ mthc = MTHC1;
+ mfhc = MFHC1;
+ break;
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ inst |= (1 << 11);
+ break;
+ }
+ }
- if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)
- return push_inst(compiler, MTC1 | inst, MOVABLE_INS);
+ if (op == SLJIT_COPY_TO_F64)
+ FAIL_IF(push_inst(compiler, mthc | inst, MOVABLE_INS));
+ else
+ FAIL_IF(push_inst(compiler, mfhc | inst, DR(reg)));
- return push_inst(compiler, MFC1 | inst, DR(reg));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ if (mthc == MTC1 || mfhc == MFC1)
+ return push_inst(compiler, NOP, UNMOVABLE_INS);
+#endif /* MIPS III */
+ return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
@@ -125,6 +219,11 @@
sljit_ins ins = NOP;
sljit_u8 offsets[4];
sljit_u8 *offsets_ptr = offsets;
+#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN
+ sljit_ins f64_hi = TA(7), f64_lo = TA(6);
+#else
+ sljit_ins f64_hi = TA(6), f64_lo = TA(7);
+#endif /* SLJIT_LITTLE_ENDIAN */
SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
@@ -189,20 +288,28 @@
switch (types & SLJIT_ARG_MASK) {
case SLJIT_ARG_TYPE_F64:
- if (*offsets_ptr < 4 * sizeof (sljit_sw)) {
+ if (*offsets_ptr < 4 * sizeof(sljit_sw)) {
if (prev_ins != NOP)
FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS));
/* Must be preceded by at least one other argument,
* and its starting offset must be 8 because of alignment. */
SLJIT_ASSERT((*offsets_ptr >> 2) == 2);
-
- prev_ins = MFC1 | TA(6) | FS(float_arg_count) | (1 << 11);
- ins = MFC1 | TA(7) | FS(float_arg_count);
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ prev_ins = MFHC1 | f64_hi | FS(float_arg_count);
+ break;
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ prev_ins = MFC1 | f64_hi | FS(float_arg_count) | (1 << 11);
+ break;
+ }
+ ins = MFC1 | f64_lo | FS(float_arg_count);
} else if (*offsets_ptr < 254)
ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr);
else if (*offsets_ptr == 254)
- ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1);
+ ins = MOV_fmt(FMT_D) | FS(SLJIT_FR0) | FD(TMP_FREG1);
float_arg_count--;
break;
@@ -212,7 +319,7 @@
else if (*offsets_ptr < 254)
ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr);
else if (*offsets_ptr == 254)
- ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1);
+ ins = MOV_fmt(FMT_S) | FS(SLJIT_FR0) | FD(TMP_FREG1);
float_arg_count--;
break;
@@ -336,7 +443,7 @@
SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
else if (src != PIC_ADDR_REG)
FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
diff --git a/src/sljit/sljitNativeMIPS_64.c b/src/sljit/sljitNativeMIPS_64.c
index a05e1a8..52a0d3f 100644
--- a/src/sljit/sljitNativeMIPS_64.c
+++ b/src/sljit/sljitNativeMIPS_64.c
@@ -26,6 +26,23 @@
/* mips 64-bit arch dependent functions. */
+static sljit_s32 emit_copysign(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src1, sljit_s32 src2, sljit_s32 dst)
+{
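+	/* Same sign-transfer trick as the 32-bit variant, but on the full
+	   word: keep only the sign-difference bit of src1 ^ src2 and use it
+	   to flip src1's sign before moving the result back into dst. */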
+ FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | T(TMP_REG1) | FS(src1), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | T(TMP_REG2) | FS(src2), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, XOR | S(TMP_REG2) | T(TMP_REG1) | D(TMP_REG2), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | T(TMP_REG2) | D(TMP_REG1), DR(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, SELECT_OP(DMTC1, MTC1) | T(TMP_REG1) | FS(dst), MOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ if (!(op & SLJIT_32))
+ return push_inst(compiler, NOP, UNMOVABLE_INS);
+#endif /* MIPS III */
+ return SLJIT_SUCCESS;
+}
+
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm)
{
sljit_s32 shift = 32;
@@ -141,11 +158,20 @@
u.value = value;
- if (u.imm == 0)
- return push_inst(compiler, DMTC1 | TA(0) | FS(freg), MOVABLE_INS);
+ if (u.imm == 0) {
+ FAIL_IF(push_inst(compiler, DMTC1 | TA(0) | FS(freg), MOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
+ return SLJIT_SUCCESS;
+ }
FAIL_IF(load_immediate(compiler, DR(TMP_REG1), u.imm));
- return push_inst(compiler, DMTC1 | T(TMP_REG1) | FS(freg), MOVABLE_INS);
+ FAIL_IF(push_inst(compiler, DMTC1 | T(TMP_REG1) | FS(freg), MOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
+ return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
@@ -159,9 +185,15 @@
inst = T(reg) | FS(freg);
if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)
- return push_inst(compiler, ((op & SLJIT_32) ? MTC1 : DMTC1) | inst, MOVABLE_INS);
+ FAIL_IF(push_inst(compiler, SELECT_OP(DMTC1, MTC1) | inst, MOVABLE_INS));
+ else
+ FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | inst, DR(reg)));
- return push_inst(compiler, ((op & SLJIT_32) ? MFC1 : DMFC1) | inst, DR(reg));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ if (!(op & SLJIT_32))
+ return push_inst(compiler, NOP, UNMOVABLE_INS);
+#endif /* MIPS III */
+ return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
@@ -219,17 +251,17 @@
switch (types & SLJIT_ARG_MASK) {
case SLJIT_ARG_TYPE_F64:
if (arg_count != float_arg_count)
- ins = MOV_S | FMT_D | FS(float_arg_count) | FD(arg_count);
+ ins = MOV_fmt(FMT_D) | FS(float_arg_count) | FD(arg_count);
else if (arg_count == 1)
- ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1);
+ ins = MOV_fmt(FMT_D) | FS(SLJIT_FR0) | FD(TMP_FREG1);
arg_count--;
float_arg_count--;
break;
case SLJIT_ARG_TYPE_F32:
if (arg_count != float_arg_count)
- ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count);
+ ins = MOV_fmt(FMT_S) | FS(float_arg_count) | FD(arg_count);
else if (arg_count == 1)
- ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1);
+ ins = MOV_fmt(FMT_S) | FS(SLJIT_FR0) | FD(TMP_FREG1);
arg_count--;
float_arg_count--;
break;
@@ -336,7 +368,7 @@
SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
else if (src != PIC_ADDR_REG)
FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
diff --git a/src/sljit/sljitNativeMIPS_common.c b/src/sljit/sljitNativeMIPS_common.c
index 6e53385..d80a75c 100644
--- a/src/sljit/sljitNativeMIPS_common.c
+++ b/src/sljit/sljitNativeMIPS_common.c
@@ -26,9 +26,12 @@
/* Latest MIPS architecture. */
-#ifndef __mips_hard_float
+#ifdef HAVE_PRCTL
+#include <sys/prctl.h>
+#endif
+
+#if !defined(__mips_hard_float) || defined(__mips_single_float)
/* Disable automatic detection, covers both -msoft-float and -mno-float */
-#undef SLJIT_IS_FPU_AVAILABLE
#define SLJIT_IS_FPU_AVAILABLE 0
#endif
@@ -42,6 +45,14 @@
return "MIPS64-R6" SLJIT_CPUINFO;
#endif /* SLJIT_CONFIG_MIPS_32 */
+#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 5)
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+ return "MIPS32-R5" SLJIT_CPUINFO;
+#else /* !SLJIT_CONFIG_MIPS_32 */
+ return "MIPS64-R5" SLJIT_CPUINFO;
+#endif /* SLJIT_CONFIG_MIPS_32 */
+
#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -83,27 +94,34 @@
#define EQUAL_FLAG 3
#define OTHER_FLAG 1
-#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
-#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
-#define TMP_FREG3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3)
-
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31
};
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
- 0, 0, 14, 2, 4, 6, 8, 18, 30, 28, 26, 24, 22, 20, 12, 10, 16
+#define TMP_FREG1 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 1)
+#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 2)
+#define TMP_FREG3 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 3)
+
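+/* The table is doubled: each entry in the second half is the odd pair
+   (+1) of the matching entry in the first half, so the two 32-bit
+   halves of a double can both be addressed through the map. */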
+static const sljit_u8 freg_map[(SLJIT_NUMBER_OF_FLOAT_REGISTERS << 1) + 4] = {
+ 0,
+ 0, 14, 2, 4, 6, 8, 18, 30, 28, 26, 24, 22, 20,
+ 1, 15, 3, 5, 7, 9, 19, 31, 29, 27, 25, 23, 21,
+ 12, 10, 16,
};
-#else
+#else /* !SLJIT_CONFIG_MIPS_32 */
+
+#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
+#define TMP_FREG3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3)
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
0, 0, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1, 2, 3, 4, 5, 6, 7, 8, 9, 31, 30, 29, 28, 27, 26, 25, 24, 12, 11, 10
};
-#endif
+#endif /* SLJIT_CONFIG_MIPS_32 */
/* --------------------------------------------------------------------- */
/* Instrucion forms */
@@ -200,12 +218,18 @@
#define DMULTU (HI(0) | LO(29))
#endif /* SLJIT_MIPS_REV >= 6 */
#define DIV_S (HI(17) | FMT_S | LO(3))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
#define DINSU (HI(31) | LO(6))
-#define DMFC1 (HI(17) | (1 << 21) | LO(0))
-#define DMTC1 (HI(17) | (5 << 21) | LO(0))
+#endif /* SLJIT_MIPS_REV >= 2 */
+#define DMFC1 (HI(17) | (1 << 21))
+#define DMTC1 (HI(17) | (5 << 21))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
#define DROTR (HI(0) | (1 << 21) | LO(58))
#define DROTR32 (HI(0) | (1 << 21) | LO(62))
#define DROTRV (HI(0) | (1 << 6) | LO(22))
+#define DSBH (HI(31) | (2 << 6) | LO(36))
+#define DSHD (HI(31) | (5 << 6) | LO(36))
+#endif /* SLJIT_MIPS_REV >= 2 */
#define DSLL (HI(0) | LO(56))
#define DSLL32 (HI(0) | LO(60))
#define DSLLV (HI(0) | LO(20))
@@ -233,7 +257,10 @@
#define LWL (HI(34))
#define LWR (HI(38))
#define LWC1 (HI(49))
-#define MFC1 (HI(17) | (0 << 21))
+#define MFC1 (HI(17))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+#define MFHC1 (HI(17) | (3 << 21))
+#endif /* SLJIT_MIPS_REV >= 2 */
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#define MOD (HI(0) | (3 << 6) | LO(26))
#define MODU (HI(0) | (3 << 6) | LO(27))
@@ -241,8 +268,10 @@
#define MFHI (HI(0) | LO(16))
#define MFLO (HI(0) | LO(18))
#endif /* SLJIT_MIPS_REV >= 6 */
-#define MOV_S (HI(17) | FMT_S | LO(6))
#define MTC1 (HI(17) | (4 << 21))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+#define MTHC1 (HI(17) | (7 << 21))
+#endif /* SLJIT_MIPS_REV >= 2 */
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
#define MUH (HI(0) | (3 << 6) | LO(24))
#define MUHU (HI(0) | (3 << 6) | LO(25))
@@ -258,8 +287,10 @@
#define NOR (HI(0) | LO(39))
#define OR (HI(0) | LO(37))
#define ORI (HI(13))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
#define ROTR (HI(0) | (1 << 21) | LO(2))
#define ROTRV (HI(0) | (1 << 6) | LO(6))
+#endif /* SLJIT_MIPS_REV >= 2 */
#define SD (HI(63))
#define SDL (HI(44))
#define SDR (HI(45))
@@ -281,6 +312,9 @@
#define SWR (HI(46))
#define SWC1 (HI(57))
#define TRUNC_W_S (HI(17) | FMT_S | LO(13))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+#define WSBH (HI(31) | (2 << 6) | LO(32))
+#endif /* SLJIT_MIPS_REV >= 2 */
#define XOR (HI(0) | LO(38))
#define XORI (HI(14))
@@ -302,8 +336,10 @@
#endif /* SLJIT_MIPS_REV >= 6 */
#define PREF (HI(51))
#define PREFX (HI(19) | LO(15))
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
#define SEB (HI(31) | (16 << 6) | LO(32))
#define SEH (HI(31) | (24 << 6) | LO(32))
+#endif /* SLJIT_MIPS_REV >= 2 */
#endif /* SLJIT_MIPS_REV >= 1 */
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -324,10 +360,106 @@
#define LOAD_W LD
#endif
+#define MOV_fmt(f) (HI(17) | f | LO(6))
+
#define SIMM_MAX (0x7fff)
#define SIMM_MIN (-0x8000)
#define UIMM_MAX (0xffff)
+#define CPU_FEATURE_DETECTED (1 << 0)
+#define CPU_FEATURE_FPU (1 << 1)
+#define CPU_FEATURE_FP64 (1 << 2)
+#define CPU_FEATURE_FR (1 << 3)
+
+static sljit_u32 cpu_feature_list = 0;
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+ && (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+
+static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
+{
+ if (compiler->scratches == -1)
+ return 0;
+
+ if (is_32 && fr >= (SLJIT_FS0 + SLJIT_FR0) && fr <= (SLJIT_FS0 + SLJIT_FS0))
+ fr -= SLJIT_FS0;
+
+ return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
+ || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0);
+}
+
+#endif /* SLJIT_CONFIG_MIPS_32 && SLJIT_ARGUMENT_CHECKS */
+
+static void get_cpu_features(void)
+{
+#if !defined(SLJIT_IS_FPU_AVAILABLE) && defined(__GNUC__)
+ sljit_u32 fir = 0;
+#endif /* !SLJIT_IS_FPU_AVAILABLE && __GNUC__ */
+ sljit_u32 feature_list = CPU_FEATURE_DETECTED;
+
+#if defined(SLJIT_IS_FPU_AVAILABLE)
+#if SLJIT_IS_FPU_AVAILABLE
+ feature_list |= CPU_FEATURE_FPU;
+#if SLJIT_IS_FPU_AVAILABLE == 64
+ feature_list |= CPU_FEATURE_FP64;
+#endif /* SLJIT_IS_FPU_AVAILABLE == 64 */
+#endif /* SLJIT_IS_FPU_AVAILABLE */
+#elif defined(__GNUC__)
+ __asm__ ("cfc1 %0, $0" : "=r"(fir));
+ if ((fir & (0x3 << 16)) == (0x3 << 16))
+ feature_list |= CPU_FEATURE_FPU;
+
+#if (defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64) \
+ && (!defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV < 2)
+ if ((feature_list & CPU_FEATURE_FPU))
+ feature_list |= CPU_FEATURE_FP64;
+#else /* SLJIT_CONFIG_MIPS_32 || SLJIT_MIPS_REV >= 2 */
+ if ((fir & (1 << 22)))
+ feature_list |= CPU_FEATURE_FP64;
+#endif /* SLJIT_CONFIG_MIPS_64 && SLJIT_MIPS_REV < 2 */
+#endif /* SLJIT_IS_FPU_AVAILABLE */
+
+ if ((feature_list & CPU_FEATURE_FPU) && (feature_list & CPU_FEATURE_FP64)) {
+#if defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 6
+ feature_list |= CPU_FEATURE_FR;
+#elif defined(SLJIT_DETECT_FR) && SLJIT_DETECT_FR == 0
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 5
+ feature_list |= CPU_FEATURE_FR;
+#endif /* SLJIT_MIPS_REV >= 5 */
+#else
+ sljit_s32 flag = -1;
+#ifdef PR_GET_FP_MODE
+	flag = prctl(PR_GET_FP_MODE);
+
+	if (flag > 0)
+		feature_list |= CPU_FEATURE_FR;
+#endif /* PR_GET_FP_MODE */
+#if ((defined(SLJIT_DETECT_FR) && SLJIT_DETECT_FR == 2) \
+ || (!defined(PR_GET_FP_MODE) && (!defined(SLJIT_DETECT_FR) || SLJIT_DETECT_FR >= 1))) \
+ && (defined(__GNUC__) && (defined(__mips) && __mips >= 2))
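+	/* Direct FPU probe: load a marker into $f17, then a double into
+	   $f16. In FR=0 (paired) mode $f17 is the high half of $f16, so the
+	   ldc1 wipes the marker; if it survives, the FPU runs with FR=1. */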
+	if (flag < 0) {
+		sljit_f64 zero = 0.0;
+
+		__asm__ (".set oddspreg\n"
+ "lwc1 $f17, %0\n"
+ "ldc1 $f16, %1\n"
+ "swc1 $f17, %0\n"
+ : "+m" (flag) : "m" (zero) : "$f16", "$f17");
+ if (flag)
+ feature_list |= CPU_FEATURE_FR;
+ }
+#endif /* (!PR_GET_FP_MODE || (PR_GET_FP_MODE && SLJIT_DETECT_FR == 2)) && __GNUC__ */
+#endif /* SLJIT_MIPS_REV >= 6 */
+#else /* !SLJIT_CONFIG_MIPS_32 */
+	/* StatusFR=1 is the only mode the MIPS64 code supports. */
+ feature_list |= CPU_FEATURE_FR;
+#endif /* SLJIT_CONFIG_MIPS_32 */
+ }
+
+ cpu_feature_list = feature_list;
+}
+
/* dest_reg is the absolute name of the register
Useful for reordering instructions in the delay slot. */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 delay_slot)
@@ -721,20 +853,20 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
-#if defined(__GNUC__) && !defined(SLJIT_IS_FPU_AVAILABLE)
- sljit_sw fir = 0;
-#endif /* __GNUC__ && !SLJIT_IS_FPU_AVAILABLE */
-
switch (feature_type) {
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+ && (!defined(SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE)
+ case SLJIT_HAS_F64_AS_F32_PAIR:
+ if (!cpu_feature_list)
+ get_cpu_features();
+
+ return (cpu_feature_list & CPU_FEATURE_FR) != 0;
+#endif /* SLJIT_CONFIG_MIPS_32 && SLJIT_IS_FPU_AVAILABLE */
case SLJIT_HAS_FPU:
-#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
-#elif defined(__GNUC__)
- __asm__ ("cfc1 %0, $0" : "=r"(fir));
- return (fir >> 22) & 0x1;
-#else
-#error "FIR check is not implemented for this architecture"
-#endif
+ if (!cpu_feature_list)
+ get_cpu_features();
+
+ return (cpu_feature_list & CPU_FEATURE_FPU) != 0;
case SLJIT_HAS_ZERO_REGISTER:
case SLJIT_HAS_COPY_F32:
case SLJIT_HAS_COPY_F64:
@@ -749,6 +881,7 @@
return 2;
#endif /* SLJIT_MIPS_REV >= 1 */
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
+ case SLJIT_HAS_REV:
case SLJIT_HAS_ROT:
return 1;
#endif /* SLJIT_MIPS_REV >= 2 */
@@ -801,6 +934,12 @@
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr);
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
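+/* SELECT_OP() picks the 64-bit or the 32-bit opcode; it is defined
+   before the arch-specific includes below so helpers such as
+   emit_copysign() can use it. It expands `op` from the caller's scope. */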
+#define SELECT_OP(a, b) (b)
+#else
+#define SELECT_OP(a, b) (!(op & SLJIT_32) ? a : b)
+#endif
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#include "sljitNativeMIPS_32.c"
#else
#include "sljitNativeMIPS_64.c"
@@ -927,10 +1066,19 @@
if (word_arg_count == 0 && float_arg_count <= 2) {
if (float_arg_count == 1)
- FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_D) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
} else if (arg_count < 4) {
FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS));
- FAIL_IF(push_inst(compiler, MTC1 | TA(5 + arg_count) | FS(float_arg_count) | (1 << 11), MOVABLE_INS));
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ FAIL_IF(push_inst(compiler, MTHC1 | TA(5 + arg_count) | FS(float_arg_count), MOVABLE_INS));
+ break;
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ FAIL_IF(push_inst(compiler, MTC1 | TA(5 + arg_count) | FS(float_arg_count) | (1 << 11), MOVABLE_INS));
+ break;
+ }
} else
FAIL_IF(push_inst(compiler, LDC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS));
arg_count++;
@@ -940,7 +1088,7 @@
if (word_arg_count == 0 && float_arg_count <= 2) {
if (float_arg_count == 1)
- FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
} else if (arg_count < 4)
FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS));
else
@@ -975,16 +1123,16 @@
case SLJIT_ARG_TYPE_F64:
float_arg_count++;
if (arg_count != float_arg_count)
- FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(arg_count) | FD(float_arg_count), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_D) | FS(arg_count) | FD(float_arg_count), MOVABLE_INS));
else if (arg_count == 1)
- FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_D) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
break;
case SLJIT_ARG_TYPE_F32:
float_arg_count++;
if (arg_count != float_arg_count)
- FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(arg_count) | FD(float_arg_count), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(arg_count) | FD(float_arg_count), MOVABLE_INS));
else if (arg_count == 1)
- FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT_S) | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
break;
default:
word_arg_count++;
@@ -1147,7 +1295,7 @@
FAIL_IF(emit_stack_frame_release(compiler, 1, &ins));
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
FAIL_IF(push_inst(compiler, JR | S(src), UNMOVABLE_INS));
return push_inst(compiler, ins, UNMOVABLE_INS);
}
@@ -1397,16 +1545,12 @@
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-#define SELECT_OP(a, b) (b)
-
#define EMIT_SHIFT(dimm, dimm32, imm, dv, v) \
op_imm = (imm); \
op_v = (v);
#else /* !SLJIT_CONFIG_MIPS_32 */
-#define SELECT_OP(a, b) \
- (!(op & SLJIT_32) ? a : b)
#define EMIT_SHIFT(dimm, dimm32, imm, dv, v) \
op_dimm = (dimm); \
@@ -1424,9 +1568,9 @@
sljit_s32 is_clz = (GET_OPCODE(op) == SLJIT_CLZ);
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
sljit_ins word_size = (op & SLJIT_32) ? 32 : 64;
-#else /* !SLJIT_CONFIG_RISCV_64 */
+#else /* !SLJIT_CONFIG_MIPS_64 */
sljit_ins word_size = 32;
-#endif /* SLJIT_CONFIG_RISCV_64 */
+#endif /* SLJIT_CONFIG_MIPS_64 */
/* The TMP_REG2 is the next value. */
if (src != TMP_REG2)
@@ -1470,21 +1614,36 @@
static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src)
{
- SLJIT_UNUSED_ARG(op);
+#if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64
+ int is_32 = (op & SLJIT_32);
+#endif /* SLJIT_CONFIG_MIPS_64 */
-#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
- if (!(op & SLJIT_32)) {
-#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
- FAIL_IF(push_inst(compiler, ORI | SA(0) | TA(OTHER_FLAG) | 0xffff, OTHER_FLAG));
- FAIL_IF(push_inst(compiler, DROTR32 | T(src) | D(dst) | SH_IMM(0), DR(dst)));
- FAIL_IF(push_inst(compiler, DSLL32 | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(0), OTHER_FLAG));
+ op = GET_OPCODE(op);
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+#if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64
+ if (!is_32 && (op == SLJIT_REV)) {
+ FAIL_IF(push_inst(compiler, DSBH | T(src) | D(dst), DR(dst)));
+ return push_inst(compiler, DSHD | T(dst) | D(dst), DR(dst));
+ }
+ if (op != SLJIT_REV && src != TMP_REG2) {
+ FAIL_IF(push_inst(compiler, SLL | T(src) | D(TMP_REG1), DR(TMP_REG1)));
+ src = TMP_REG1;
+ }
+#endif /* SLJIT_CONFIG_MIPS_64 */
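+	/* 32-bit byte reversal: WSBH swaps the bytes within each halfword
+	   and the 16-bit rotate then swaps the two halfwords. */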
+ FAIL_IF(push_inst(compiler, WSBH | T(src) | D(dst), DR(dst)));
+ FAIL_IF(push_inst(compiler, ROTR | T(dst) | D(dst) | SH_IMM(16), DR(dst)));
+#if defined(SLJIT_CONFIG_MIPS_64) && SLJIT_CONFIG_MIPS_64
+ if (op == SLJIT_REV_U32 && dst != TMP_REG2 && dst != TMP_REG3)
+ FAIL_IF(push_inst(compiler, DINSU | T(dst) | SA(0) | (31 << 11), DR(dst)));
+#endif /* SLJIT_CONFIG_MIPS_64 */
#else /* SLJIT_MIPS_REV < 2 */
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+ if (!is_32) {
FAIL_IF(push_inst(compiler, DSRL32 | T(src) | D(TMP_REG1) | SH_IMM(0), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, ORI | SA(0) | TA(OTHER_FLAG) | 0xffff, OTHER_FLAG));
FAIL_IF(push_inst(compiler, DSLL32 | T(src) | D(dst) | SH_IMM(0), DR(dst)));
FAIL_IF(push_inst(compiler, DSLL32 | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(0), OTHER_FLAG));
FAIL_IF(push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst)));
-#endif /* SLJIT_MIPS_REV >= 2 */
FAIL_IF(push_inst(compiler, DSRL | T(dst) | D(TMP_REG1) | SH_IMM(16), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, ORI | SA(OTHER_FLAG) | TA(OTHER_FLAG) | 0xffff, OTHER_FLAG));
@@ -1502,23 +1661,17 @@
return push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst));
}
- if (GET_OPCODE(op) != SLJIT_REV && src != TMP_REG2) {
+ if (op != SLJIT_REV && src != TMP_REG2) {
FAIL_IF(push_inst(compiler, SLL | T(src) | D(TMP_REG2) | SH_IMM(0), DR(TMP_REG2)));
src = TMP_REG2;
}
#endif /* SLJIT_CONFIG_MIPS_64 */
-#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
- FAIL_IF(push_inst(compiler, LUI | TA(OTHER_FLAG) | 0xff, OTHER_FLAG));
- FAIL_IF(push_inst(compiler, ROTR | T(src) | D(dst) | SH_IMM(16), DR(dst)));
- FAIL_IF(push_inst(compiler, ORI | SA(OTHER_FLAG) | TA(OTHER_FLAG) | 0xff, OTHER_FLAG));
-#else /* SLJIT_MIPS_REV < 2 */
FAIL_IF(push_inst(compiler, SRL | T(src) | D(TMP_REG1) | SH_IMM(16), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, LUI | TA(OTHER_FLAG) | 0xff, OTHER_FLAG));
FAIL_IF(push_inst(compiler, SLL | T(src) | D(dst) | SH_IMM(16), DR(dst)));
FAIL_IF(push_inst(compiler, ORI | SA(OTHER_FLAG) | TA(OTHER_FLAG) | 0xff, OTHER_FLAG));
FAIL_IF(push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst)));
-#endif /* SLJIT_MIPS_REV >= 2 */
FAIL_IF(push_inst(compiler, SRL | T(dst) | D(TMP_REG1) | SH_IMM(8), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, AND | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
@@ -1527,25 +1680,34 @@
FAIL_IF(push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst)));
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
- if (GET_OPCODE(op) == SLJIT_REV_U32 && dst != TMP_REG2) {
-#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
- return push_inst(compiler, DINSU | T(dst) | SA(0) | (31 << 11) | (0 << 6), DR(dst));
-#else /* SLJIT_MIPS_REV < 2 */
+ if (op == SLJIT_REV_U32 && dst != TMP_REG2 && dst != TMP_REG3) {
FAIL_IF(push_inst(compiler, DSLL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)));
- return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst));
-#endif /* SLJIT_MIPS_REV >= 2 */
+ FAIL_IF(push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)));
}
#endif /* SLJIT_CONFIG_MIPS_64 */
+#endif /* SLJIT_MIPS_REV >= 2 */
return SLJIT_SUCCESS;
}
static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src)
{
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+#if defined(SLJIT_CONFIG_MIPS_32) && SLJIT_CONFIG_MIPS_32
+ FAIL_IF(push_inst(compiler, WSBH | T(src) | D(dst), DR(dst)));
+#else /* !SLJIT_CONFIG_MIPS_32 */
+ FAIL_IF(push_inst(compiler, DSBH | T(src) | D(dst), DR(dst)));
+#endif /* SLJIT_CONFIG_MIPS_32 */
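+	/* [D]SBH already swapped the bytes of the low halfword; ANDI
+	   zero-extends the result for REV_U16, SEH sign-extends it for
+	   REV_S16. */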
+ if (GET_OPCODE(op) == SLJIT_REV_U16)
+ return push_inst(compiler, ANDI | S(dst) | T(dst) | 0xffff, DR(dst));
+ else
+ return push_inst(compiler, SEH | T(dst) | D(dst), DR(dst));
+#else /* SLJIT_MIPS_REV < 2 */
FAIL_IF(push_inst(compiler, SELECT_OP(DSRL, SRL) | T(src) | D(TMP_REG1) | SH_IMM(8), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | T(src) | D(dst) | SH_IMM(24), DR(dst)));
FAIL_IF(push_inst(compiler, ANDI | S(TMP_REG1) | T(TMP_REG1) | 0xff, DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U16 ? SELECT_OP(DSRL32, SRL) : SELECT_OP(DSRA32, SRA)) | T(dst) | D(dst) | SH_IMM(16), DR(dst)));
return push_inst(compiler, OR | S(dst) | T(TMP_REG1) | D(dst), DR(dst));
+#endif /* SLJIT_MIPS_REV >= 2 */
}
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
@@ -1575,17 +1737,17 @@
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
-#else /* SLJIT_MIPS_REV < 1 */
+#else /* SLJIT_MIPS_REV < 2 */
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
-#endif /* SLJIT_MIPS_REV >= 1 */
+#endif /* SLJIT_MIPS_REV >= 2 */
#else /* !SLJIT_CONFIG_MIPS_32 */
-#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
if (op & SLJIT_32)
return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
-#endif /* SLJIT_MIPS_REV >= 1 */
+#endif /* SLJIT_MIPS_REV >= 2 */
FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst));
#endif /* SLJIT_CONFIG_MIPS_32 */
@@ -1604,17 +1766,17 @@
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
-#else /* SLJIT_MIPS_REV < 1 */
+#else /* SLJIT_MIPS_REV < 2 */
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
-#endif /* SLJIT_MIPS_REV >= 1 */
+#endif /* SLJIT_MIPS_REV >= 2 */
#else /* !SLJIT_CONFIG_MIPS_32 */
-#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
if (op & SLJIT_32)
return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
-#endif /* SLJIT_MIPS_REV >= 1 */
+#endif /* SLJIT_MIPS_REV >= 2 */
FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst));
#endif /* SLJIT_CONFIG_MIPS_32 */
@@ -1628,7 +1790,7 @@
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
if (dst == src2)
- return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11) | (0 << 11), DR(dst));
+ return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11), DR(dst));
#endif /* SLJIT_MIPS_REV >= 2 */
FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst)));
return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst));
@@ -2144,10 +2306,10 @@
flags |= SLOW_DEST;
if (flags & IMM_OP) {
- if ((src2 & SLJIT_IMM) && src2w != 0 && CHECK_IMM(flags, src2w)) {
+ if (src2 == SLJIT_IMM && src2w != 0 && CHECK_IMM(flags, src2w)) {
flags |= SRC2_IMM;
src2_r = src2w;
- } else if ((flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w != 0 && CHECK_IMM(flags, src1w)) {
+ } else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && CHECK_IMM(flags, src1w)) {
flags |= SRC2_IMM;
src2_r = src1w;
@@ -2164,7 +2326,7 @@
src1_r = src1;
flags |= REG1_SOURCE;
}
- else if (src1 & SLJIT_IMM) {
+ else if (src1 == SLJIT_IMM) {
if (src1w) {
FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w));
src1_r = TMP_REG1;
@@ -2187,7 +2349,7 @@
if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
dst_r = (sljit_s32)src2_r;
}
- else if (src2 & SLJIT_IMM) {
+ else if (src2 == SLJIT_IMM) {
if (!(flags & SRC2_IMM)) {
if (src2w) {
FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w));
@@ -2375,24 +2537,24 @@
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
case SLJIT_MOV_U32:
- return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);
case SLJIT_MOV_S32:
case SLJIT_MOV32:
- return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);
#endif
case SLJIT_MOV_U8:
- return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
+ return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
case SLJIT_MOV_S8:
- return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
+ return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
case SLJIT_MOV_U16:
- return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
+ return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
case SLJIT_MOV_S16:
- return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
+ return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
case SLJIT_CLZ:
case SLJIT_CTZ:
@@ -2428,9 +2590,9 @@
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if (op & SLJIT_32) {
flags |= INT_DATA | SIGNED_DATA;
- if (src1 & SLJIT_IMM)
+ if (src1 == SLJIT_IMM)
src1w = (sljit_s32)src1w;
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
src2w = (sljit_s32)src2w;
}
#endif
@@ -2451,7 +2613,7 @@
return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_XOR:
- if (((src1 & SLJIT_IMM) && src1w == -1) || ((src2 & SLJIT_IMM) && src2w == -1)) {
+ if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1)) {
return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
}
/* fallthrough */
@@ -2468,10 +2630,10 @@
case SLJIT_ROTL:
case SLJIT_ROTR:
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
src2w &= 0x1f;
#else
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
if (op & SLJIT_32)
src2w &= 0x1f;
else
@@ -2532,7 +2694,7 @@
ADJUST_LOCAL_OFFSET(src3, src3w);
- if (src3 & SLJIT_IMM) {
+ if (src3 == SLJIT_IMM) {
src3w &= bit_length - 1;
if (src3w == 0)
@@ -2653,7 +2815,7 @@
{
CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
- if (type == SLJIT_INT_REGISTER)
+ if (type == SLJIT_GP_REGISTER)
return reg_map[reg];
if (type != SLJIT_FLOAT_REGISTER)
@@ -2665,6 +2827,8 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_u32 size)
{
+ SLJIT_UNUSED_ARG(size);
+
CHECK_ERROR();
CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
@@ -2676,14 +2840,14 @@
/* --------------------------------------------------------------------- */
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7))
-#define FMT(op) ((~(sljit_ins)op & SLJIT_32) << (21 - 8))
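+/* Map the (inverted) SLJIT_32 flag into the low bit of the fmt field,
+   turning the single format into the double format for 64-bit ops. */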
+#define FMT(op) (FMT_S | (~(sljit_ins)op & SLJIT_32) << (21 - (5 + 3)))
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-# define flags (sljit_u32)0
+ sljit_u32 flags = 0;
#else
sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21;
#endif
@@ -2697,18 +2861,13 @@
if (FAST_IS_REG(dst)) {
FAIL_IF(push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1)
+#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1)
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif /* SLJIT_MIPS_REV <= 1 */
+#endif /* MIPS III */
return SLJIT_SUCCESS;
}
- /* Store the integer value from a VFP register. */
return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, FR(TMP_FREG1), dst, dstw, 0, 0);
-
-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-# undef flags
-#endif
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
@@ -2722,11 +2881,10 @@
#endif
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- if (src & SLJIT_MEM) {
- /* Load the integer value into a VFP register. */
+ if (src & SLJIT_MEM)
FAIL_IF(emit_op_mem2(compiler, (flags ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw));
- } else {
- if (src & SLJIT_IMM) {
+ else {
+ if (src == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
srcw = (sljit_s32)srcw;
@@ -2736,9 +2894,9 @@
}
FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1)
+#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1)
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif /* SLJIT_MIPS_REV <= 1 */
+#endif /* MIPS III */
}
FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
@@ -2762,7 +2920,7 @@
if (src & SLJIT_MEM) {
FAIL_IF(emit_op_mem2(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW ? WORD_DATA : INT_DATA) | LOAD_DATA, DR(TMP_REG1), src, srcw, dst, dstw));
src = TMP_REG1;
- } else if (src & SLJIT_IMM) {
+ } else if (src == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
srcw = (sljit_u32)srcw;
@@ -2779,9 +2937,9 @@
}
FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1)
+#if !defined(SLJIT_MIPS_REV)
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif /* SLJIT_MIPS_REV <= 1 */
+#endif /* MIPS III */
FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
@@ -2795,9 +2953,9 @@
FAIL_IF(push_inst(compiler, SRL | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(1), DR(TMP_REG2)));
FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG2) | FS(TMP_FREG1), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1)
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif /* SLJIT_MIPS_REV <= 1 */
+#endif /* MIPS III */
FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | 1 | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
@@ -2809,11 +2967,19 @@
FAIL_IF(push_inst(compiler, LUI | T(TMP_REG2) | IMM(0x41e0), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, MTC1 | TA(0) | FS(TMP_FREG2), UNMOVABLE_INS));
- FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(TMP_FREG2) | (1 << 11), UNMOVABLE_INS));
-
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1)
- FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif /* SLJIT_MIPS_REV <= 1 */
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ FAIL_IF(push_inst(compiler, MTHC1 | T(TMP_REG2) | FS(TMP_FREG2), UNMOVABLE_INS));
+ break;
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(TMP_FREG2) | (1 << 11), UNMOVABLE_INS));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
+ break;
+ }
FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(TMP_FREG2) | FS(dst_r) | FD(dst_r), UNMOVABLE_INS));
if (dst & SLJIT_MEM)
@@ -2830,9 +2996,9 @@
FAIL_IF(push_inst(compiler, ANDI | S(src) | T(TMP_REG2) | IMM(1), DR(TMP_REG2)));
FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1)
+#if !defined(SLJIT_MIPS_REV)
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif /* SLJIT_MIPS_REV <= 1 */
+#endif /* !SLJIT_MIPS_REV */
FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
@@ -2851,9 +3017,9 @@
FAIL_IF(push_inst(compiler, OR | S(TMP_REG1) | T(TMP_REG2) | D(TMP_REG1), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 1)
+#if !defined(SLJIT_MIPS_REV)
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif /* SLJIT_MIPS_REV <= 1 */
+#endif /* !SLJIT_MIPS_REV */
FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((~(sljit_ins)op & SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(dst_r) | FS(dst_r) | FD(dst_r), UNMOVABLE_INS));
@@ -2939,7 +3105,7 @@
case SLJIT_MOV_F64:
if (src != dst_r) {
if (dst_r != TMP_FREG1)
- FAIL_IF(push_inst(compiler, MOV_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT(op)) | FS(src) | FD(dst_r), MOVABLE_INS));
else
dst_r = src;
}
@@ -3030,13 +3196,7 @@
FAIL_IF(push_inst(compiler, DIV_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
break;
case SLJIT_COPYSIGN_F64:
- FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | T(TMP_REG1) | FS(src1), DR(TMP_REG1)));
- FAIL_IF(push_inst(compiler, SELECT_OP(DMFC1, MFC1) | T(TMP_REG2) | FS(src2), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, XOR | S(TMP_REG2) | T(TMP_REG1) | D(TMP_REG2), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(31), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | T(TMP_REG2) | D(TMP_REG1), DR(TMP_REG1)));
- return push_inst(compiler, SELECT_OP(DMTC1, MTC1) | T(TMP_REG1) | FS(dst_r), MOVABLE_INS);
+ return emit_copysign(compiler, op, src1, src2, dst_r);
}
if (dst_r == TMP_FREG2)
@@ -3221,7 +3381,7 @@
}
#define RESOLVE_IMM1() \
- if (src1 & SLJIT_IMM) { \
+ if (src1 == SLJIT_IMM) { \
if (src1w) { \
PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); \
src1 = TMP_REG1; \
@@ -3231,7 +3391,7 @@
}
#define RESOLVE_IMM2() \
- if (src2 & SLJIT_IMM) { \
+ if (src2 == SLJIT_IMM) { \
if (src2w) { \
PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \
src2 = TMP_REG2; \
@@ -3283,10 +3443,9 @@
if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2)))
jump->flags |= IS_MOVABLE;
PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(src1) | T(src2) | BRANCH_LENGTH, UNMOVABLE_INS));
- }
- else if (type >= SLJIT_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) {
+ } else if (type >= SLJIT_SIG_LESS && ((src1 == SLJIT_IMM && src1w == 0) || (src2 == SLJIT_IMM && src2w == 0))) {
inst = NOP;
- if ((src1 & SLJIT_IMM) && (src1w == 0)) {
+ if (src1 == SLJIT_IMM && src1w == 0) {
RESOLVE_IMM2();
switch (type) {
case SLJIT_SIG_LESS:
@@ -3334,7 +3493,7 @@
else {
if (type == SLJIT_LESS || type == SLJIT_GREATER_EQUAL || type == SLJIT_SIG_LESS || type == SLJIT_SIG_GREATER_EQUAL) {
RESOLVE_IMM1();
- if ((src2 & SLJIT_IMM) && src2w <= SIMM_MAX && src2w >= SIMM_MIN)
+ if (src2 == SLJIT_IMM && src2w <= SIMM_MAX && src2w >= SIMM_MIN)
PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1)));
else {
RESOLVE_IMM2();
@@ -3344,7 +3503,7 @@
}
else {
RESOLVE_IMM2();
- if ((src1 & SLJIT_IMM) && src1w <= SIMM_MAX && src1w >= SIMM_MIN)
+ if (src1 == SLJIT_IMM && src1w <= SIMM_MAX && src1w >= SIMM_MIN)
PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1)));
else {
RESOLVE_IMM1();
@@ -3386,7 +3545,7 @@
CHECK_ERROR();
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
FAIL_IF(!jump);
set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0));
@@ -3418,8 +3577,7 @@
#endif
}
- FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
- return SLJIT_SUCCESS;
+ return push_inst(compiler, NOP, UNMOVABLE_INS);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
@@ -3594,12 +3752,13 @@
CHECK_ERROR();
CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
+ ADJUST_LOCAL_OFFSET(src1, src1w);
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)
if (src1 & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG2), src1, src1w));
src1 = TMP_REG2;
- } else if (src1 & SLJIT_IMM) {
+ } else if (src1 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if (type & SLJIT_32)
src1w = (sljit_s32)src1w;
@@ -3645,11 +3804,11 @@
if (src1 & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, inp_flags, DR(dst_reg), src1, src1w));
- } else if (src1 & SLJIT_IMM) {
+ } else if (src1 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if (type & SLJIT_32)
src1w = (sljit_s32)src1w;
-#endif /* SLJIT_CONFIG_RISCV_64 */
+#endif /* SLJIT_CONFIG_MIPS_64 */
FAIL_IF(load_immediate(compiler, DR(dst_reg), src1w));
} else
FAIL_IF(push_inst(compiler, mov_ins | S(src1) | TA(0) | D(dst_reg), DR(dst_reg)));
@@ -3684,7 +3843,7 @@
src1w = 0;
type ^= 0x1;
} else
- FAIL_IF(push_inst(compiler, MOV_S | FMT(type) | FS(src2_freg) | FD(dst_freg), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT(type)) | FS(src2_freg) | FD(dst_freg), MOVABLE_INS));
}
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)
@@ -3703,7 +3862,7 @@
if (src1 & SLJIT_MEM)
FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, FR(dst_freg), src1, src1w));
else
- FAIL_IF(push_inst(compiler, MOV_S | FMT(type) | FS(src1) | FD(dst_freg), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MOV_fmt(FMT(type)) | FS(src1) | FD(dst_freg), MOVABLE_INS));
SLJIT_SKIP_CHECKS(compiler);
label = sljit_emit_label(compiler);
@@ -3759,15 +3918,27 @@
}
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
-#define MEM16_IMM_FIRST(memw) IMM((memw) + 1)
-#define MEM16_IMM_SECOND(memw) IMM(memw)
-#define MEMF64_FS_FIRST(freg) FS(freg)
-#define MEMF64_FS_SECOND(freg) (FS(freg) | ((sljit_ins)1 << 11))
+#define IMM_LEFT(memw) IMM((memw) + SSIZE_OF(sw) - 1)
+#define IMM_RIGHT(memw) IMM(memw)
+#define IMM_32_LEFT(memw) IMM((memw) + SSIZE_OF(s32) - 1)
+#define IMM_32_RIGHT(memw) IMM(memw)
+#define IMM_F64_FIRST_LEFT(memw) IMM((memw) + SSIZE_OF(s32) - 1)
+#define IMM_F64_FIRST_RIGHT(memw) IMM(memw)
+#define IMM_F64_SECOND_LEFT(memw) IMM((memw) + SSIZE_OF(f64) - 1)
+#define IMM_F64_SECOND_RIGHT(memw) IMM((memw) + SSIZE_OF(s32))
+#define IMM_16_FIRST(memw) IMM((memw) + 1)
+#define IMM_16_SECOND(memw) IMM(memw)
#else /* !SLJIT_LITTLE_ENDIAN */
-#define MEM16_IMM_FIRST(memw) IMM(memw)
-#define MEM16_IMM_SECOND(memw) IMM((memw) + 1)
-#define MEMF64_FS_FIRST(freg) (FS(freg) | ((sljit_ins)1 << 11))
-#define MEMF64_FS_SECOND(freg) FS(freg)
+#define IMM_LEFT(memw) IMM(memw)
+#define IMM_RIGHT(memw) IMM((memw) + SSIZE_OF(sw) - 1)
+#define IMM_32_LEFT(memw) IMM(memw)
+#define IMM_32_RIGHT(memw) IMM((memw) + SSIZE_OF(s32) - 1)
+#define IMM_F64_FIRST_LEFT(memw) IMM((memw) + SSIZE_OF(s32))
+#define IMM_F64_FIRST_RIGHT(memw) IMM((memw) + SSIZE_OF(f64) - 1)
+#define IMM_F64_SECOND_LEFT(memw) IMM(memw)
+#define IMM_F64_SECOND_RIGHT(memw) IMM((memw) + SSIZE_OF(s32) - 1)
+#define IMM_16_FIRST(memw) IMM(memw)
+#define IMM_16_SECOND(memw) IMM((memw) + 1)
#endif /* SLJIT_LITTLE_ENDIAN */
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -3810,10 +3981,10 @@
ins_right = ((type & SLJIT_MEM_STORE) ? SDR : LDR) | S(mem);
#endif /* SLJIT_CONFIG_MIPS_32 */
- FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_FIRST(reg)) | IMM(memw), DR(REG_PAIR_FIRST(reg))));
- FAIL_IF(push_inst(compiler, ins_right | T(REG_PAIR_FIRST(reg)) | IMM(memw + (SSIZE_OF(sw) - 1)), DR(REG_PAIR_FIRST(reg))));
- FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_SECOND(reg)) | IMM(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg))));
- return push_inst(compiler, ins_right | T(REG_PAIR_SECOND(reg)) | IMM((memw + 2 * SSIZE_OF(sw) - 1)), DR(REG_PAIR_SECOND(reg)));
+ FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_FIRST(reg)) | IMM_LEFT(memw), DR(REG_PAIR_FIRST(reg))));
+ FAIL_IF(push_inst(compiler, ins_right | T(REG_PAIR_FIRST(reg)) | IMM_RIGHT(memw), DR(REG_PAIR_FIRST(reg))));
+ FAIL_IF(push_inst(compiler, ins | T(REG_PAIR_SECOND(reg)) | IMM_LEFT(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg))));
+ return push_inst(compiler, ins_right | T(REG_PAIR_SECOND(reg)) | IMM_RIGHT(memw + SSIZE_OF(sw)), DR(REG_PAIR_SECOND(reg)));
}
#endif /* !(SLJIT_MIPS_REV >= 6) */
@@ -3854,8 +4025,8 @@
if (type & SLJIT_MEM_STORE) {
FAIL_IF(push_inst(compiler, SRA_W | T(reg) | D(TMP_REG2) | SH_IMM(8), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(TMP_REG2) | MEM16_IMM_FIRST(memw), MOVABLE_INS));
- return push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(reg) | MEM16_IMM_SECOND(memw), MOVABLE_INS);
+ FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(TMP_REG2) | IMM_16_FIRST(memw), MOVABLE_INS));
+ return push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(reg) | IMM_16_SECOND(memw), MOVABLE_INS);
}
flags = BYTE_DATA | LOAD_DATA;
@@ -3863,8 +4034,8 @@
if (op == SLJIT_MOV_S16)
flags |= SIGNED_DATA;
- FAIL_IF(push_inst(compiler, data_transfer_insts[flags] | S(mem) | T(TMP_REG2) | MEM16_IMM_FIRST(memw), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA | LOAD_DATA] | S(mem) | T(reg) | MEM16_IMM_SECOND(memw), DR(reg)));
+ FAIL_IF(push_inst(compiler, data_transfer_insts[flags] | S(mem) | T(TMP_REG2) | IMM_16_FIRST(memw), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA | LOAD_DATA] | S(mem) | T(reg) | IMM_16_SECOND(memw), DR(reg)));
FAIL_IF(push_inst(compiler, SLL_W | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(8), DR(TMP_REG2)));
return push_inst(compiler, OR | S(reg) | T(TMP_REG2) | D(reg), DR(reg));
@@ -3883,8 +4054,8 @@
SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2);
if (type & SLJIT_MEM_STORE) {
- FAIL_IF(push_inst(compiler, SDL | S(mem) | T(reg) | IMM(memw), MOVABLE_INS));
- return push_inst(compiler, SDR | S(mem) | T(reg) | IMM(memw + 7), MOVABLE_INS);
+ FAIL_IF(push_inst(compiler, SDL | S(mem) | T(reg) | IMM_LEFT(memw), MOVABLE_INS));
+ return push_inst(compiler, SDR | S(mem) | T(reg) | IMM_RIGHT(memw), MOVABLE_INS);
}
if (mem == reg) {
@@ -3892,8 +4063,8 @@
mem = TMP_REG1;
}
- FAIL_IF(push_inst(compiler, LDL | S(mem) | T(reg) | IMM(memw), DR(reg)));
- return push_inst(compiler, LDR | S(mem) | T(reg) | IMM(memw + 7), DR(reg));
+ FAIL_IF(push_inst(compiler, LDL | S(mem) | T(reg) | IMM_LEFT(memw), DR(reg)));
+ return push_inst(compiler, LDR | S(mem) | T(reg) | IMM_RIGHT(memw), DR(reg));
#endif /* SLJIT_CONFIG_MIPS_32 */
}
@@ -3901,8 +4072,8 @@
SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2);
if (type & SLJIT_MEM_STORE) {
- FAIL_IF(push_inst(compiler, SWL | S(mem) | T(reg) | IMM(memw), MOVABLE_INS));
- return push_inst(compiler, SWR | S(mem) | T(reg) | IMM(memw + 3), MOVABLE_INS);
+ FAIL_IF(push_inst(compiler, SWL | S(mem) | T(reg) | IMM_32_LEFT(memw), MOVABLE_INS));
+ return push_inst(compiler, SWR | S(mem) | T(reg) | IMM_32_RIGHT(memw), MOVABLE_INS);
}
if (mem == reg) {
@@ -3910,18 +4081,18 @@
mem = TMP_REG1;
}
- FAIL_IF(push_inst(compiler, LWL | S(mem) | T(reg) | IMM(memw), DR(reg)));
+ FAIL_IF(push_inst(compiler, LWL | S(mem) | T(reg) | IMM_32_LEFT(memw), DR(reg)));
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
- return push_inst(compiler, LWR | S(mem) | T(reg) | IMM(memw + 3), DR(reg));
+ return push_inst(compiler, LWR | S(mem) | T(reg) | IMM_32_RIGHT(memw), DR(reg));
#else /* !SLJIT_CONFIG_MIPS_32 */
- FAIL_IF(push_inst(compiler, LWR | S(mem) | T(reg) | IMM(memw + 3), DR(reg)));
+ FAIL_IF(push_inst(compiler, LWR | S(mem) | T(reg) | IMM_32_RIGHT(memw), DR(reg)));
if (op != SLJIT_MOV_U32)
return SLJIT_SUCCESS;
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
- return push_inst(compiler, DINSU | T(reg) | SA(0) | (31 << 11) | (0 << 11), DR(reg));
-#else /* SLJIT_MIPS_REV < 1 */
+ return push_inst(compiler, DINSU | T(reg) | SA(0) | (31 << 11), DR(reg));
+#else /* SLJIT_MIPS_REV < 2 */
FAIL_IF(push_inst(compiler, DSLL32 | T(reg) | D(reg) | SH_IMM(0), DR(reg)));
return push_inst(compiler, DSRL32 | T(reg) | D(reg) | SH_IMM(0), DR(reg));
#endif /* SLJIT_MIPS_REV >= 2 */
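Both branches implement the zero-extension of the low 32 bits that SLJIT_MOV_U32 requires after the unaligned load: DINSU on MIPS32r2+ clears bits 32..63 in one instruction, older cores use the shift pair. The equivalent C, as a sanity check (not from the patch):

#include <stdint.h>

/* DSLL32 + DSRL32 fallback: shift to the top and back, discarding
   whatever the LWL/LWR sequence left in the upper half. */
static uint64_t zero_extend_low32(uint64_t r)
{
    return (r << 32) >> 32;
}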
@@ -3944,77 +4115,97 @@
if (type & SLJIT_MEM_STORE) {
if (type & SLJIT_32) {
FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2)));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
+#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1)
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
- FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS));
- return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), MOVABLE_INS);
+#endif /* MIPS III */
+ FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM_32_LEFT(memw), MOVABLE_INS));
+ return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM_32_RIGHT(memw), MOVABLE_INS);
}
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
- FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | MEMF64_FS_FIRST(freg), DR(TMP_REG2)));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
+ FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2)));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
+ FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_LEFT(memw), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_RIGHT(memw), MOVABLE_INS));
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ FAIL_IF(push_inst(compiler, MFHC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2)));
+ break;
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg) | (1 << 11), DR(TMP_REG2)));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
#endif
- FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS));
- FAIL_IF(push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), MOVABLE_INS));
+ break;
+ }
- FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | MEMF64_FS_SECOND(freg), DR(TMP_REG2)));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
- FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
- FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw + 4), MOVABLE_INS));
- return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 7), MOVABLE_INS);
+ FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_LEFT(memw), MOVABLE_INS));
+ return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_RIGHT(memw), MOVABLE_INS);
#else /* !SLJIT_CONFIG_MIPS_32 */
- FAIL_IF(push_inst(compiler, MFC1 | (1 << 21) | T(TMP_REG2) | FS(freg), DR(TMP_REG2)));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
+ FAIL_IF(push_inst(compiler, DMFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2)));
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
- FAIL_IF(push_inst(compiler, SDL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS));
- return push_inst(compiler, SDR | S(mem) | T(TMP_REG2) | IMM(memw + 7), MOVABLE_INS);
+#endif /* MIPS III */
+ FAIL_IF(push_inst(compiler, SDL | S(mem) | T(TMP_REG2) | IMM_LEFT(memw), MOVABLE_INS));
+ return push_inst(compiler, SDR | S(mem) | T(TMP_REG2) | IMM_RIGHT(memw), MOVABLE_INS);
#endif /* SLJIT_CONFIG_MIPS_32 */
}
if (type & SLJIT_32) {
- FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM_32_LEFT(memw), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM_32_RIGHT(memw), DR(TMP_REG2)));
FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
+#if !defined(SLJIT_MIPS_REV) || (SLJIT_CONFIG_MIPS_32 && SLJIT_MIPS_REV <= 1)
FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
+#endif /* MIPS III */
return SLJIT_SUCCESS;
}
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
- FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | MEMF64_FS_FIRST(freg), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_LEFT(memw), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM_F64_FIRST_RIGHT(memw), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS));
- FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw + 4), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 7), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | MEMF64_FS_SECOND(freg), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
- FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
+ FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_LEFT(memw), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM_F64_SECOND_RIGHT(memw), DR(TMP_REG2)));
+ switch (cpu_feature_list & CPU_FEATURE_FR) {
+#if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
+ case CPU_FEATURE_FR:
+ return push_inst(compiler, MTHC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS);
+#endif /* SLJIT_MIPS_REV >= 2 */
+ default:
+ FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg) | (1 << 11), MOVABLE_INS));
+ break;
+ }
#else /* !SLJIT_CONFIG_MIPS_32 */
- FAIL_IF(push_inst(compiler, LDL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, LDR | S(mem) | T(TMP_REG2) | IMM(memw + 7), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LDL | S(mem) | T(TMP_REG2) | IMM_LEFT(memw), DR(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LDR | S(mem) | T(TMP_REG2) | IMM_RIGHT(memw), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, MTC1 | (1 << 21) | T(TMP_REG2) | FS(freg), MOVABLE_INS));
-#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3)
- FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
+ FAIL_IF(push_inst(compiler, DMTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS));
#endif /* SLJIT_CONFIG_MIPS_32 */
+#if !defined(SLJIT_MIPS_REV) || SLJIT_MIPS_REV <= 1
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif /* MIPS III */
return SLJIT_SUCCESS;
}
#endif /* !SLJIT_MIPS_REV || SLJIT_MIPS_REV < 6 */
-#undef MEM16_IMM_FIRST
-#undef MEM16_IMM_SECOND
-#undef MEMF64_FS_FIRST
-#undef MEMF64_FS_SECOND
+#undef IMM_16_SECOND
+#undef IMM_16_FIRST
+#undef IMM_F64_SECOND_RIGHT
+#undef IMM_F64_SECOND_LEFT
+#undef IMM_F64_FIRST_RIGHT
+#undef IMM_F64_FIRST_LEFT
+#undef IMM_32_RIGHT
+#undef IMM_32_LEFT
+#undef IMM_RIGHT
+#undef IMM_LEFT
#undef MEM_CHECK_UNALIGNED
#undef TO_ARGW_HI
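The new CPU_FEATURE_FR switches above distinguish the two MIPS float register models: with FR=1 a double occupies one 64-bit FPR and its upper word moves through MFHC1/MTHC1, while with FR=0 it spans an even/odd pair and the upper word is the odd register (FS(freg) | (1 << 11)). A software sketch of the split being performed (hypothetical helper, illustration only):

#include <stdint.h>
#include <string.h>

static void split_f64(double d, uint32_t *lo, uint32_t *hi)
{
    uint64_t bits;

    memcpy(&bits, &d, sizeof(bits));
    *lo = (uint32_t)bits;          /* MFC1 from the (even) register */
    *hi = (uint32_t)(bits >> 32);  /* MFHC1 (FR=1) or MFC1 f+1 (FR=0) */
}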
diff --git a/src/sljit/sljitNativePPC_32.c b/src/sljit/sljitNativePPC_32.c
index a335c24..2352fad 100644
--- a/src/sljit/sljitNativePPC_32.c
+++ b/src/sljit/sljitNativePPC_32.c
@@ -332,7 +332,7 @@
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
sljit_s32 invert_sign = 1;
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ (sljit_sw)0x80000000));
src = TMP_REG1;
invert_sign = 0;
@@ -351,10 +351,10 @@
if (invert_sign)
FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
FAIL_IF(push_inst(compiler, STW | S(TMP_REG2) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI));
- FAIL_IF(push_inst(compiler, STW | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET_LOW));
+ FAIL_IF(push_inst(compiler, STW | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO));
FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
- FAIL_IF(push_inst(compiler, STW | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET_LOW));
+ FAIL_IF(push_inst(compiler, STW | S(TMP_REG1) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO));
FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG2) | A(SLJIT_SP) | TMP_MEM_OFFSET));
FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
@@ -373,7 +373,7 @@
{
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
} else if (!FAST_IS_REG(src)) {
@@ -387,11 +387,11 @@
is simply the value of the source argument. Finally we subtract 2^53
to get the converted value. */
FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
- FAIL_IF(push_inst(compiler, STW | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET_LOW));
+ FAIL_IF(push_inst(compiler, STW | S(src) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO));
FAIL_IF(push_inst(compiler, STW | S(TMP_REG2) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI));
FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
- FAIL_IF(push_inst(compiler, STW | S(TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET_LOW));
+ FAIL_IF(push_inst(compiler, STW | S(TMP_ZERO) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO));
FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG2) | A(SLJIT_SP) | TMP_MEM_OFFSET));
FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
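The sequence above is the classic exponent trick for integer-to-double conversion without a dedicated instruction: placing the magic word 0x43300000 in the high half and the 32-bit source in the low half produces the double (magic + src) exactly, so a single FSUB recovers the converted value. A self-contained C version of the unsigned case (illustrative; assumes IEEE-754 doubles):

#include <stdint.h>

static double u32_to_double(uint32_t x)
{
    union { uint64_t u; double d; } magic, val;

    magic.u = (uint64_t)0x43300000 << 32;      /* exponent word: 2^52 */
    val.u = ((uint64_t)0x43300000 << 32) | x;  /* 2^52 + x, exact     */
    return val.d - magic.d;                    /* == (double)x        */
}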
@@ -455,9 +455,9 @@
FAIL_IF(push_inst(compiler, STW | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI));
if (reg2 != 0)
- FAIL_IF(push_inst(compiler, STW | S(reg2) | A(SLJIT_SP) | TMP_MEM_OFFSET_LOW));
+ FAIL_IF(push_inst(compiler, STW | S(reg2) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO));
else
- FAIL_IF(push_inst(compiler, STFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET_LOW));
+ FAIL_IF(push_inst(compiler, STFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO));
return push_inst(compiler, LFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET);
}
@@ -465,7 +465,7 @@
FAIL_IF(push_inst(compiler, STFD | FS(freg) | A(SLJIT_SP) | TMP_MEM_OFFSET));
if (reg2 != 0)
- FAIL_IF(push_inst(compiler, LWZ | S(reg2) | A(SLJIT_SP) | TMP_MEM_OFFSET_LOW));
+ FAIL_IF(push_inst(compiler, LWZ | S(reg2) | A(SLJIT_SP) | TMP_MEM_OFFSET_LO));
return push_inst(compiler, LWZ | S(reg) | A(SLJIT_SP) | TMP_MEM_OFFSET_HI);
}
diff --git a/src/sljit/sljitNativePPC_64.c b/src/sljit/sljitNativePPC_64.c
index ff0de04..b3cf9d0 100644
--- a/src/sljit/sljitNativePPC_64.c
+++ b/src/sljit/sljitNativePPC_64.c
@@ -574,7 +574,7 @@
{
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
srcw = (sljit_s32)srcw;
@@ -611,7 +611,7 @@
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) {
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_u32)srcw));
src = TMP_REG1;
} else {
@@ -626,7 +626,7 @@
FAIL_IF(push_inst(compiler, LFD | FS(TMP_FREG1) | A(SLJIT_SP) | TMP_MEM_OFFSET));
FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
} else {
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
} else if (src & SLJIT_MEM) {
diff --git a/src/sljit/sljitNativePPC_common.c b/src/sljit/sljitNativePPC_common.c
index fc2389c..54977f0 100644
--- a/src/sljit/sljitNativePPC_common.c
+++ b/src/sljit/sljitNativePPC_common.c
@@ -132,7 +132,7 @@
OE and Rc flag (see ALT_SET_FLAGS). */
#define OE(flags) ((flags) & ALT_SET_FLAGS)
/* Rc flag (see ALT_SET_FLAGS). */
-#define RC(flags) (((flags) & ALT_SET_FLAGS) >> 10)
+#define RC(flags) ((sljit_ins)((flags) & ALT_SET_FLAGS) >> 10)
#define HI(opcode) ((sljit_ins)(opcode) << 26)
#define LO(opcode) ((sljit_ins)(opcode) << 1)
@@ -150,6 +150,9 @@
#define BCx (HI(16))
#define BCCTR (HI(19) | LO(528) | (3 << 11))
#define BLR (HI(19) | LO(16) | (0x14 << 21))
+#if defined(_ARCH_PWR10) && _ARCH_PWR10
+#define BRD (HI(31) | LO(187))
+#endif /* POWER10 */
#define CNTLZD (HI(31) | LO(58))
#define CNTLZW (HI(31) | LO(26))
#define CMP (HI(31) | LO(0))
@@ -184,6 +187,9 @@
#define LD (HI(58) | 0)
#define LFD (HI(50))
#define LFS (HI(48))
+#if defined(_ARCH_PWR7) && _ARCH_PWR7
+#define LDBRX (HI(31) | LO(532))
+#endif /* POWER7 */
#define LHBRX (HI(31) | LO(790))
#define LWBRX (HI(31) | LO(534))
#define LWZ (HI(32))
@@ -222,6 +228,9 @@
#define SRD (HI(31) | LO(539))
#define SRW (HI(31) | LO(536))
#define STD (HI(62) | 0)
+#if defined(_ARCH_PWR7) && _ARCH_PWR7
+#define STDBRX (HI(31) | LO(660))
+#endif /* POWER7 */
#define STDU (HI(62) | 1)
#define STDUX (HI(31) | LO(181))
#define STFD (HI(54))
@@ -266,11 +275,15 @@
#endif /* SLJIT_CONFIG_PPC_32 */
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
-#define TMP_MEM_OFFSET_LOW TMP_MEM_OFFSET
-#define TMP_MEM_OFFSET_HI (TMP_MEM_OFFSET + sizeof(sljit_s32))
+#define TMP_MEM_OFFSET_LO (TMP_MEM_OFFSET)
+#define TMP_MEM_OFFSET_HI (TMP_MEM_OFFSET + sizeof(sljit_s32))
+#define LWBRX_FIRST_REG S(TMP_REG1)
+#define LWBRX_SECOND_REG S(dst)
#else /* !SLJIT_LITTLE_ENDIAN */
-#define TMP_MEM_OFFSET_LOW (TMP_MEM_OFFSET + sizeof(sljit_s32))
-#define TMP_MEM_OFFSET_HI TMP_MEM_OFFSET
+#define TMP_MEM_OFFSET_LO (TMP_MEM_OFFSET + sizeof(sljit_s32))
+#define TMP_MEM_OFFSET_HI (TMP_MEM_OFFSET)
+#define LWBRX_FIRST_REG S(dst)
+#define LWBRX_SECOND_REG S(TMP_REG1)
#endif /* SLJIT_LITTLE_ENDIAN */
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
@@ -439,6 +452,7 @@
reverse_buf(compiler);
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+	/* Add extra instruction space to compiler->size for the trampoline and padding. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
#else
@@ -639,7 +653,6 @@
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_offset = executable_offset;
- compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
@@ -657,8 +670,12 @@
SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
+ compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins) + sizeof(struct sljit_function_context);
+
return code_ptr;
#else
+ compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
+
return code;
#endif
}
@@ -668,12 +685,17 @@
switch (feature_type) {
case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
/* Available by default. */
return 1;
#endif
-
+ case SLJIT_HAS_REV:
+#if defined(_ARCH_PWR10) && _ARCH_PWR10
+ return 1;
+#else /* !POWER10 */
+ return 2;
+#endif /* POWER10 */
/* A saved register is set to a zero value. */
case SLJIT_HAS_ZERO_REGISTER:
case SLJIT_HAS_CLZ:
@@ -682,7 +704,6 @@
return 1;
case SLJIT_HAS_CTZ:
- case SLJIT_HAS_REV:
return 2;
default:
@@ -1251,7 +1272,7 @@
src1_r = src1;
flags |= REG1_SOURCE;
}
- else if (src1 & SLJIT_IMM) {
+ else if (src1 == SLJIT_IMM) {
src1_r = TMP_ZERO;
if (src1w != 0) {
FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
@@ -1271,7 +1292,7 @@
if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P)
dst_r = src2_r;
}
- else if (src2 & SLJIT_IMM) {
+ else if (src2 == SLJIT_IMM) {
src2_r = TMP_ZERO;
if (src2w != 0) {
FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
@@ -1371,12 +1392,16 @@
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (!is_32) {
+#if defined(_ARCH_PWR10) && _ARCH_PWR10
+ return push_inst(compiler, BRD | S(src) | A(dst));
+#else /* !POWER10 */
FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(0) | IMM(TMP_MEM_OFFSET_HI)));
FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(32) | RLDI_MB(32)));
FAIL_IF(push_inst(compiler, STWBRX | S(src) | A(SLJIT_SP) | B(TMP_REG2)));
- FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(0) | IMM(TMP_MEM_OFFSET_LOW)));
+ FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(0) | IMM(TMP_MEM_OFFSET_LO)));
FAIL_IF(push_inst(compiler, STWBRX | S(TMP_REG1) | A(SLJIT_SP) | B(TMP_REG2)));
return push_inst(compiler, LD | D(dst) | A(SLJIT_SP) | TMP_MEM_OFFSET);
+#endif /* POWER10 */
}
#endif /* SLJIT_CONFIG_PPC_64 */
@@ -1452,16 +1477,30 @@
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (!is_32) {
if (dst & SLJIT_MEM) {
+#if defined(_ARCH_PWR7) && _ARCH_PWR7
+ return push_inst(compiler, STDBRX | S(src) | A(mem) | B(offs_reg));
+#else /* !POWER7 */
+#if defined(SLJIT_LITTLE_ENDIAN) && SLJIT_LITTLE_ENDIAN
+ FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(32) | RLDI_MB(32)));
+ FAIL_IF(push_inst(compiler, STWBRX | S(TMP_REG1) | A(mem) | B(offs_reg)));
+ FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(offs_reg) | IMM(SSIZE_OF(s32))));
+ return push_inst(compiler, STWBRX | S(src) | A(mem) | B(TMP_REG2));
+#else /* !SLJIT_LITTLE_ENDIAN */
FAIL_IF(push_inst(compiler, STWBRX | S(src) | A(mem) | B(offs_reg)));
FAIL_IF(push_inst(compiler, RLDICL | S(src) | A(TMP_REG1) | RLDI_SH(32) | RLDI_MB(32)));
FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(offs_reg) | IMM(SSIZE_OF(s32))));
return push_inst(compiler, STWBRX | S(TMP_REG1) | A(mem) | B(TMP_REG2));
+#endif /* SLJIT_LITTLE_ENDIAN */
+#endif /* POWER7 */
}
-
- FAIL_IF(push_inst(compiler, LWBRX | S(dst) | A(mem) | B(offs_reg)));
+#if defined(_ARCH_PWR7) && _ARCH_PWR7
+ return push_inst(compiler, LDBRX | S(dst) | A(mem) | B(offs_reg));
+#else /* !POWER7 */
+ FAIL_IF(push_inst(compiler, LWBRX | LWBRX_FIRST_REG | A(mem) | B(offs_reg)));
FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG2) | A(offs_reg) | IMM(SSIZE_OF(s32))));
- FAIL_IF(push_inst(compiler, LWBRX | S(TMP_REG1) | A(mem) | B(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, LWBRX | LWBRX_SECOND_REG | A(mem) | B(TMP_REG2)));
return push_inst(compiler, RLDIMI | S(TMP_REG1) | A(dst) | RLDI_SH(32) | RLDI_MB(0));
+#endif /* POWER7 */
}
#endif /* SLJIT_CONFIG_PPC_64 */
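On pre-POWER7 cores the 64-bit reverse has no single byte-swapped load/store, so the fallback above composes it from two 32-bit byte-reversed accesses; the LWBRX_FIRST_REG/LWBRX_SECOND_REG macros select which half lands in dst versus TMP_REG1 so the final RLDIMI merge is endian-correct. The underlying identity, in plain C (sketch using the GCC/Clang builtin, not patch code):

#include <stdint.h>

/* A 64-bit byte swap is two 32-bit byte swaps with the halves
   exchanged - exactly what the LWBRX + LWBRX + RLDIMI sequence builds. */
static uint64_t bswap64_from_bswap32(uint64_t v)
{
    uint32_t lo = (uint32_t)v;
    uint32_t hi = (uint32_t)(v >> 32);

    return ((uint64_t)__builtin_bswap32(lo) << 32) | __builtin_bswap32(hi);
}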
@@ -1477,7 +1516,7 @@
}
#define EMIT_MOV(type, type_flags, type_cast) \
- emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
+ emit_op(compiler, (src == SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? type_cast srcw : srcw)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
@@ -1508,7 +1547,7 @@
if (op == SLJIT_MOV_S32)
op = SLJIT_MOV_U32;
}
- else if (src & SLJIT_IMM) {
+ else if (src == SLJIT_IMM) {
if (op == SLJIT_MOV_U32)
op = SLJIT_MOV_S32;
}
@@ -1580,40 +1619,22 @@
#undef EMIT_MOV
+/* Macros for checking different operand types / values. */
#define TEST_SL_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)
-
+ ((src) == SLJIT_IMM && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)
#define TEST_UL_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && !((srcw) & ~0xffff))
-
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-#define TEST_SH_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
-#else
-#define TEST_SH_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && !((srcw) & 0xffff))
-#endif
-
+ ((src) == SLJIT_IMM && !((srcw) & ~0xffff))
#define TEST_UH_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffff0000))
+ ((src) == SLJIT_IMM && !((srcw) & ~(sljit_sw)0xffff0000))
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define TEST_SH_IMM(src, srcw) \
+ ((src) == SLJIT_IMM && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
#define TEST_ADD_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
-#else
-#define TEST_ADD_IMM(src, srcw) \
- ((src) & SLJIT_IMM)
-#endif
-
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ ((src) == SLJIT_IMM && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
#define TEST_UI_IMM(src, srcw) \
- (((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff))
-#else
-#define TEST_UI_IMM(src, srcw) \
- ((src) & SLJIT_IMM)
-#endif
+ ((src) == SLJIT_IMM && !((srcw) & ~0xffffffff))
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_ADD_FORM1(op) \
(GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \
|| (op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_32 | SLJIT_SET_Z | SLJIT_SET_CARRY))
@@ -1623,14 +1644,22 @@
#define TEST_SUB_FORM3(op) \
(GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \
|| (op & (SLJIT_32 | SLJIT_SET_Z)) == (SLJIT_32 | SLJIT_SET_Z))
-#else
+
+#else /* !SLJIT_CONFIG_PPC_64 */
+#define TEST_SH_IMM(src, srcw) \
+ ((src) == SLJIT_IMM && !((srcw) & 0xffff))
+#define TEST_ADD_IMM(src, srcw) \
+ ((src) == SLJIT_IMM)
+#define TEST_UI_IMM(src, srcw) \
+ ((src) == SLJIT_IMM)
+
#define TEST_ADD_FORM1(op) \
(GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
#define TEST_SUB_FORM2(op) \
(GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL)
#define TEST_SUB_FORM3(op) \
(GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
-#endif
+#endif /* SLJIT_CONFIG_PPC_64 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
@@ -1649,9 +1678,9 @@
if (op & SLJIT_32) {
/* Most operations expect sign extended arguments. */
flags |= INT_DATA | SIGNED_DATA;
- if (src1 & SLJIT_IMM)
+ if (src1 == SLJIT_IMM)
src1w = (sljit_s32)(src1w);
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
src2w = (sljit_s32)(src2w);
if (HAS_FLAGS(op))
flags |= ALT_SIGN_EXT;
@@ -1667,7 +1696,7 @@
if (TEST_ADD_FORM1(op))
return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
- if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
+ if (!HAS_FLAGS(op) && (src1 == SLJIT_IMM || src2 == SLJIT_IMM)) {
if (TEST_SL_IMM(src2, src2w)) {
compiler->imm = (sljit_ins)src2w & 0xffff;
return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
@@ -1736,7 +1765,7 @@
return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
}
- if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1)) {
+ if (src2 == SLJIT_IMM && src2w >= 0 && src2w <= (SIMM_MAX + 1)) {
compiler->imm = (sljit_ins)src2w;
return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
}
@@ -1752,7 +1781,7 @@
}
if (TEST_SUB_FORM2(op)) {
- if ((src2 & SLJIT_IMM) && src2w >= -SIMM_MAX && src2w <= SIMM_MAX) {
+ if (src2 == SLJIT_IMM && src2w >= -SIMM_MAX && src2w <= SIMM_MAX) {
compiler->imm = (sljit_ins)src2w & 0xffff;
return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
}
@@ -1811,10 +1840,10 @@
return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_XOR:
- if ((src2 & SLJIT_IMM) && src2w == -1) {
+ if (src2 == SLJIT_IMM && src2w == -1) {
return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM4, dst, dstw, TMP_REG1, 0, src1, src1w);
}
- if ((src1 & SLJIT_IMM) && src1w == -1) {
+ if (src1 == SLJIT_IMM && src1w == -1) {
return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM4, dst, dstw, TMP_REG1, 0, src2, src2w);
}
/* fallthrough */
@@ -1864,7 +1893,7 @@
if (op & SLJIT_32)
flags |= ALT_FORM2;
#endif
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
compiler->imm = (sljit_ins)src2w;
return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
}
@@ -1916,7 +1945,7 @@
ADJUST_LOCAL_OFFSET(src3, src3w);
- if (src3 & SLJIT_IMM) {
+ if (src3 == SLJIT_IMM) {
src3w &= bit_length - 1;
if (src3w == 0)
@@ -2059,7 +2088,7 @@
{
CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
- if (type == SLJIT_INT_REGISTER)
+ if (type == SLJIT_GP_REGISTER)
return reg_map[reg];
if (type != SLJIT_FLOAT_REGISTER)
@@ -2071,6 +2100,8 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_u32 size)
{
+ SLJIT_UNUSED_ARG(size);
+
CHECK_ERROR();
CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
@@ -2440,6 +2471,8 @@
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 arg_types)
{
+ SLJIT_UNUSED_ARG(arg_types);
+
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
@@ -2476,7 +2509,7 @@
#else /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */
src_r = src;
#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */
- } else if (src & SLJIT_IMM) {
+ } else if (src == SLJIT_IMM) {
/* These jumps are converted to jump/call instructions when possible. */
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
FAIL_IF(!jump);
@@ -2506,6 +2539,8 @@
sljit_s32 arg_types,
sljit_s32 src, sljit_sw srcw)
{
+ SLJIT_UNUSED_ARG(arg_types);
+
CHECK_ERROR();
CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
@@ -2737,7 +2772,7 @@
if (src1 & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w, TMP_REG1));
- } else if (src1 & SLJIT_IMM) {
+ } else if (src1 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
if (type & SLJIT_32)
src1w = (sljit_s32)src1w;
diff --git a/src/sljit/sljitNativeRISCV_common.c b/src/sljit/sljitNativeRISCV_common.c
index f05ebbf..3b54ab9 100644
--- a/src/sljit/sljitNativeRISCV_common.c
+++ b/src/sljit/sljitNativeRISCV_common.c
@@ -536,7 +536,7 @@
switch (feature_type) {
case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
#elif defined(__riscv_float_abi_soft)
return 0;
#else
@@ -1068,7 +1068,7 @@
{
sljit_s32 is_clz = (GET_OPCODE(op) == SLJIT_CLZ);
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
- sljit_ins word = (op & SLJIT_32) >> 5;
+ sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
sljit_ins word_size = (op & SLJIT_32) ? 32 : 64;
#else /* !SLJIT_CONFIG_RISCV_64 */
sljit_ins word_size = 32;
@@ -1154,7 +1154,7 @@
static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src)
{
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
- sljit_ins word = (op & SLJIT_32) >> 5;
+ sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
sljit_ins word_size = (op & SLJIT_32) ? 32 : 64;
#else /* !SLJIT_CONFIG_RISCV_64 */
sljit_ins word_size = 32;
@@ -1191,7 +1191,7 @@
sljit_s32 is_overflow, is_carry, carry_src_r, is_handled;
sljit_ins op_imm, op_reg;
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
- sljit_ins word = (op & SLJIT_32) >> 5;
+ sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
#endif /* SLJIT_CONFIG_RISCV_64 */
SLJIT_ASSERT(WORD == 0 || WORD == 0x8);
@@ -1653,11 +1653,11 @@
flags |= SLOW_DEST;
if (flags & IMM_OP) {
- if ((src2 & SLJIT_IMM) && src2w != 0 && src2w <= SIMM_MAX && src2w >= SIMM_MIN) {
+ if (src2 == SLJIT_IMM && src2w != 0 && src2w <= SIMM_MAX && src2w >= SIMM_MIN) {
flags |= SRC2_IMM;
src2_r = src2w;
}
- else if ((flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w != 0 && src1w <= SIMM_MAX && src1w >= SIMM_MIN) {
+ else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= SIMM_MAX && src1w >= SIMM_MIN) {
flags |= SRC2_IMM;
src2_r = src1w;
@@ -1674,7 +1674,7 @@
src1_r = src1;
flags |= REG1_SOURCE;
}
- else if (src1 & SLJIT_IMM) {
+ else if (src1 == SLJIT_IMM) {
if (src1w) {
FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3));
src1_r = TMP_REG1;
@@ -1697,7 +1697,7 @@
if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
dst_r = (sljit_s32)src2_r;
}
- else if (src2 & SLJIT_IMM) {
+ else if (src2 == SLJIT_IMM) {
if (!(flags & SRC2_IMM)) {
if (src2w) {
FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w, TMP_REG3));
@@ -1754,7 +1754,7 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
- sljit_ins word = (op & SLJIT_32) >> 5;
+ sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
SLJIT_ASSERT(word == 0 || word == 0x8);
#endif /* SLJIT_CONFIG_RISCV_64 */
@@ -1823,25 +1823,25 @@
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
case SLJIT_MOV_U32:
- return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);
case SLJIT_MOV_S32:
/* Logical operators have no W variant, so sign extended input is necessary for them. */
case SLJIT_MOV32:
- return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw);
+ return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);
#endif
case SLJIT_MOV_U8:
- return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
+ return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
case SLJIT_MOV_S8:
- return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
+ return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
case SLJIT_MOV_U16:
- return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
+ return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
case SLJIT_MOV_S16:
- return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
+ return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
case SLJIT_CLZ:
case SLJIT_CTZ:
@@ -1877,9 +1877,9 @@
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
if (op & SLJIT_32) {
flags |= INT_DATA | SIGNED_DATA;
- if (src1 & SLJIT_IMM)
+ if (src1 == SLJIT_IMM)
src1w = (sljit_s32)src1w;
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
src2w = (sljit_s32)src2w;
}
#endif
@@ -1912,7 +1912,7 @@
case SLJIT_MASHR:
case SLJIT_ROTL:
case SLJIT_ROTR:
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
src2w &= 0x1f;
#else /* !SLJIT_CONFIG_RISCV_32 */
@@ -1950,7 +1950,7 @@
sljit_s32 is_left;
sljit_ins ins1, ins2, ins3;
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
- sljit_ins word = (op & SLJIT_32) >> 5;
+ sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
#else /* !SLJIT_CONFIG_RISCV_64 */
@@ -1972,7 +1972,7 @@
ADJUST_LOCAL_OFFSET(src3, src3w);
- if (src3 & SLJIT_IMM) {
+ if (src3 == SLJIT_IMM) {
src3w &= bit_length - 1;
if (src3w == 0)
@@ -2083,7 +2083,7 @@
{
CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
- if (type == SLJIT_INT_REGISTER)
+ if (type == SLJIT_GP_REGISTER)
return reg_map[reg];
if (type != SLJIT_FLOAT_REGISTER)
@@ -2095,6 +2095,8 @@
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_u32 size)
{
+ SLJIT_UNUSED_ARG(size);
+
CHECK_ERROR();
CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
@@ -2154,7 +2156,7 @@
FAIL_IF(emit_op_mem2(compiler, ((ins & (1 << 21)) ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
#endif /* !SLJIT_CONFIG_RISCV_32 */
src = TMP_REG1;
- } else if (src & SLJIT_IMM) {
+ } else if (src == SLJIT_IMM) {
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw, TMP_REG3));
src = TMP_REG1;
}
@@ -2178,7 +2180,7 @@
#else /* !SLJIT_CONFIG_RISCV_32 */
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
ins |= (1 << 21);
- else if (src & SLJIT_IMM)
+ else if (src == SLJIT_IMM)
srcw = (sljit_s32)srcw;
if (op != SLJIT_CONV_F64_FROM_S32)
@@ -2200,7 +2202,7 @@
#else /* !SLJIT_CONFIG_RISCV_32 */
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)
ins |= (1 << 21);
- else if (src & SLJIT_IMM)
+ else if (src == SLJIT_IMM)
srcw = (sljit_u32)srcw;
if (op != SLJIT_CONV_F64_FROM_S32)
@@ -2575,7 +2577,7 @@
src2 = TMP_REG2;
}
- if (src1 & SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
if (src1w != 0) {
PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3));
src1 = TMP_REG1;
@@ -2584,7 +2586,7 @@
src1 = TMP_ZERO;
}
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
if (src2w != 0) {
PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w, TMP_REG3));
src2 = TMP_REG2;
@@ -2654,7 +2656,7 @@
CHECK_ERROR();
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
@@ -2804,7 +2806,7 @@
sljit_ins *ptr;
sljit_uw size;
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
- sljit_ins word = (type & SLJIT_32) >> 5;
+ sljit_ins word = (sljit_ins)(type & SLJIT_32) >> 5;
sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
#else /* !SLJIT_CONFIG_RISCV_64 */
sljit_s32 inp_flags = WORD_DATA | LOAD_DATA;
@@ -2845,7 +2847,7 @@
if (src1 & SLJIT_MEM) {
FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w));
- } else if (src1 & SLJIT_IMM) {
+ } else if (src1 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
if (word)
src1w = (sljit_s32)src1w;
diff --git a/src/sljit/sljitNativeS390X.c b/src/sljit/sljitNativeS390X.c
index 5b5e7c6..97521b5 100644
--- a/src/sljit/sljitNativeS390X.c
+++ b/src/sljit/sljitNativeS390X.c
@@ -451,10 +451,12 @@
static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
{
+ sljit_uw dh, dl;
+
SLJIT_ASSERT(is_s20(d));
- sljit_uw dh = (d >> 12) & 0xff;
- sljit_uw dl = (d << 8) & 0xfff00;
+ dh = (d >> 12) & 0xff;
+ dl = ((sljit_uw)d << 8) & 0xfff00;
return (dh | dl) << 8;
}
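disp_s20 splits a signed 20-bit displacement into the DL (low 12 bits) and DH (high 8 bits) fields of the z/Architecture long-displacement format; after the final << 8, DL sits at bits 16..27 and DH at bits 8..15 of the returned value. A round-trip check of that layout (hypothetical helper, illustration only):

#include <stdint.h>

/* Inverse of disp_s20: pull the two fields back out and sign-extend. */
static int32_t disp_s20_decode(uint64_t ins)
{
    uint32_t dl = (uint32_t)(ins >> 16) & 0xfff;
    uint32_t dh = (uint32_t)(ins >> 8) & 0xff;
    int32_t d = (int32_t)((dh << 12) | dl);

    return (d ^ 0x80000) - 0x80000; /* sign-extend from 20 bits */
}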
@@ -1065,15 +1067,17 @@
sljit_gpr dst_r,
sljit_s32 src, sljit_sw srcw)
{
+ sljit_gpr src_r;
+
SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
return push_load_imm_inst(compiler, dst_r, srcw);
if (src & SLJIT_MEM)
return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0);
- sljit_gpr src_r = gpr(src & REG_MASK);
+ src_r = gpr(src & REG_MASK);
return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r));
}
@@ -1266,10 +1270,10 @@
sljit_s32 dst, sljit_sw dstw,
sljit_sw srcw)
{
- SLJIT_ASSERT(dst & SLJIT_MEM);
-
sljit_gpr dst_r = tmp1;
+ SLJIT_ASSERT(dst & SLJIT_MEM);
+
if (dst & OFFS_REG_MASK) {
sljit_gpr index = tmp1;
@@ -1574,6 +1578,8 @@
if (jump && jump->addr == j) {
sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
+ sljit_ins op, arg;
+
jump->addr = (sljit_uw)pool_ptr;
/* load address into tmp1 */
@@ -1590,8 +1596,8 @@
*(pool_ptr++) = (sljit_uw)target;
/* branch to tmp1 */
- sljit_ins op = (ins >> 32) & 0xf;
- sljit_ins arg = (ins >> 36) & 0xf;
+ op = (ins >> 32) & 0xf;
+ arg = (ins >> 36) & 0xf;
switch (op) {
case 4: /* brcl -> bcr */
ins = bcr(arg, tmp1);
@@ -1645,6 +1651,8 @@
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_offset = executable_offset;
compiler->executable_size = ins_size;
+ if (pool_size)
+ compiler->executable_size += (pad_size + pool_size);
code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
SLJIT_CACHE_FLUSH(code, code_ptr);
@@ -1657,15 +1665,24 @@
/* TODO(mundaym): implement all */
switch (feature_type) {
case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
+#else
+ return 1;
+#endif /* SLJIT_IS_FPU_AVAILABLE */
+
case SLJIT_HAS_CLZ:
case SLJIT_HAS_REV:
case SLJIT_HAS_ROT:
case SLJIT_HAS_PREFETCH:
case SLJIT_HAS_COPY_F32:
case SLJIT_HAS_COPY_F64:
+ case SLJIT_HAS_ATOMIC:
return 1;
+
case SLJIT_HAS_CTZ:
return 2;
+
case SLJIT_HAS_CMOV:
return have_lscond1() ? 1 : 0;
}
@@ -2176,7 +2193,7 @@
return SLJIT_SUCCESS;
}
/* LOAD IMMEDIATE */
- if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) {
+ if (FAST_IS_REG(dst) && src == SLJIT_IMM) {
switch (opcode) {
case SLJIT_MOV_U8:
srcw = (sljit_sw)((sljit_u8)(srcw));
@@ -2255,14 +2272,14 @@
return SLJIT_SUCCESS;
}
/* STORE and STORE IMMEDIATE */
- if ((dst & SLJIT_MEM)
- && (FAST_IS_REG(src) || (src & SLJIT_IMM))) {
+ if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) {
+ struct addr mem;
sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
- if (src & SLJIT_IMM) {
+
+ if (src == SLJIT_IMM) {
/* TODO(mundaym): MOVE IMMEDIATE? */
FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
}
- struct addr mem;
FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
switch (opcode) {
case SLJIT_MOV_U8:
@@ -2329,7 +2346,7 @@
SLJIT_UNREACHABLE();
}
- SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */
+ SLJIT_ASSERT(src != SLJIT_IMM);
dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;
@@ -2407,7 +2424,7 @@
const struct ins_forms *forms;
sljit_ins ins;
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
if (sets_overflow)
ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
@@ -2492,9 +2509,8 @@
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;
- if (src2 & SLJIT_IMM) {
- if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w)))
- {
+ if (src2 == SLJIT_IMM) {
+ if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) {
if ((op & SLJIT_32) || is_s32(src2w)) {
ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
@@ -2535,7 +2551,7 @@
goto done;
}
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
sljit_sw neg_src2w = -src2w;
if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
@@ -2643,7 +2659,7 @@
return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);
}
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
if (is_s16(src2w)) {
ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
@@ -2750,7 +2766,7 @@
sljit_s32 type = GET_OPCODE(op);
const struct ins_forms *forms;
- if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) {
+ if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) {
sljit_s32 count16 = 0;
sljit_uw imm = (sljit_uw)src2w;
@@ -2814,7 +2830,7 @@
else
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
- if (!(src2 & SLJIT_IMM)) {
+ if (src2 != SLJIT_IMM) {
if (FAST_IS_REG(src2))
base_r = gpr(src2);
else {
@@ -2874,7 +2890,7 @@
else
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
- if (!(src2 & SLJIT_IMM)) {
+ if (src2 != SLJIT_IMM) {
if (FAST_IS_REG(src2))
base_r = gpr(src2);
else {
@@ -2884,7 +2900,7 @@
}
if (GET_OPCODE(op) == SLJIT_ROTR) {
- if (!(src2 & SLJIT_IMM)) {
+ if (src2 != SLJIT_IMM) {
ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));
base_r = tmp1;
@@ -2892,7 +2908,7 @@
src2w = -src2w;
}
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
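z/Architecture only provides rotate-left (RLL/RLLG), so SLJIT_ROTR is lowered above by negating the amount: LCR/LCGR flips a register count, and an immediate src2w is negated before masking. The identity being used (sketch, not patch code):

#include <stdint.h>

/* rotr(x, n) == rotl(x, (-n) mod 32); the masks keep both shift
   counts in range even for n == 0. */
static uint32_t rotr32_via_rotl(uint32_t x, uint32_t n)
{
    uint32_t m = (0u - n) & 31;

    return (x << m) | (x >> ((32 - m) & 31));
}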
@@ -2933,7 +2949,7 @@
compiler->mode = op & SLJIT_32;
compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
- if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) {
+ if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) {
src1 ^= src2;
src2 ^= src1;
src1 ^= src2;
@@ -3026,7 +3042,7 @@
ADJUST_LOCAL_OFFSET(src3, src3w);
- if (src3 & SLJIT_IMM) {
+ if (src3 == SLJIT_IMM) {
src3w &= bit_length - 1;
if (src3w == 0)
@@ -3183,7 +3199,7 @@
{
CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
- if (type == SLJIT_INT_REGISTER)
+ if (type == SLJIT_GP_REGISTER)
return (sljit_s32)gpr(reg);
if (type != SLJIT_FLOAT_REGISTER)
@@ -3286,7 +3302,7 @@
{
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
src = (sljit_s32)tmp0;
}
@@ -3309,7 +3325,7 @@
{
sljit_ins ins;
- if ((src & SLJIT_IMM) && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
+ if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
srcw = (sljit_s32)srcw;
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
@@ -3326,7 +3342,7 @@
{
sljit_ins ins;
- if ((src & SLJIT_IMM) && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
+ if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
srcw = (sljit_u32)srcw;
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)
@@ -3525,7 +3541,7 @@
u.value = value;
- FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm << 32));
+ FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32))));
return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
}
@@ -3595,14 +3611,14 @@
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
+ struct sljit_jump *jump;
sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_jump(compiler, type));
/* record jump */
- struct sljit_jump *jump = (struct sljit_jump *)
- ensure_abuf(compiler, sizeof(struct sljit_jump));
+ jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
PTR_FAIL_IF(!jump);
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
jump->addr = compiler->size;
@@ -3640,7 +3656,7 @@
CHECK_ERROR();
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
}
@@ -3660,6 +3676,8 @@
sljit_s32 arg_types,
sljit_s32 src, sljit_sw srcw)
{
+ SLJIT_UNUSED_ARG(arg_types);
+
CHECK_ERROR();
CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
@@ -3691,13 +3709,13 @@
sljit_s32 dst, sljit_sw dstw,
sljit_s32 type)
{
+ sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
+ sljit_gpr loc_r = tmp1;
sljit_u8 mask = get_cc(compiler, type);
CHECK_ERROR();
CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
- sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
- sljit_gpr loc_r = tmp1;
switch (GET_OPCODE(op)) {
case SLJIT_AND:
case SLJIT_OR:
@@ -3817,7 +3835,7 @@
return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w));
}
- if (src1 & SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
if (type & SLJIT_32)
src1w = (sljit_s32)src1w;
diff --git a/src/sljit/sljitNativeX86_32.c b/src/sljit/sljitNativeX86_32.c
index ed69e6b..1bba883 100644
--- a/src/sljit/sljitNativeX86_32.c
+++ b/src/sljit/sljitNativeX86_32.c
@@ -62,21 +62,19 @@
/* Both size flags cannot be switched on. */
SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
/* SSE2 and immediate is not possible. */
- SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
- SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
- && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
- && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
+ SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2));
+ SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
+ & ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);
+ SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT);
size &= 0xf;
- inst_size = size;
+ /* The mod r/m byte is always present. */
+ inst_size = size + 1;
- if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
- inst_size++;
- if (flags & EX86_PREF_66)
+ if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
inst_size++;
/* Calculate size of b. */
- inst_size += 1; /* mod r/m byte. */
if (b & SLJIT_MEM) {
if (!(b & REG_MASK))
inst_size += sizeof(sljit_sw);
@@ -87,8 +85,7 @@
inst_size += sizeof(sljit_s8);
else
inst_size += sizeof(sljit_sw);
- }
- else if (reg_map[b & REG_MASK] == 5) {
+ } else if (reg_map[b & REG_MASK] == 5) {
/* Swap registers if possible. */
if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_map[OFFS_REG(b)] != 5)
b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
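The prefix assertion rewritten at the top of this hunk replaces three pairwise inequality checks with the standard power-of-two test: x & (x - 1) clears the lowest set bit, so the result is zero exactly when at most one of the three prefix flags is set. For reference (illustrative):

#include <stdint.h>

/* Zero or one bit set <=> clearing the lowest set bit leaves zero. */
static int at_most_one_bit(uint32_t x)
{
    return (x & (x - 1)) == 0;
}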
@@ -105,15 +102,14 @@
}
/* Calculate size of a. */
- if (a & SLJIT_IMM) {
+ if (a == SLJIT_IMM) {
if (flags & EX86_BIN_INS) {
if (imma <= 127 && imma >= -128) {
inst_size += 1;
flags |= EX86_BYTE_ARG;
} else
inst_size += 4;
- }
- else if (flags & EX86_SHIFT_INS) {
+ } else if (flags & EX86_SHIFT_INS) {
SLJIT_ASSERT(imma <= 0x1f);
if (imma != 1) {
inst_size++;
@@ -125,8 +121,7 @@
inst_size += sizeof(short);
else
inst_size += sizeof(sljit_sw);
- }
- else
+ } else
SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
@@ -136,27 +131,26 @@
INC_SIZE(inst_size);
if (flags & EX86_PREF_F2)
*inst++ = 0xf2;
- if (flags & EX86_PREF_F3)
+ else if (flags & EX86_PREF_F3)
*inst++ = 0xf3;
- if (flags & EX86_PREF_66)
+ else if (flags & EX86_PREF_66)
*inst++ = 0x66;
buf_ptr = inst + size;
/* Encode mod/rm byte. */
if (!(flags & EX86_SHIFT_INS)) {
- if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
+ if ((flags & EX86_BIN_INS) && a == SLJIT_IMM)
*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
- if (a & SLJIT_IMM)
+ if (a == SLJIT_IMM)
*buf_ptr = 0;
else if (!(flags & EX86_SSE2_OP1))
*buf_ptr = U8(reg_map[a] << 3);
else
*buf_ptr = U8(a << 3);
- }
- else {
- if (a & SLJIT_IMM) {
+ } else {
+ if (a == SLJIT_IMM) {
if (imma == 1)
*inst = GROUP_SHIFT_1;
else
@@ -183,8 +177,9 @@
if (!(b & OFFS_REG_MASK))
*buf_ptr++ |= reg_map_b;
else {
- *buf_ptr++ |= 0x04;
- *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
+ buf_ptr[0] |= 0x04;
+ buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
+ buf_ptr += 2;
}
if (immb != 0 || reg_map_b == 5) {
@@ -195,25 +190,24 @@
buf_ptr += sizeof(sljit_sw);
}
}
- }
- else {
+ } else {
if (reg_map_b == 5)
*buf_ptr |= 0x40;
- *buf_ptr++ |= 0x04;
- *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));
+ buf_ptr[0] |= 0x04;
+ buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));
+ buf_ptr += 2;
if (reg_map_b == 5)
*buf_ptr++ = 0;
}
- }
- else {
+ } else {
*buf_ptr++ |= 0x05;
sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
buf_ptr += sizeof(sljit_sw);
}
- if (a & SLJIT_IMM) {
+ if (a == SLJIT_IMM) {
if (flags & EX86_BYTE_ARG)
*buf_ptr = U8(imma);
else if (flags & EX86_HALF_ARG)
@@ -222,7 +216,67 @@
sljit_unaligned_store_sw(buf_ptr, imma);
}
- return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
+ return inst;
+}
+
+static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op,
+ /* The first and second register operand. */
+ sljit_s32 a, sljit_s32 v,
+ /* The general operand (not immediate). */
+ sljit_s32 b, sljit_sw immb)
+{
+ sljit_u8 *inst;
+ sljit_u8 vex = 0;
+ sljit_u8 vex_m = 0;
+ sljit_uw size;
+
+ SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
+ & ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);
+
+ if (op & VEX_OP_0F38)
+ vex_m = 0x2;
+ else if (op & VEX_OP_0F3A)
+ vex_m = 0x3;
+
+ if (op & VEX_W) {
+ if (vex_m == 0)
+ vex_m = 0x1;
+
+ vex |= 0x80;
+ }
+
+ if (op & EX86_PREF_66)
+ vex |= 0x1;
+ else if (op & EX86_PREF_F2)
+ vex |= 0x3;
+ else if (op & EX86_PREF_F3)
+ vex |= 0x2;
+
+ op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3);
+
+ if (op & VEX_256)
+ vex |= 0x4;
+
+ vex = U8(vex | ((((op & VEX_SSE2_OPV) ? v : reg_map[v]) ^ 0xf) << 3));
+
+ size = op & ~(sljit_uw)0xff;
+ size |= (vex_m == 0) ? 3 : 4;
+
+ inst = emit_x86_instruction(compiler, size, a, 0, b, immb);
+ FAIL_IF(!inst);
+
+ if (vex_m == 0) {
+ inst[0] = 0xc5;
+ inst[1] = U8(vex | 0x80);
+ inst[2] = U8(op);
+ return SLJIT_SUCCESS;
+ }
+
+ inst[0] = 0xc4;
+ inst[1] = U8(vex_m | 0xe0);
+ inst[2] = vex;
+ inst[3] = U8(op);
+ return SLJIT_SUCCESS;
}
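The added emit_vex_instruction picks the compact two-byte VEX form (0xC5) when no 0F38/0F3A opcode map and no W bit are needed, and the three-byte form (0xC4) otherwise; note that vvvv is stored inverted. A sketch of the three-byte layout it fills in (field names per the Intel SDM; a standalone illustration, not the patch's API):

#include <stdint.h>

/* Three-byte VEX: C4, then R/X/B (inverted, all 1 here) with the map
   select mmmmm, then W | ~vvvv | L | pp. */
static void encode_vex3(uint8_t out[3], int w, int vvvv, int l256, int pp, int mmmmm)
{
    out[0] = 0xc4;
    out[1] = (uint8_t)(0xe0 | mmmmm);
    out[2] = (uint8_t)((w << 7) | ((~vvvv & 0xf) << 3) | (l256 << 2) | pp);
}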
/* --------------------------------------------------------------------- */
@@ -776,7 +830,7 @@
offset = stack_size + compiler->local_size;
- if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
+ if (src != SLJIT_IMM && src != SLJIT_R0) {
if (word_arg_count >= 1) {
EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
r2_offset = sizeof(sljit_sw);
@@ -830,7 +884,7 @@
stack_size = args_size + SSIZE_OF(sw);
- if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) {
+ if (word_arg_count >= 1 && src != SLJIT_IMM && src != SLJIT_R0) {
r2_offset = SSIZE_OF(sw);
stack_size += SSIZE_OF(sw);
}
@@ -859,7 +913,7 @@
EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset);
}
- if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
+ if (src != SLJIT_IMM && src != SLJIT_R0) {
if (word_arg_count >= 1) {
SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));
EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
@@ -1063,7 +1117,7 @@
stack_size = type;
FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
src = SLJIT_R0;
srcw = 0;
}
@@ -1291,16 +1345,13 @@
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
FAIL_IF(!inst);
- *inst |= ROL;
+ inst[1] |= ROL;
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
FAIL_IF(!inst);
- *inst |= SHR;
+ inst[1] |= SHR;
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2_OP1, dst_r, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = CVTSI2SD_x_rm;
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm, EX86_PREF_F2 | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
@@ -1308,7 +1359,7 @@
inst[0] = U8(get_jump_code(SLJIT_NOT_CARRY) - 0x10);
size1 = compiler->size;
- FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, 0, dst_r, SLJIT_MEM0(), (sljit_sw)&f64_high_bit));
+ FAIL_IF(emit_groupf(compiler, ADDSD_x_xm, EX86_PREF_F2 | EX86_SSE2, dst_r, SLJIT_MEM0(), (sljit_sw)&f64_high_bit));
inst[1] = U8(compiler->size - size1);
@@ -1332,10 +1383,7 @@
size1 = compiler->size;
- inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = CVTSI2SD_x_rm;
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm, EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
@@ -1352,7 +1400,7 @@
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
FAIL_IF(!inst);
- *inst |= SHR;
+ inst[1] |= SHR;
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
@@ -1365,12 +1413,8 @@
BINARY_IMM32(OR, 1, TMP_REG1, 0);
jump_inst1[1] = U8(compiler->size - size1);
- inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = CVTSI2SD_x_rm;
-
- FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_32, dst_r, dst_r, 0));
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm, EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));
+ FAIL_IF(emit_groupf(compiler, ADDSD_x_xm, EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0));
jump_inst2[1] = U8(compiler->size - size2);
@@ -1430,21 +1474,14 @@
u.value = value;
if (u.imm[0] == 0) {
- if (u.imm[1] == 0) {
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = PXOR_x_xm;
- return SLJIT_SUCCESS;
- }
+ if (u.imm[1] == 0)
+ return emit_groupf(compiler, PXOR_x_xm, EX86_PREF_66 | EX86_SSE2, freg, freg, 0);
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);
} else
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[0]);
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, TMP_REG1, 0);
- inst[0] = GROUP_0F;
- inst[1] = MOVD_x_rm;
+ FAIL_IF(emit_groupf(compiler, MOVD_x_rm, EX86_PREF_66 | EX86_SSE2_OP1, freg, TMP_REG1, 0));
if (u.imm[1] == 0)
return SLJIT_SUCCESS;
@@ -1466,20 +1503,12 @@
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);
- if (cpu_feature_list && CPU_FEATURE_SSE41) {
- inst = emit_x86_instruction(compiler, 3 | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = 0x3a;
- inst[2] = PINSRD_x_rm_i8;
-
+ if (cpu_feature_list & CPU_FEATURE_SSE41) {
+ FAIL_IF(emit_groupf_ext(compiler, PINSRD_x_rm_i8, EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, TMP_REG1, 0));
return emit_byte(compiler, 1);
}
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, 0, TMP_REG1, 0);
- inst[0] = GROUP_0F;
- inst[1] = MOVD_x_rm;
-
+ FAIL_IF(emit_groupf(compiler, MOVD_x_rm, EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, TMP_REG1, 0));
tmp_freg = TMP_FREG;
}
@@ -1509,28 +1538,22 @@
SLJIT_ASSERT(cpu_feature_list != 0);
- if (!(op & SLJIT_32) && (cpu_feature_list && CPU_FEATURE_SSE41)) {
+ if (!(op & SLJIT_32) && (cpu_feature_list & CPU_FEATURE_SSE41)) {
if (reg & REG_PAIR_MASK) {
reg2 = REG_PAIR_FIRST(reg);
reg = REG_PAIR_SECOND(reg);
CHECK_EXTRA_REGS(reg, regw, (void)0);
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, reg, regw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x;
+ FAIL_IF(emit_groupf(compiler, GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x,
+ EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw));
} else
reg2 = reg;
CHECK_EXTRA_REGS(reg2, reg2w, (void)0);
- inst = emit_x86_instruction(compiler, 3 | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, reg2, reg2w);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = 0x3a;
- inst[2] = GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? PINSRD_x_rm_i8 : PEXTRD_rm_x_i8;
-
+ FAIL_IF(emit_groupf_ext(compiler, GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? PINSRD_x_rm_i8 : PEXTRD_rm_x_i8,
+ EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, reg2, reg2w));
return emit_byte(compiler, 1);
}
@@ -1546,13 +1569,9 @@
CHECK_EXTRA_REGS(reg, regw, (void)0);
- if (op & SLJIT_32) {
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, reg, regw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x;
- return SLJIT_SUCCESS;
- }
+ if (op & SLJIT_32)
+ return emit_groupf(compiler, GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x,
+ EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw);
if (op == SLJIT_COPY_FROM_F64) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
@@ -1564,18 +1583,12 @@
inst[2] = PSHUFD_x_xm;
inst[3] = U8(MOD_REG | (TMP_FREG << 3) | freg);
inst[4] = 1;
- } else if (reg != 0) {
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, 0, reg, regw);
- inst[0] = GROUP_0F;
- inst[1] = MOVD_x_rm;
- }
+ } else if (reg != 0)
+ FAIL_IF(emit_groupf(compiler, MOVD_x_rm, EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw));
- if (reg2 != 0) {
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, reg2, reg2w);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x;
- }
+ if (reg2 != 0)
+ FAIL_IF(emit_groupf(compiler, GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x,
+ EX86_PREF_66 | EX86_SSE2_OP1, freg, reg2, reg2w));
if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
@@ -1585,12 +1598,8 @@
inst[0] = GROUP_0F;
inst[1] = UNPCKLPS_x_xm;
inst[2] = U8(MOD_REG | (freg << 3) | (reg == 0 ? freg : TMP_FREG));
- } else {
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, 0, reg, regw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = MOVD_rm_x;
- }
+ } else
+ FAIL_IF(emit_groupf(compiler, MOVD_rm_x, EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw));
return SLJIT_SUCCESS;
}
diff --git a/src/sljit/sljitNativeX86_64.c b/src/sljit/sljitNativeX86_64.c
index d245923..39114c2 100644
--- a/src/sljit/sljitNativeX86_64.c
+++ b/src/sljit/sljitNativeX86_64.c
@@ -72,7 +72,7 @@
sljit_uw inst_size;
/* The immediate operand must be 32 bit. */
- SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
+ SLJIT_ASSERT(a != SLJIT_IMM || compiler->mode32 || IS_HALFWORD(imma));
/* Both cannot be switched on. */
SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
/* Size flags not allowed for typed instructions. */
@@ -80,26 +80,24 @@
/* Both size flags cannot be switched on. */
SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
/* SSE2 and immediate is not possible. */
- SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
- SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
- && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
- && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
+ SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2));
+ SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
+ & ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);
+ SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT);
size &= 0xf;
- inst_size = size;
+ /* The mod r/m byte is always present. */
+ inst_size = size + 1;
if (!compiler->mode32 && !(flags & EX86_NO_REXW))
rex |= REX_W;
else if (flags & EX86_REX)
rex |= REX;
- if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
- inst_size++;
- if (flags & EX86_PREF_66)
+ if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
inst_size++;
/* Calculate size of b. */
- inst_size += 1; /* mod r/m byte. */
if (b & SLJIT_MEM) {
if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
@@ -119,8 +117,7 @@
inst_size += sizeof(sljit_s8);
else
inst_size += sizeof(sljit_s32);
- }
- else if (reg_lmap[b & REG_MASK] == 5) {
+ } else if (reg_lmap[b & REG_MASK] == 5) {
/* Swap registers if possible. */
if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5)
b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
@@ -140,23 +137,26 @@
rex |= REX_X;
}
}
- }
- else if (!(flags & EX86_SSE2_OP2)) {
+ } else if (!(flags & EX86_SSE2_OP2)) {
if (reg_map[b] >= 8)
rex |= REX_B;
- }
- else if (freg_map[b] >= 8)
+ } else if (freg_map[b] >= 8)
rex |= REX_B;
- if (a & SLJIT_IMM) {
+ if ((flags & EX86_VEX_EXT) && (rex & 0x3)) {
+ SLJIT_ASSERT(size == 2);
+ size++;
+ inst_size++;
+ }
+
+ if (a == SLJIT_IMM) {
if (flags & EX86_BIN_INS) {
if (imma <= 127 && imma >= -128) {
inst_size += 1;
flags |= EX86_BYTE_ARG;
} else
inst_size += 4;
- }
- else if (flags & EX86_SHIFT_INS) {
+ } else if (flags & EX86_SHIFT_INS) {
SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f));
if (imma != 1) {
inst_size++;
@@ -168,8 +168,7 @@
inst_size += sizeof(short);
else
inst_size += sizeof(sljit_s32);
- }
- else {
+ } else {
SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
if (!(flags & EX86_SSE2_OP1)) {
@@ -186,14 +185,16 @@
inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
PTR_FAIL_IF(!inst);
- /* Encoding the byte. */
+ /* Encoding prefixes. */
INC_SIZE(inst_size);
if (flags & EX86_PREF_F2)
*inst++ = 0xf2;
- if (flags & EX86_PREF_F3)
+ else if (flags & EX86_PREF_F3)
*inst++ = 0xf3;
- if (flags & EX86_PREF_66)
+ else if (flags & EX86_PREF_66)
*inst++ = 0x66;
+
+ /* Rex is always the last prefix. */
if (rex)
*inst++ = rex;
@@ -201,18 +202,17 @@
/* Encode mod/rm byte. */
if (!(flags & EX86_SHIFT_INS)) {
- if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
+ if ((flags & EX86_BIN_INS) && a == SLJIT_IMM)
*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
- if (a & SLJIT_IMM)
+ if (a == SLJIT_IMM)
*buf_ptr = 0;
else if (!(flags & EX86_SSE2_OP1))
*buf_ptr = U8(reg_lmap[a] << 3);
else
*buf_ptr = U8(freg_lmap[a] << 3);
- }
- else {
- if (a & SLJIT_IMM) {
+ } else {
+ if (a == SLJIT_IMM) {
if (imma == 1)
*inst = GROUP_SHIFT_1;
else
@@ -239,8 +239,9 @@
if (!(b & OFFS_REG_MASK))
*buf_ptr++ |= reg_lmap_b;
else {
- *buf_ptr++ |= 0x04;
- *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3));
+ buf_ptr[0] |= 0x04;
+ buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3));
+ buf_ptr += 2;
}
if (immb != 0 || reg_lmap_b == 5) {
@@ -251,26 +252,26 @@
buf_ptr += sizeof(sljit_s32);
}
}
- }
- else {
+ } else {
if (reg_lmap_b == 5)
*buf_ptr |= 0x40;
- *buf_ptr++ |= 0x04;
- *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));
+ buf_ptr[0] |= 0x04;
+ buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));
+ buf_ptr += 2;
if (reg_lmap_b == 5)
*buf_ptr++ = 0;
}
- }
- else {
- *buf_ptr++ |= 0x04;
- *buf_ptr++ = 0x25;
+ } else {
+ buf_ptr[0] |= 0x04;
+ buf_ptr[1] = 0x25;
+ buf_ptr += 2;
sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
buf_ptr += sizeof(sljit_s32);
}
- if (a & SLJIT_IMM) {
+ if (a == SLJIT_IMM) {
if (flags & EX86_BYTE_ARG)
*buf_ptr = U8(imma);
else if (flags & EX86_HALF_ARG)
@@ -279,7 +280,78 @@
sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma);
}
- return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
+ return inst;
+}
+
+static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op,
+ /* The first and second register operand. */
+ sljit_s32 a, sljit_s32 v,
+ /* The general operand (not immediate). */
+ sljit_s32 b, sljit_sw immb)
+{
+ sljit_u8 *inst;
+ sljit_u8 vex = 0;
+ sljit_u8 vex_m = 0;
+ sljit_uw size;
+
+ SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
+ & ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);
+
+ op |= EX86_REX;
+
+ if (op & VEX_OP_0F38)
+ vex_m = 0x2;
+ else if (op & VEX_OP_0F3A)
+ vex_m = 0x3;
+
+ if ((op & VEX_W) || ((op & VEX_AUTO_W) && !compiler->mode32)) {
+ if (vex_m == 0)
+ vex_m = 0x1;
+
+ vex |= 0x80;
+ }
+
+ if (op & EX86_PREF_66)
+ vex |= 0x1;
+ else if (op & EX86_PREF_F2)
+ vex |= 0x3;
+ else if (op & EX86_PREF_F3)
+ vex |= 0x2;
+
+ op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3);
+
+ if (op & VEX_256)
+ vex |= 0x4;
+
+ vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3));
+
+ size = op & ~(sljit_uw)0xff;
+ size |= (vex_m == 0) ? (EX86_VEX_EXT | 2) : 3;
+
+ inst = emit_x86_instruction(compiler, size, a, 0, b, immb);
+ FAIL_IF(!inst);
+
+ SLJIT_ASSERT((inst[-1] & 0xf0) == REX);
+
+ /* If X or B is present in REX prefix. */
+ if (vex_m == 0 && inst[-1] & 0x3)
+ vex_m = 0x1;
+
+ if (vex_m == 0) {
+ vex |= U8(((inst[-1] >> 2) ^ 0x1) << 7);
+
+ inst[-1] = 0xc5;
+ inst[0] = vex;
+ inst[1] = U8(op);
+ return SLJIT_SUCCESS;
+ }
+
+ vex_m |= U8((inst[-1] ^ 0x7) << 5);
+ inst[-1] = 0xc4;
+ inst[0] = vex_m;
+ inst[1] = vex;
+ inst[2] = U8(op);
+ return SLJIT_SUCCESS;
}
/* --------------------------------------------------------------------- */
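The new emit_vex_instruction leans on a few encoding rules: at most one of the 66/F2/F3 prefixes may be set (the `x & (x - 1)` assertions here and in emit_x86_instruction check exactly that), the legacy prefix and escape map become the VEX pp and mmmmm fields, and the short 2-byte 0xC5 form is only legal when the map is 0F and the REX.W/X/B bits are all clear. A sketch of that last decision, independent of sljit's types:

/* map: 1 = 0F, 2 = 0F 38, 3 = 0F 3A (the helper's vex_m, with 0F explicit). */
static int vex_prefix_len(int map, int rex_w, int rex_x, int rex_b)
{
	return (map == 1 && !rex_w && !rex_x && !rex_b) ? 2 : 3;
}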
@@ -539,16 +611,12 @@
tmp = SLJIT_FS0 - fsaveds;
for (i = SLJIT_FS0; i > tmp; i--) {
- inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
- inst[0] = GROUP_0F;
- inst[1] = MOVAPS_xm_x;
+ FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x, EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
saved_float_regs_offset += 16;
}
for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
- inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
- inst[0] = GROUP_0F;
- inst[1] = MOVAPS_xm_x;
+ FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x, EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
saved_float_regs_offset += 16;
}
}
@@ -606,16 +674,12 @@
tmp = SLJIT_FS0 - fsaveds;
for (i = SLJIT_FS0; i > tmp; i--) {
- inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
- inst[0] = GROUP_0F;
- inst[1] = MOVAPS_x_xm;
+ FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm, EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
saved_float_regs_offset += 16;
}
for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
- inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
- inst[0] = GROUP_0F;
- inst[1] = MOVAPS_x_xm;
+ FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm, EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
saved_float_regs_offset += 16;
}
@@ -1028,7 +1092,7 @@
compiler->mode32 = 0;
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
if (FAST_IS_REG(dst)) {
if (!sign || ((sljit_u32)srcw <= 0x7fffffff))
return emit_do_imm32(compiler, reg_map[dst] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[dst]), srcw);
@@ -1084,17 +1148,14 @@
compiler->mode32 = 0;
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) {
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
compiler->mode32 = 1;
EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
compiler->mode32 = 0;
} else
FAIL_IF(emit_do_imm32(compiler, reg_map[TMP_REG1] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[TMP_REG1]), srcw));
- inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = CVTSI2SD_x_rm;
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm, EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));
compiler->mode32 = 1;
@@ -1119,10 +1180,7 @@
size1 = compiler->size;
compiler->mode32 = 0;
- inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = CVTSI2SD_x_rm;
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm, EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
@@ -1141,7 +1199,7 @@
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
FAIL_IF(!inst);
- *inst |= SHR;
+ inst[1] |= SHR;
compiler->mode32 = 1;
BINARY_IMM32(AND, 1, TMP_REG2, 0);
@@ -1151,13 +1209,9 @@
FAIL_IF(!inst);
inst[0] = OR_r_rm;
- inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = CVTSI2SD_x_rm;
-
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm, EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));
compiler->mode32 = 1;
- FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_32, dst_r, dst_r, 0));
+ FAIL_IF(emit_groupf(compiler, ADDSD_x_xm, EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0));
jump_inst2[1] = U8(compiler->size - size2);
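The SHR/AND/OR/CVTSI2SD/ADDSD tail above is the usual unsigned-to-double trick, needed because CVTSI2SD only accepts signed input: halve the value while keeping the shifted-out bit sticky in bit 0 so rounding is unaffected, convert, then double. In portable C the same idea reads:

#include <stdint.h>

static double u64_to_double(uint64_t v)
{
	if ((int64_t)v >= 0)
		return (double)(int64_t)v; /* already fits the signed range */
	uint64_t half = (v >> 1) | (v & 1); /* sticky low bit */
	double d = (double)(int64_t)half;
	return d + d;
}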
diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c
index d003e3b..cc330d4 100644
--- a/src/sljit/sljitNativeX86_common.c
+++ b/src/sljit/sljitNativeX86_common.c
@@ -140,192 +140,232 @@
#define U8(v) ((sljit_u8)(v))
-
/* Size flags for emit_x86_instruction: */
-#define EX86_BIN_INS 0x0010
-#define EX86_SHIFT_INS 0x0020
-#define EX86_REX 0x0040
-#define EX86_NO_REXW 0x0080
-#define EX86_BYTE_ARG 0x0100
-#define EX86_HALF_ARG 0x0200
-#define EX86_PREF_66 0x0400
-#define EX86_PREF_F2 0x0800
-#define EX86_PREF_F3 0x1000
-#define EX86_SSE2_OP1 0x2000
-#define EX86_SSE2_OP2 0x4000
+#define EX86_BIN_INS ((sljit_uw)0x000010)
+#define EX86_SHIFT_INS ((sljit_uw)0x000020)
+#define EX86_BYTE_ARG ((sljit_uw)0x000040)
+#define EX86_HALF_ARG ((sljit_uw)0x000080)
+/* Size flags for both emit_x86_instruction and emit_vex_instruction: */
+#define EX86_REX ((sljit_uw)0x000100)
+#define EX86_NO_REXW ((sljit_uw)0x000200)
+#define EX86_PREF_66 ((sljit_uw)0x000400)
+#define EX86_PREF_F2 ((sljit_uw)0x000800)
+#define EX86_PREF_F3 ((sljit_uw)0x001000)
+#define EX86_SSE2_OP1 ((sljit_uw)0x002000)
+#define EX86_SSE2_OP2 ((sljit_uw)0x004000)
#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
+#define EX86_VEX_EXT ((sljit_uw)0x008000)
+/* Op flags for emit_vex_instruction: */
+#define VEX_OP_0F38 ((sljit_uw)0x010000)
+#define VEX_OP_0F3A ((sljit_uw)0x020000)
+#define VEX_SSE2_OPV ((sljit_uw)0x040000)
+#define VEX_AUTO_W ((sljit_uw)0x080000)
+#define VEX_W ((sljit_uw)0x100000)
+#define VEX_256 ((sljit_uw)0x200000)
+
+#define EX86_SELECT_66(op) (((op) & SLJIT_32) ? 0 : EX86_PREF_66)
+#define EX86_SELECT_F2_F3(op) (((op) & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2)
/* --------------------------------------------------------------------- */
/* Instruction forms */
/* --------------------------------------------------------------------- */
-#define ADD (/* BINARY */ 0 << 3)
-#define ADD_EAX_i32 0x05
-#define ADD_r_rm 0x03
-#define ADD_rm_r 0x01
-#define ADDSD_x_xm 0x58
-#define ADC (/* BINARY */ 2 << 3)
-#define ADC_EAX_i32 0x15
-#define ADC_r_rm 0x13
-#define ADC_rm_r 0x11
-#define AND (/* BINARY */ 4 << 3)
-#define AND_EAX_i32 0x25
-#define AND_r_rm 0x23
-#define AND_rm_r 0x21
-#define ANDPD_x_xm 0x54
-#define BSR_r_rm (/* GROUP_0F */ 0xbd)
-#define BSF_r_rm (/* GROUP_0F */ 0xbc)
-#define BSWAP_r (/* GROUP_0F */ 0xc8)
-#define CALL_i32 0xe8
-#define CALL_rm (/* GROUP_FF */ 2 << 3)
-#define CDQ 0x99
-#define CMOVE_r_rm (/* GROUP_0F */ 0x44)
-#define CMP (/* BINARY */ 7 << 3)
-#define CMP_EAX_i32 0x3d
-#define CMP_r_rm 0x3b
-#define CMP_rm_r 0x39
-#define CMPS_x_xm 0xc2
-#define CMPXCHG_rm_r 0xb1
-#define CMPXCHG_rm8_r 0xb0
-#define CVTPD2PS_x_xm 0x5a
-#define CVTSI2SD_x_rm 0x2a
-#define CVTTSD2SI_r_xm 0x2c
-#define DIV (/* GROUP_F7 */ 6 << 3)
-#define DIVSD_x_xm 0x5e
-#define EXTRACTPS_x_xm 0x17
-#define FLDS 0xd9
-#define FLDL 0xdd
-#define FSTPS 0xd9
-#define FSTPD 0xdd
-#define INSERTPS_x_xm 0x21
-#define INT3 0xcc
-#define IDIV (/* GROUP_F7 */ 7 << 3)
-#define IMUL (/* GROUP_F7 */ 5 << 3)
-#define IMUL_r_rm (/* GROUP_0F */ 0xaf)
-#define IMUL_r_rm_i8 0x6b
-#define IMUL_r_rm_i32 0x69
-#define JL_i8 0x7c
-#define JE_i8 0x74
-#define JNC_i8 0x73
-#define JNE_i8 0x75
-#define JMP_i8 0xeb
-#define JMP_i32 0xe9
-#define JMP_rm (/* GROUP_FF */ 4 << 3)
-#define LEA_r_m 0x8d
-#define LOOP_i8 0xe2
-#define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
-#define MOV_r_rm 0x8b
-#define MOV_r_i32 0xb8
-#define MOV_rm_r 0x89
-#define MOV_rm_i32 0xc7
-#define MOV_rm8_i8 0xc6
-#define MOV_rm8_r8 0x88
-#define MOVAPS_x_xm 0x28
-#define MOVAPS_xm_x 0x29
-#define MOVD_x_rm 0x6e
-#define MOVD_rm_x 0x7e
-#define MOVDDUP_x_xm 0x12
-#define MOVDQA_x_xm 0x6f
-#define MOVDQA_xm_x 0x7f
-#define MOVHLPS_x_x 0x12
-#define MOVHPD_m_x 0x17
-#define MOVHPD_x_m 0x16
-#define MOVLHPS_x_x 0x16
-#define MOVLPD_m_x 0x13
-#define MOVLPD_x_m 0x12
-#define MOVSD_x_xm 0x10
-#define MOVSD_xm_x 0x11
-#define MOVSHDUP_x_xm 0x16
-#define MOVSXD_r_rm 0x63
-#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
-#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
-#define MOVUPS_x_xm 0x10
-#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
-#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
-#define MUL (/* GROUP_F7 */ 4 << 3)
-#define MULSD_x_xm 0x59
-#define NEG_rm (/* GROUP_F7 */ 3 << 3)
-#define NOP 0x90
-#define NOT_rm (/* GROUP_F7 */ 2 << 3)
-#define OR (/* BINARY */ 1 << 3)
-#define OR_r_rm 0x0b
-#define OR_EAX_i32 0x0d
-#define OR_rm_r 0x09
-#define OR_rm8_r8 0x08
-#define PCMPEQB_x_xm 0x74
-#define PINSRB_x_rm_i8 0x20
-#define PINSRW_x_rm_i8 0xc4
-#define PINSRD_x_rm_i8 0x22
-#define PEXTRB_rm_x_i8 0x14
-#define PEXTRW_rm_x_i8 0x15
-#define PEXTRD_rm_x_i8 0x16
-#define POP_r 0x58
-#define POP_rm 0x8f
-#define POPF 0x9d
-#define PREFETCH 0x18
-#define PSHUFB_x_xm 0x00
-#define PSHUFD_x_xm 0x70
-#define PSHUFLW_x_xm 0x70
-#define PSRLDQ_x 0x73
-#define PUSH_i32 0x68
-#define PUSH_r 0x50
-#define PUSH_rm (/* GROUP_FF */ 6 << 3)
-#define PUSHF 0x9c
-#define PXOR_x_xm 0xef
-#define ROL (/* SHIFT */ 0 << 3)
-#define ROR (/* SHIFT */ 1 << 3)
-#define RET_near 0xc3
-#define RET_i16 0xc2
-#define SBB (/* BINARY */ 3 << 3)
-#define SBB_EAX_i32 0x1d
-#define SBB_r_rm 0x1b
-#define SBB_rm_r 0x19
-#define SAR (/* SHIFT */ 7 << 3)
-#define SHL (/* SHIFT */ 4 << 3)
-#define SHLD (/* GROUP_0F */ 0xa5)
-#define SHRD (/* GROUP_0F */ 0xad)
-#define SHR (/* SHIFT */ 5 << 3)
-#define SHUFPS_x_xm 0xc6
-#define SUB (/* BINARY */ 5 << 3)
-#define SUB_EAX_i32 0x2d
-#define SUB_r_rm 0x2b
-#define SUB_rm_r 0x29
-#define SUBSD_x_xm 0x5c
-#define TEST_EAX_i32 0xa9
-#define TEST_rm_r 0x85
-#define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
-#define UCOMISD_x_xm 0x2e
-#define UNPCKLPD_x_xm 0x14
-#define UNPCKLPS_x_xm 0x14
-#define XCHG_EAX_r 0x90
-#define XCHG_r_rm 0x87
-#define XOR (/* BINARY */ 6 << 3)
-#define XOR_EAX_i32 0x35
-#define XOR_r_rm 0x33
-#define XOR_rm_r 0x31
-#define XORPD_x_xm 0x57
+#define ADD (/* BINARY */ 0 << 3)
+#define ADD_EAX_i32 0x05
+#define ADD_r_rm 0x03
+#define ADD_rm_r 0x01
+#define ADDSD_x_xm 0x58
+#define ADC (/* BINARY */ 2 << 3)
+#define ADC_EAX_i32 0x15
+#define ADC_r_rm 0x13
+#define ADC_rm_r 0x11
+#define AND (/* BINARY */ 4 << 3)
+#define AND_EAX_i32 0x25
+#define AND_r_rm 0x23
+#define AND_rm_r 0x21
+#define ANDPD_x_xm 0x54
+#define BSR_r_rm (/* GROUP_0F */ 0xbd)
+#define BSF_r_rm (/* GROUP_0F */ 0xbc)
+#define BSWAP_r (/* GROUP_0F */ 0xc8)
+#define CALL_i32 0xe8
+#define CALL_rm (/* GROUP_FF */ 2 << 3)
+#define CDQ 0x99
+#define CMOVE_r_rm (/* GROUP_0F */ 0x44)
+#define CMP (/* BINARY */ 7 << 3)
+#define CMP_EAX_i32 0x3d
+#define CMP_r_rm 0x3b
+#define CMP_rm_r 0x39
+#define CMPS_x_xm 0xc2
+#define CMPXCHG_rm_r 0xb1
+#define CMPXCHG_rm8_r 0xb0
+#define CVTPD2PS_x_xm 0x5a
+#define CVTPS2PD_x_xm 0x5a
+#define CVTSI2SD_x_rm 0x2a
+#define CVTTSD2SI_r_xm 0x2c
+#define DIV (/* GROUP_F7 */ 6 << 3)
+#define DIVSD_x_xm 0x5e
+#define EXTRACTPS_x_xm 0x17
+#define FLDS 0xd9
+#define FLDL 0xdd
+#define FSTPS 0xd9
+#define FSTPD 0xdd
+#define INSERTPS_x_xm 0x21
+#define INT3 0xcc
+#define IDIV (/* GROUP_F7 */ 7 << 3)
+#define IMUL (/* GROUP_F7 */ 5 << 3)
+#define IMUL_r_rm (/* GROUP_0F */ 0xaf)
+#define IMUL_r_rm_i8 0x6b
+#define IMUL_r_rm_i32 0x69
+#define JL_i8 0x7c
+#define JE_i8 0x74
+#define JNC_i8 0x73
+#define JNE_i8 0x75
+#define JMP_i8 0xeb
+#define JMP_i32 0xe9
+#define JMP_rm (/* GROUP_FF */ 4 << 3)
+#define LEA_r_m 0x8d
+#define LOOP_i8 0xe2
+#define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
+#define MOV_r_rm 0x8b
+#define MOV_r_i32 0xb8
+#define MOV_rm_r 0x89
+#define MOV_rm_i32 0xc7
+#define MOV_rm8_i8 0xc6
+#define MOV_rm8_r8 0x88
+#define MOVAPS_x_xm 0x28
+#define MOVAPS_xm_x 0x29
+#define MOVD_x_rm 0x6e
+#define MOVD_rm_x 0x7e
+#define MOVDDUP_x_xm 0x12
+#define MOVDQA_x_xm 0x6f
+#define MOVDQA_xm_x 0x7f
+#define MOVHLPS_x_x 0x12
+#define MOVHPD_m_x 0x17
+#define MOVHPD_x_m 0x16
+#define MOVLHPS_x_x 0x16
+#define MOVLPD_m_x 0x13
+#define MOVLPD_x_m 0x12
+#define MOVMSKPS_r_x (/* GROUP_0F */ 0x50)
+#define MOVQ_x_xm (/* GROUP_0F */ 0x7e)
+#define MOVSD_x_xm 0x10
+#define MOVSD_xm_x 0x11
+#define MOVSHDUP_x_xm 0x16
+#define MOVSXD_r_rm 0x63
+#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
+#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
+#define MOVUPS_x_xm 0x10
+#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
+#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
+#define MUL (/* GROUP_F7 */ 4 << 3)
+#define MULSD_x_xm 0x59
+#define NEG_rm (/* GROUP_F7 */ 3 << 3)
+#define NOP 0x90
+#define NOT_rm (/* GROUP_F7 */ 2 << 3)
+#define OR (/* BINARY */ 1 << 3)
+#define OR_r_rm 0x0b
+#define OR_EAX_i32 0x0d
+#define OR_rm_r 0x09
+#define OR_rm8_r8 0x08
+#define PACKSSWB_x_xm (/* GROUP_0F */ 0x63)
+#define PCMPEQB_x_xm 0x74
+#define PINSRB_x_rm_i8 0x20
+#define PINSRW_x_rm_i8 0xc4
+#define PINSRD_x_rm_i8 0x22
+#define PEXTRB_rm_x_i8 0x14
+#define PEXTRW_rm_x_i8 0x15
+#define PEXTRD_rm_x_i8 0x16
+#define PMOVMSKB_r_x (/* GROUP_0F */ 0xd7)
+#define PMOVSXBD_x_xm 0x21
+#define PMOVSXBQ_x_xm 0x22
+#define PMOVSXBW_x_xm 0x20
+#define PMOVSXDQ_x_xm 0x25
+#define PMOVSXWD_x_xm 0x23
+#define PMOVSXWQ_x_xm 0x24
+#define PMOVZXBD_x_xm 0x31
+#define PMOVZXBQ_x_xm 0x32
+#define PMOVZXBW_x_xm 0x30
+#define PMOVZXDQ_x_xm 0x35
+#define PMOVZXWD_x_xm 0x33
+#define PMOVZXWQ_x_xm 0x34
+#define POP_r 0x58
+#define POP_rm 0x8f
+#define POPF 0x9d
+#define PREFETCH 0x18
+#define PSHUFB_x_xm 0x00
+#define PSHUFD_x_xm 0x70
+#define PSHUFLW_x_xm 0x70
+#define PSRLDQ_x 0x73
+#define PUSH_i32 0x68
+#define PUSH_r 0x50
+#define PUSH_rm (/* GROUP_FF */ 6 << 3)
+#define PUSHF 0x9c
+#define PXOR_x_xm 0xef
+#define ROL (/* SHIFT */ 0 << 3)
+#define ROR (/* SHIFT */ 1 << 3)
+#define RET_near 0xc3
+#define RET_i16 0xc2
+#define SBB (/* BINARY */ 3 << 3)
+#define SBB_EAX_i32 0x1d
+#define SBB_r_rm 0x1b
+#define SBB_rm_r 0x19
+#define SAR (/* SHIFT */ 7 << 3)
+#define SHL (/* SHIFT */ 4 << 3)
+#define SHLD (/* GROUP_0F */ 0xa5)
+#define SHRD (/* GROUP_0F */ 0xad)
+#define SHR (/* SHIFT */ 5 << 3)
+#define SHUFPS_x_xm 0xc6
+#define SUB (/* BINARY */ 5 << 3)
+#define SUB_EAX_i32 0x2d
+#define SUB_r_rm 0x2b
+#define SUB_rm_r 0x29
+#define SUBSD_x_xm 0x5c
+#define TEST_EAX_i32 0xa9
+#define TEST_rm_r 0x85
+#define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
+#define UCOMISD_x_xm 0x2e
+#define UNPCKLPD_x_xm 0x14
+#define UNPCKLPS_x_xm 0x14
+#define VBROADCASTSD_x_xm 0x19
+#define VBROADCASTSS_x_xm 0x18
+#define VEXTRACTF128_x_ym 0x19
+#define VEXTRACTI128_x_ym 0x39
+#define VINSERTF128_y_y_xm 0x18
+#define VINSERTI128_y_y_xm 0x38
+#define VPBROADCASTB_x_xm 0x78
+#define VPBROADCASTD_x_xm 0x58
+#define VPBROADCASTQ_x_xm 0x59
+#define VPBROADCASTW_x_xm 0x79
+#define VPERMPD_y_ym 0x01
+#define VPERMQ_y_ym 0x00
+#define XCHG_EAX_r 0x90
+#define XCHG_r_rm 0x87
+#define XOR (/* BINARY */ 6 << 3)
+#define XOR_EAX_i32 0x35
+#define XOR_r_rm 0x33
+#define XOR_rm_r 0x31
+#define XORPD_x_xm 0x57
-#define GROUP_0F 0x0f
-#define GROUP_66 0x66
-#define GROUP_F3 0xf3
-#define GROUP_F7 0xf7
-#define GROUP_FF 0xff
-#define GROUP_BINARY_81 0x81
-#define GROUP_BINARY_83 0x83
-#define GROUP_SHIFT_1 0xd1
-#define GROUP_SHIFT_N 0xc1
-#define GROUP_SHIFT_CL 0xd3
-#define GROUP_LOCK 0xf0
+#define GROUP_0F 0x0f
+#define GROUP_66 0x66
+#define GROUP_F3 0xf3
+#define GROUP_F7 0xf7
+#define GROUP_FF 0xff
+#define GROUP_BINARY_81 0x81
+#define GROUP_BINARY_83 0x83
+#define GROUP_SHIFT_1 0xd1
+#define GROUP_SHIFT_N 0xc1
+#define GROUP_SHIFT_CL 0xd3
+#define GROUP_LOCK 0xf0
-#define MOD_REG 0xc0
-#define MOD_DISP8 0x40
+#define MOD_REG 0xc0
+#define MOD_DISP8 0x40
-#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s))
+#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s))
-#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r)))
-#define POP_REG(r) (*inst++ = U8(POP_r + (r)))
-#define RET() (*inst++ = RET_near)
-#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)
+#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r)))
+#define POP_REG(r) (*inst++ = U8(POP_r + (r)))
+#define RET() (*inst++ = RET_near)
+#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)
/* Multithreading does not affect these static variables, since they store
built-in CPU features. Therefore they can be overwritten by different threads
@@ -338,6 +378,8 @@
#define CPU_FEATURE_LZCNT 0x008
#define CPU_FEATURE_TZCNT 0x010
#define CPU_FEATURE_CMOV 0x020
+#define CPU_FEATURE_AVX 0x040
+#define CPU_FEATURE_AVX2 0x080
static sljit_u32 cpu_feature_list = 0;
@@ -370,129 +412,115 @@
/* Utility functions */
/******************************************************/
-static void get_cpu_features(void)
+static void execute_cpu_id(sljit_u32 info[4])
{
- sljit_u32 feature_list = CPU_FEATURE_DETECTED;
- sljit_u32 value_ecx, value_edx;
-
#if defined(_MSC_VER) && _MSC_VER >= 1400
- int CPUInfo[4];
-
- __cpuid(CPUInfo, 0);
- if (CPUInfo[0] >= 7) {
- __cpuidex(CPUInfo, 7, 0);
- if (CPUInfo[1] & 0x8)
- feature_list |= CPU_FEATURE_TZCNT;
- }
-
- __cpuid(CPUInfo, (int)0x80000001);
- if (CPUInfo[2] & 0x20)
- feature_list |= CPU_FEATURE_LZCNT;
-
- __cpuid(CPUInfo, 1);
- value_ecx = (sljit_u32)CPUInfo[2];
- value_edx = (sljit_u32)CPUInfo[3];
+ __cpuidex((int*)info, (int)info[0], (int)info[2]);
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__)
/* AT&T syntax. */
__asm__ (
- "movl $0x0, %%eax\n"
- "lzcnt %%eax, %%eax\n"
- "setnz %%al\n"
- "movl %%eax, %0\n"
- : "=g" (value_ecx)
- :
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- : "eax"
-#else
- : "rax"
-#endif
- );
-
- if (value_ecx & 0x1)
- feature_list |= CPU_FEATURE_LZCNT;
-
- __asm__ (
- "movl $0x0, %%eax\n"
- "tzcnt %%eax, %%eax\n"
- "setnz %%al\n"
- "movl %%eax, %0\n"
- : "=g" (value_ecx)
- :
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- : "eax"
-#else
- : "rax"
-#endif
- );
-
- if (value_ecx & 0x1)
- feature_list |= CPU_FEATURE_TZCNT;
-
- __asm__ (
- "movl $0x1, %%eax\n"
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- /* On x86-32, there is no red zone, so this
- should work (no need for a local variable). */
- "push %%ebx\n"
-#endif
+ "movl %0, %%esi\n"
+ "movl (%%esi), %%eax\n"
+ "movl 8(%%esi), %%ecx\n"
"cpuid\n"
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- "pop %%ebx\n"
-#endif
- "movl %%edx, %0\n"
- "movl %%edx, %1\n"
- : "=g" (value_ecx), "=g" (value_edx)
+ "movl %%eax, (%%esi)\n"
+ "movl %%ebx, 4(%%esi)\n"
+ "movl %%ecx, 8(%%esi)\n"
+ "movl %%edx, 12(%%esi)\n"
+#else /* !SLJIT_CONFIG_X86_32 */
+ "movq %0, %%rsi\n"
+ "movl (%%rsi), %%eax\n"
+ "movl 8(%%rsi), %%ecx\n"
+ "cpuid\n"
+ "movl %%eax, (%%rsi)\n"
+ "movl %%ebx, 4(%%rsi)\n"
+ "movl %%ecx, 8(%%rsi)\n"
+ "movl %%edx, 12(%%rsi)\n"
+#endif /* SLJIT_CONFIG_X86_32 */
:
+ : "r" (info)
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- : "eax", "ecx", "edx"
-#else
- : "rax", "rbx", "rcx", "rdx"
-#endif
+ : "memory", "eax", "ebx", "ecx", "edx", "esi"
+#else /* !SLJIT_CONFIG_X86_32 */
+ : "memory", "rax", "rbx", "rcx", "rdx", "rsi"
+#endif /* SLJIT_CONFIG_X86_32 */
);
-#else /* _MSC_VER && _MSC_VER >= 1400 */
+#else /* _MSC_VER < 1400 */
/* Intel syntax. */
__asm {
- mov eax, 0
- lzcnt eax, eax
- setnz al
- mov value_ecx, eax
- }
-
- if (value_ecx & 0x1)
- feature_list |= CPU_FEATURE_LZCNT;
-
- __asm {
- mov eax, 0
- tzcnt eax, eax
- setnz al
- mov value_ecx, eax
- }
-
- if (value_ecx & 0x1)
- feature_list |= CPU_FEATURE_TZCNT;
-
- __asm {
- mov eax, 1
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ mov esi, info
+ mov eax, [esi]
+ mov ecx, [esi + 8]
cpuid
- mov value_ecx, ecx
- mov value_edx, edx
+ mov [esi], eax
+ mov [esi + 4], ebx
+ mov [esi + 8], ecx
+ mov [esi + 12], edx
+#else /* !SLJIT_CONFIG_X86_32 */
+ mov rsi, info
+ mov eax, [rsi]
+ mov ecx, [rsi + 8]
+ cpuid
+ mov [rsi], eax
+ mov [rsi + 4], ebx
+ mov [rsi + 8], ecx
+ mov [rsi + 12], edx
+#endif /* SLJIT_CONFIG_X86_32 */
}
#endif /* _MSC_VER && _MSC_VER >= 1400 */
+}
+static void get_cpu_features(void)
+{
+ sljit_u32 feature_list = CPU_FEATURE_DETECTED;
+ sljit_u32 info[4];
+ sljit_u32 max_id;
+
+ info[0] = 0;
+ execute_cpu_id(info);
+ max_id = info[0];
+
+ if (max_id >= 7) {
+ info[0] = 7;
+ info[2] = 0;
+ execute_cpu_id(info);
+
+ if (info[1] & 0x8)
+ feature_list |= CPU_FEATURE_TZCNT;
+ if (info[1] & 0x20)
+ feature_list |= CPU_FEATURE_AVX2;
+ }
+
+ if (max_id >= 1) {
+ info[0] = 1;
+ execute_cpu_id(info);
+
+ if (info[2] & 0x80000)
+ feature_list |= CPU_FEATURE_SSE41;
+ if (info[2] & 0x10000000)
+ feature_list |= CPU_FEATURE_AVX;
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
- if (value_edx & 0x4000000)
- feature_list |= CPU_FEATURE_SSE2;
+ if (info[3] & 0x4000000)
+ feature_list |= CPU_FEATURE_SSE2;
#endif
- if (value_ecx & 0x80000)
- feature_list |= CPU_FEATURE_SSE41;
- if (value_edx & 0x8000)
- feature_list |= CPU_FEATURE_CMOV;
+ if (info[3] & 0x8000)
+ feature_list |= CPU_FEATURE_CMOV;
+ }
+
+ info[0] = 0x80000001;
+ info[2] = 0; /* Silences an incorrect compiler warning. */
+ execute_cpu_id(info);
+
+ if (info[2] & 0x20)
+ feature_list |= CPU_FEATURE_LZCNT;
cpu_feature_list = feature_list;
}
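For reference, the bits probed above are: leaf 7 EBX bit 3 (BMI1, which provides TZCNT) and bit 5 (AVX2); leaf 1 ECX bit 19 (SSE4.1) and bit 28 (AVX), EDX bit 26 (SSE2) and bit 15 (CMOV); and extended leaf 0x80000001 ECX bit 5 (LZCNT). An illustrative stand-alone probe using the GCC/Clang <cpuid.h> helper rather than the patch's own execute_cpu_id:

#include <cpuid.h>

static int has_avx2(void)
{
	unsigned int eax, ebx, ecx, edx;
	/* Leaf 7, subleaf 0; __get_cpuid_count returns 0 if the leaf is absent. */
	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 0;
	return (ebx >> 5) & 1;
}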
@@ -586,7 +614,7 @@
label_addr = jump->u.target - (sljit_uw)executable_offset;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
+ if ((sljit_sw)(label_addr - (jump->addr + 2)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 6)) < HALFWORD_MIN)
return generate_far_jump_code(jump, code_ptr);
#endif
@@ -782,7 +810,7 @@
switch (feature_type) {
case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
if (cpu_feature_list == 0)
get_cpu_features();
@@ -818,18 +846,23 @@
case SLJIT_HAS_PREFETCH:
case SLJIT_HAS_COPY_F32:
case SLJIT_HAS_COPY_F64:
+ case SLJIT_HAS_ATOMIC:
return 1;
- case SLJIT_HAS_SSE2:
- case SLJIT_HAS_SIMD:
-#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+#if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE
+ case SLJIT_HAS_AVX:
if (cpu_feature_list == 0)
get_cpu_features();
- return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
-#else /* !SLJIT_DETECT_SSE2 */
- return 1;
-#endif /* SLJIT_DETECT_SSE2 */
-
+ return (cpu_feature_list & CPU_FEATURE_AVX) != 0;
+ case SLJIT_HAS_AVX2:
+ if (cpu_feature_list == 0)
+ get_cpu_features();
+ return (cpu_feature_list & CPU_FEATURE_AVX2) != 0;
+ case SLJIT_HAS_SIMD:
+ if (cpu_feature_list == 0)
+ get_cpu_features();
+ return (cpu_feature_list & CPU_FEATURE_SSE41) != 0;
+#endif /* SLJIT_IS_FPU_AVAILABLE */
default:
return 0;
}
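With the reworked queries, SLJIT_HAS_SIMD now requires SSE4.1 rather than bare SSE2, and AVX/AVX2 are reported separately, so generators can gate their vector paths explicitly. A usage sketch:

if (sljit_has_cpu_feature(SLJIT_HAS_SIMD)) {
	/* SSE4.1 is available; the sljit_emit_simd_* entry points may be used. */
	if (sljit_has_cpu_feature(SLJIT_HAS_AVX2)) {
		/* 32-byte vector registers are also an option. */
	}
}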
@@ -903,8 +936,13 @@
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
-static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
- sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w);
+static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
+ sljit_u8 opcode, sljit_uw pref,
+ sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
+
+static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
+ sljit_u8 opcode, sljit_uw pref,
+ sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
@@ -1078,7 +1116,8 @@
*inst = MOV_rm_r;
return SLJIT_SUCCESS;
}
- if (src & SLJIT_IMM) {
+
+ if (src == SLJIT_IMM) {
if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
@@ -1287,7 +1326,7 @@
compiler->mode32 = 0;
#endif
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
@@ -1316,47 +1355,27 @@
#else
dst_r = src;
#endif
- }
+ } else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
- /* src, dst are registers. */
- SLJIT_ASSERT(FAST_IS_REG(dst));
- if (reg_map[dst] < 4) {
- if (dst != src)
- EMIT_MOV(compiler, dst, 0, src, 0);
- inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
- }
- else {
- if (dst != src)
- EMIT_MOV(compiler, dst, 0, src, 0);
- if (sign) {
- /* shl reg, 24 */
- inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
- FAIL_IF(!inst);
- *inst |= SHL;
- /* sar reg, 24 */
- inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
- FAIL_IF(!inst);
- *inst |= SAR;
- }
- else {
+ if (FAST_IS_REG(src) && reg_map[src] >= 4) {
+ /* Both src and dst are registers. */
+ SLJIT_ASSERT(FAST_IS_REG(dst));
+
+ if (src == dst && !sign) {
inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
FAIL_IF(!inst);
*(inst + 1) |= AND;
+ return SLJIT_SUCCESS;
}
+
+ EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
+ src = TMP_REG1;
+ srcw = 0;
}
- return SLJIT_SUCCESS;
- }
-#endif

- else {
+#endif /* SLJIT_CONFIG_X86_32 */
+
/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
+ FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm8 : MOVZX_r_rm8, 0, dst_r, src, srcw));
}
if (dst & SLJIT_MEM) {
@@ -1403,7 +1422,7 @@
compiler->mode32 = 0;
#endif
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
@@ -1424,12 +1443,8 @@
if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
dst_r = src;
- else {
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
- }
+ else
+ FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm16 : MOVZX_r_rm16, 0, dst_r, src, srcw));
if (dst & SLJIT_MEM) {
inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
@@ -1491,20 +1506,14 @@
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) {
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_F3, dst_r, 0, src, srcw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = is_clz ? LZCNT_r_rm : TZCNT_r_rm;
+ FAIL_IF(emit_groupf(compiler, is_clz ? LZCNT_r_rm : TZCNT_r_rm, EX86_PREF_F3, dst_r, src, srcw));
if (dst & SLJIT_MEM)
EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = is_clz ? BSR_r_rm : BSF_r_rm;
+ FAIL_IF(emit_groupf(compiler, is_clz ? BSR_r_rm : BSF_r_rm, 0, dst_r, src, srcw));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
max = is_clz ? (32 + 31) : 32;
@@ -1537,13 +1546,8 @@
if (cpu_feature_list & CPU_FEATURE_CMOV) {
EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max);
-
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = CMOVE_r_rm;
- }
- else
+ FAIL_IF(emit_groupf(compiler, CMOVE_r_rm, 0, dst_r, TMP_REG2, 0));
+ } else
FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
if (is_clz) {
@@ -1624,9 +1628,9 @@
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, (sljit_sw)size, dst_r, 0);
FAIL_IF(!inst);
if (op == SLJIT_REV_U16)
- *inst |= SHR;
+ inst[1] |= SHR;
else
- *inst |= SAR;
+ inst[1] |= SAR;
}
if (dst & SLJIT_MEM) {
@@ -1691,14 +1695,14 @@
if (op == SLJIT_MOV_S32)
op = SLJIT_MOV_U32;
}
- else if (src & SLJIT_IMM) {
+ else if (src == SLJIT_IMM) {
if (op == SLJIT_MOV_U32)
op = SLJIT_MOV_S32;
}
}
#endif /* SLJIT_CONFIG_X86_64 */
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
switch (op) {
case SLJIT_MOV_U8:
srcw = (sljit_u8)srcw;
@@ -1810,7 +1814,7 @@
sljit_u8 op_imm = U8(op_types & 0xff);
if (dst == src1 && dstw == src1w) {
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
@@ -1844,7 +1848,7 @@
/* Only for cumulative operations. */
if (dst == src2 && dstw == src2w) {
- if (src1 & SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
@@ -1878,7 +1882,7 @@
/* General version. */
if (FAST_IS_REG(dst)) {
EMIT_MOV(compiler, dst, 0, src1, src1w);
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
}
else {
@@ -1890,7 +1894,7 @@
else {
/* This version requires less memory writing. */
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
}
else {
@@ -1917,7 +1921,7 @@
sljit_u8 op_imm = U8(op_types & 0xff);
if (dst == src1 && dstw == src1w) {
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
@@ -1951,7 +1955,7 @@
/* General version. */
if (FAST_IS_REG(dst) && dst != src2) {
EMIT_MOV(compiler, dst, 0, src1, src1w);
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
}
else {
@@ -1963,7 +1967,7 @@
else {
/* This version requires less memory writing. */
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
}
else {
@@ -1986,20 +1990,12 @@
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
/* Register destination. */
- if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = IMUL_r_rm;
- }
- else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = IMUL_r_rm;
- }
- else if (src1 & SLJIT_IMM) {
- if (src2 & SLJIT_IMM) {
+ if (dst_r == src1 && src2 != SLJIT_IMM) {
+ FAIL_IF(emit_groupf(compiler, IMUL_r_rm, 0, dst_r, src2, src2w));
+ } else if (dst_r == src2 && src1 != SLJIT_IMM) {
+ FAIL_IF(emit_groupf(compiler, IMUL_r_rm, 0, dst_r, src1, src1w));
+ } else if (src1 == SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
src2 = dst_r;
src2w = 0;
@@ -2036,14 +2032,11 @@
if (dst_r != src2)
EMIT_MOV(compiler, dst_r, 0, src2, src2w);
FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = IMUL_r_rm;
+ FAIL_IF(emit_groupf(compiler, IMUL_r_rm, 0, dst_r, TMP_REG2, 0));
}
#endif
}
- else if (src2 & SLJIT_IMM) {
+ else if (src2 == SLJIT_IMM) {
/* Note: src1 is NOT immediate. */
if (src2w <= 127 && src2w >= -128) {
@@ -2078,11 +2071,7 @@
if (dst_r != src1)
EMIT_MOV(compiler, dst_r, 0, src1, src1w);
FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
-
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = IMUL_r_rm;
+ FAIL_IF(emit_groupf(compiler, IMUL_r_rm, 0, dst_r, TMP_REG2, 0));
}
#endif
} else {
@@ -2090,10 +2079,7 @@
if (ADDRESSING_DEPENDS_ON(src2, dst_r))
dst_r = TMP_REG1;
EMIT_MOV(compiler, dst_r, 0, src1, src1w);
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = IMUL_r_rm;
+ FAIL_IF(emit_groupf(compiler, IMUL_r_rm, 0, dst_r, src2, src2w));
}
if (dst & SLJIT_MEM)
@@ -2126,10 +2112,10 @@
done = 1;
}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+ if (src2 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src2w))) {
inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
#else
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
FAIL_IF(!inst);
@@ -2139,10 +2125,10 @@
}
else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+ if (src1 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src1w))) {
inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
#else
- if (src1 & SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
FAIL_IF(!inst);
@@ -2166,16 +2152,16 @@
sljit_u8* inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+ if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
- if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+ if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
#endif
BINARY_EAX_IMM(CMP_EAX_i32, src2w);
return SLJIT_SUCCESS;
}
if (FAST_IS_REG(src1)) {
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
}
else {
@@ -2186,15 +2172,15 @@
return SLJIT_SUCCESS;
}
- if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
+ if (FAST_IS_REG(src2) && src1 != SLJIT_IMM) {
inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
FAIL_IF(!inst);
*inst = CMP_rm_r;
return SLJIT_SUCCESS;
}
- if (src2 & SLJIT_IMM) {
- if (src1 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
src1 = TMP_REG1;
src1w = 0;
@@ -2217,25 +2203,25 @@
sljit_u8* inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+ if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
- if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+ if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
#endif
BINARY_EAX_IMM(TEST_EAX_i32, src2w);
return SLJIT_SUCCESS;
}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+ if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
- if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
+ if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128)) {
#endif
BINARY_EAX_IMM(TEST_EAX_i32, src1w);
return SLJIT_SUCCESS;
}
- if (!(src1 & SLJIT_IMM)) {
- if (src2 & SLJIT_IMM) {
+ if (src1 != SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (IS_HALFWORD(src2w) || compiler->mode32) {
inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
@@ -2263,8 +2249,8 @@
}
}
- if (!(src2 & SLJIT_IMM)) {
- if (src1 & SLJIT_IMM) {
+ if (src2 != SLJIT_IMM) {
+ if (src1 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (IS_HALFWORD(src1w) || compiler->mode32) {
inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
@@ -2293,7 +2279,7 @@
}
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (IS_HALFWORD(src2w) || compiler->mode32) {
inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
@@ -2331,18 +2317,18 @@
#endif
sljit_u8* inst;
- if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
+ if (src2 == SLJIT_IMM || src2 == SLJIT_PREF_SHIFT_REG) {
if (dst == src1 && dstw == src1w) {
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
return SLJIT_SUCCESS;
}
if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
@@ -2350,14 +2336,14 @@
EMIT_MOV(compiler, dst, 0, src1, src1w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
return SLJIT_SUCCESS;
}
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
@@ -2367,7 +2353,7 @@
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
}
@@ -2385,7 +2371,7 @@
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
#endif
@@ -2411,7 +2397,7 @@
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
FAIL_IF(!inst);
- *inst |= mode;
+ inst[1] |= mode;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
@@ -2434,7 +2420,7 @@
sljit_s32 src2, sljit_sw src2w)
{
/* The CPU does not set flags if the shift count is 0. */
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
src2w &= compiler->mode32 ? 0x1f : 0x3f;
#else /* !SLJIT_CONFIG_X86_64 */
@@ -2499,7 +2485,7 @@
return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w);
if (!HAS_FLAGS(op)) {
- if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
+ if (src2 == SLJIT_IMM && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
return compiler->error;
if (FAST_IS_REG(dst) && src2 == dst) {
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));
@@ -2522,9 +2508,9 @@
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_XOR:
if (!HAS_FLAGS(op)) {
- if ((src2 & SLJIT_IMM) && src2w == -1)
+ if (src2 == SLJIT_IMM && src2w == -1)
return emit_unary(compiler, NOT_rm, dst, dstw, src1, src1w);
- if ((src1 & SLJIT_IMM) && src1w == -1)
+ if (src1 == SLJIT_IMM && src1w == -1)
return emit_unary(compiler, NOT_rm, dst, dstw, src2, src2w);
}
@@ -2610,7 +2596,7 @@
compiler->mode32 = op & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */
- if (src3 & SLJIT_IMM) {
+ if (src3 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
src3w &= 0x1f;
#else /* !SLJIT_CONFIG_X86_32 */
@@ -2637,7 +2623,7 @@
}
#endif /* SLJIT_CONFIG_X86_32 */
- if (dst_reg == SLJIT_PREF_SHIFT_REG && !(src3 & SLJIT_IMM) && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) {
+ if (dst_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
src1_reg = TMP_REG1;
@@ -2662,7 +2648,7 @@
if (src3 != SLJIT_PREF_SHIFT_REG)
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
} else {
- if (src2_reg == SLJIT_PREF_SHIFT_REG && !(src3 & SLJIT_IMM) && src3 != SLJIT_PREF_SHIFT_REG) {
+ if (src2_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
#endif /* SLJIT_CONFIG_X86_64 */
@@ -2710,7 +2696,7 @@
}
#endif /* SLJIT_CONFIG_X86_64 */
- if (!(src3 & SLJIT_IMM) && src3 != SLJIT_PREF_SHIFT_REG) {
+ if (src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
if (!restore_ecx) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
@@ -2741,7 +2727,7 @@
FAIL_IF(!inst);
inst[0] = GROUP_0F;
- if (src3 & SLJIT_IMM) {
+ if (src3 == SLJIT_IMM) {
inst[1] = U8((is_left ? SHLD : SHRD) - 1);
/* Immediate argument is added separately. */
@@ -2823,7 +2809,7 @@
{
CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
- if (type == SLJIT_INT_REGISTER) {
+ if (type == SLJIT_GP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
return -1;
@@ -2881,40 +2867,43 @@
sse2_buffer[13] = 0x7fffffff;
}
-static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
- sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
+static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
+ sljit_u8 opcode, sljit_uw pref,
+ sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
- sljit_u8 *inst;
-
- inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
+ sljit_u8 *inst = emit_x86_instruction(compiler, 2 | pref, dst, 0, src, srcw);
FAIL_IF(!inst);
inst[0] = GROUP_0F;
inst[1] = opcode;
return SLJIT_SUCCESS;
}
-static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
- sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
+static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
+ sljit_u8 opcode, sljit_uw pref,
+ sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
sljit_u8 *inst;
- inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
+ SLJIT_ASSERT((pref & EX86_SSE2) && ((pref & VEX_OP_0F38) || (pref & VEX_OP_0F3A)));
+
+ inst = emit_x86_instruction(compiler, 3 | (pref & ~(VEX_OP_0F38 | VEX_OP_0F3A)), dst, 0, src, srcw);
FAIL_IF(!inst);
inst[0] = GROUP_0F;
- inst[1] = opcode;
+ inst[1] = U8((pref & VEX_OP_0F38) ? 0x38 : 0x3A);
+ inst[2] = opcode;
return SLJIT_SUCCESS;
}
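The split keeps the common case cheap: emit_groupf covers the two-byte 0F opcode map, while emit_groupf_ext adds the extra 0F 38 / 0F 3A escape byte needed by the SSE4.1 instructions used elsewhere in the patch. Illustrative register-form encodings, one per map:

/* ucomisd xmm0, xmm1: 66 0F 2E /r, ModR/M 0xC1 (mod=11, reg=xmm0, rm=xmm1) */
static const unsigned char ucomisd_xmm0_xmm1[] = { 0x66, 0x0f, 0x2e, 0xc1 };
/* pinsrd xmm1, eax, 1: 66 0F 3A 22 /r ib, ModR/M 0xC8 (reg=xmm1, rm=eax) */
static const unsigned char pinsrd_xmm1_eax_1[] = { 0x66, 0x0f, 0x3a, 0x22, 0xc8, 0x01 };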
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
- return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
+ return emit_groupf(compiler, MOVSD_x_xm, (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, dst, src, srcw);
}
static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
{
- return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
+ return emit_groupf(compiler, MOVSD_xm_x, (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, src, dst, dstw);
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
@@ -2922,7 +2911,6 @@
sljit_s32 src, sljit_sw srcw)
{
sljit_s32 dst_r;
- sljit_u8 *inst;
CHECK_EXTRA_REGS(dst, dstw, (void)0);
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
@@ -2932,10 +2920,7 @@
compiler->mode32 = 0;
#endif
- inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = CVTTSD2SI_r_xm;
+ FAIL_IF(emit_groupf(compiler, CVTTSD2SI_r_xm, EX86_SELECT_F2_F3(op) | EX86_SSE2_OP2, dst_r, src, srcw));
if (dst & SLJIT_MEM)
return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
@@ -2947,7 +2932,6 @@
sljit_s32 src, sljit_sw srcw)
{
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
- sljit_u8 *inst;
CHECK_EXTRA_REGS(src, srcw, (void)0);
@@ -2956,7 +2940,7 @@
compiler->mode32 = 0;
#endif
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
srcw = (sljit_s32)srcw;
@@ -2966,10 +2950,7 @@
srcw = 0;
}
- inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = CVTSI2SD_x_rm;
+ FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm, EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, srcw));
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 1;
@@ -2987,7 +2968,7 @@
case SLJIT_ORDERED_EQUAL:
/* Also: SLJIT_UNORDERED_OR_NOT_EQUAL */
FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
- FAIL_IF(emit_sse2(compiler, CMPS_x_xm, op & SLJIT_32, TMP_FREG, src2, src2w));
+ FAIL_IF(emit_groupf(compiler, CMPS_x_xm, EX86_SELECT_F2_F3(op) | EX86_SSE2, TMP_FREG, src2, src2w));
/* EQ */
FAIL_IF(emit_byte(compiler, 0));
@@ -3005,7 +2986,7 @@
src2 = TMP_FREG;
}
- return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src2, src1, src1w);
+ return emit_groupf(compiler, UCOMISD_x_xm, EX86_SELECT_66(op) | EX86_SSE2, src2, src1, src1w);
}
if (!FAST_IS_REG(src1)) {
@@ -3013,7 +2994,7 @@
src1 = TMP_FREG;
}
- return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src1, src2, src2w);
+ return emit_groupf(compiler, UCOMISD_x_xm, EX86_SELECT_66(op) | EX86_SSE2, src1, src2, src2w);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -3044,14 +3025,13 @@
/* We overwrite the high bits of source. From SLJIT point of view,
this is not an issue.
Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
- FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_32, src, src, 0));
- }
- else {
+ FAIL_IF(emit_groupf(compiler, UNPCKLPD_x_xm, ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, src, src, 0));
+ } else {
FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw));
src = TMP_FREG;
}
- FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_32, dst_r, src, 0));
+ FAIL_IF(emit_groupf(compiler, CVTPD2PS_x_xm, ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, dst_r, src, 0));
if (dst_r == TMP_FREG)
return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
return SLJIT_SUCCESS;
@@ -3069,11 +3049,11 @@
switch (GET_OPCODE(op)) {
case SLJIT_NEG_F64:
- FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, !(op & SLJIT_32), dst_r, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
+ FAIL_IF(emit_groupf(compiler, XORPD_x_xm, EX86_SELECT_66(op) | EX86_SSE2, dst_r, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
break;
case SLJIT_ABS_F64:
- FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, !(op & SLJIT_32), dst_r, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer + 4 : sse2_buffer + 12)));
+ FAIL_IF(emit_groupf(compiler, ANDPD_x_xm, EX86_SELECT_66(op) | EX86_SSE2, dst_r, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer + 4 : sse2_buffer + 12)));
break;
}
@@ -3122,19 +3102,19 @@
switch (GET_OPCODE(op)) {
case SLJIT_ADD_F64:
- FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
+ FAIL_IF(emit_groupf(compiler, ADDSD_x_xm, EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
break;
case SLJIT_SUB_F64:
- FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
+ FAIL_IF(emit_groupf(compiler, SUBSD_x_xm, EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
break;
case SLJIT_MUL_F64:
- FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
+ FAIL_IF(emit_groupf(compiler, MULSD_x_xm, EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
break;
case SLJIT_DIV_F64:
- FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
+ FAIL_IF(emit_groupf(compiler, DIVSD_x_xm, EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
break;
}
@@ -3148,6 +3128,8 @@
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
+ sljit_uw pref;
+
CHECK_ERROR();
CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
ADJUST_LOCAL_OFFSET(src1, src1w);
@@ -3159,9 +3141,10 @@
if (dst_freg == src1) {
FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
- FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, !(op & SLJIT_32), TMP_FREG, src1, src1w));
- FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, !(op & SLJIT_32), TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
- return emit_sse2_logic(compiler, XORPD_x_xm, !(op & SLJIT_32), dst_freg, TMP_FREG, 0);
+ pref = EX86_SELECT_66(op) | EX86_SSE2;
+ FAIL_IF(emit_groupf(compiler, XORPD_x_xm, pref, TMP_FREG, src1, src1w));
+ FAIL_IF(emit_groupf(compiler, ANDPD_x_xm, pref, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
+ return emit_groupf(compiler, XORPD_x_xm, pref, dst_freg, TMP_FREG, 0);
}
if (src1 & SLJIT_MEM) {
@@ -3173,9 +3156,10 @@
if (dst_freg != src2)
FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_freg, src2, src2w));
- FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, !(op & SLJIT_32), dst_freg, src1, src1w));
- FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, !(op & SLJIT_32), dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
- return emit_sse2_logic(compiler, XORPD_x_xm, !(op & SLJIT_32), dst_freg, src1, src1w);
+ pref = EX86_SELECT_66(op) | EX86_SSE2;
+ FAIL_IF(emit_groupf(compiler, XORPD_x_xm, pref, dst_freg, src1, src1w));
+ FAIL_IF(emit_groupf(compiler, ANDPD_x_xm, pref, dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
+ return emit_groupf(compiler, XORPD_x_xm, pref, dst_freg, src1, src1w);
}
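
Note: the rewritten fop2r path above is a branch-free sign transfer. A standalone model of the same XOR/AND/XOR sequence on plain integers, assuming the sse2_buffer constant it ANDs with holds the sign-bit mask of the element type:

#include <stdint.h>

/* t = (a ^ b) isolates every differing bit; masking keeps only the sign-bit
   difference; XORing it back flips a's sign iff it differs from b's. The
   magnitude (and any NaN payload) of a is left untouched. */
static uint64_t copysign_bits64(uint64_t a, uint64_t b)
{
	const uint64_t sign = (uint64_t)1 << 63; /* assumed f64 mask layout */
	uint64_t t = (a ^ b) & sign;
	return a ^ t;
}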
/* --------------------------------------------------------------------- */
@@ -3406,7 +3390,6 @@
sljit_sw dstw = 0;
#endif /* SLJIT_CONFIG_X86_32 */
sljit_sw src2w = 0;
- sljit_u8* inst;
CHECK_ERROR();
CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
@@ -3424,7 +3407,7 @@
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
if (dst & SLJIT_MEM) {
- if ((src1 & SLJIT_IMM) || (!(src1 & SLJIT_MEM) && (src2_reg & SLJIT_MEM))) {
+ if (src1 == SLJIT_IMM || (!(src1 & SLJIT_MEM) && (src2_reg & SLJIT_MEM))) {
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
src1 = src2_reg;
src1w = src2w;
@@ -3451,7 +3434,7 @@
}
}
- if (SLJIT_UNLIKELY(src1 & SLJIT_IMM)) {
+ if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
SLJIT_ASSERT(dst_reg != TMP_REG1);
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
src1 = TMP_REG1;
@@ -3461,14 +3444,10 @@
}
#endif /* SLJIT_CONFIG_X86_32 */
- if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) {
- inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src1, src1w);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = U8(get_jump_code((sljit_uw)type) - 0x40);
- } else {
+ if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
+ FAIL_IF(emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), 0, dst_reg, src1, src1w));
+ else
FAIL_IF(emit_cmov_generic(compiler, type, dst_reg, src1, src1w));
- }
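
Note: the replaced block shows why a single subtraction works here: get_jump_code() returns the second byte of the two-byte Jcc encoding (0F 80..0F 8F), and CMOVcc reuses the same condition nibble at 0F 40..0F 4F. A standalone sketch of the mapping:

#include <stdint.h>

/* e.g. JE is 0F 84, CMOVE is 0F 44; JNE 0F 85 -> CMOVNE 0F 45, and so on. */
static uint8_t jcc_to_cmovcc(uint8_t jcc_opcode)
{
	return (uint8_t)(jcc_opcode - 0x40);
}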
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
if (dst_reg == TMP_REG1)
@@ -3522,9 +3501,8 @@
sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
sljit_s32 alignment = SLJIT_SIMD_GET_ALIGNMENT(type);
- sljit_u8 *inst;
sljit_u8 opcode = 0;
- sljit_uw pref = 2 | EX86_SSE2;
+ sljit_uw pref;
CHECK_ERROR();
CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
@@ -3535,41 +3513,47 @@
compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */
- if (reg_size == 4) {
- if (!(srcdst & SLJIT_MEM))
- alignment = 4;
-
- if (type & SLJIT_SIMD_FLOAT) {
- if (elem_size == 2 || elem_size == 3) {
- opcode = alignment >= 4 ? MOVAPS_x_xm : MOVUPS_x_xm;
-
- if (elem_size == 3)
- pref |= EX86_PREF_66;
-
- if (type & SLJIT_SIMD_STORE)
- opcode = U8(opcode + 1);
- }
- } else {
- opcode = (type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm;
- pref |= alignment >= 4 ? EX86_PREF_66 : EX86_PREF_F3;
- }
-
- if (opcode == 0)
+ switch (reg_size) {
+ case 4:
+ pref = 2 | EX86_SSE2;
+ break;
+ case 5:
+ if (!(cpu_feature_list & CPU_FEATURE_AVX2))
return SLJIT_ERR_UNSUPPORTED;
-
- if (type & SLJIT_SIMD_TEST)
- return SLJIT_SUCCESS;
-
- inst = emit_x86_instruction(compiler, pref, freg, 0, srcdst, srcdstw);
- FAIL_IF(!inst);
-
- inst[0] = GROUP_0F;
- inst[1] = opcode;
- return SLJIT_SUCCESS;
+ pref = EX86_SSE2 | VEX_256;
+ break;
+ default:
+ return SLJIT_ERR_UNSUPPORTED;
}
- /* TODO: Support VEX prefix and longer reg types. */
- return SLJIT_ERR_UNSUPPORTED;
+ if (!(srcdst & SLJIT_MEM))
+ alignment = reg_size;
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (elem_size == 2 || elem_size == 3) {
+ opcode = alignment >= reg_size ? MOVAPS_x_xm : MOVUPS_x_xm;
+
+ if (elem_size == 3)
+ pref |= EX86_PREF_66;
+
+ if (type & SLJIT_SIMD_STORE)
+ opcode = U8(opcode + 1);
+ }
+ } else {
+ opcode = (type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm;
+ pref |= alignment >= reg_size ? EX86_PREF_66 : EX86_PREF_F3;
+ }
+
+ if (opcode == 0)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (pref & VEX_256)
+ return emit_vex_instruction(compiler, opcode | pref, freg, 0, srcdst, srcdstw);
+
+ return emit_groupf(compiler, opcode, pref, freg, srcdst, srcdstw);
}
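
Note: in the reworked load/store selection above, register operands are treated as fully aligned (alignment = reg_size), so the aligned forms are used unless a memory operand's declared alignment falls short. A hedged sketch of the float-side choice, with the 0F-escaped opcode bytes written out; the integer side flips only the mandatory prefix (66 for MOVDQA, F3 for MOVDQU) because both share the same opcode byte:

#include <stdint.h>

static uint8_t pick_float_mov(int aligned, int is_store)
{
	uint8_t op = aligned ? 0x28 /* MOVAPS */ : 0x10 /* MOVUPS */;
	return (uint8_t)(is_store ? op + 1 : op); /* 0F 29 / 0F 11 store forms */
}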
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
@@ -3591,168 +3575,188 @@
CHECK_EXTRA_REGS(src, srcw, (void)0);
}
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
+ return SLJIT_ERR_UNSUPPORTED;
+#else /* !SLJIT_CONFIG_X86_32 */
compiler->mode32 = 1;
-#endif /* SLJIT_CONFIG_X86_64 */
- if (reg_size == 4) {
- if (type & SLJIT_SIMD_FLOAT) {
- if (elem_size < 2 || elem_size > 3)
- return SLJIT_ERR_UNSUPPORTED;
+ if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
+ return SLJIT_ERR_UNSUPPORTED;
+#endif /* SLJIT_CONFIG_X86_32 */
+ if (cpu_feature_list & CPU_FEATURE_AVX2) {
+ if (reg_size < 4 || reg_size > 5)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (src != SLJIT_IMM && (reg_size == 5 || elem_size < 3 || !(type & SLJIT_SIMD_FLOAT))) {
if (type & SLJIT_SIMD_TEST)
return SLJIT_SUCCESS;
- if (src & SLJIT_IMM) {
- inst = emit_x86_instruction(compiler, 2 | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, freg, 0, freg, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = XORPD_x_xm;
- return SLJIT_SUCCESS;
- }
-
- if (elem_size == 2 && freg != src) {
- FAIL_IF(emit_sse2_load(compiler, 1, freg, src, srcw));
+ if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (elem_size >= 3)
+ compiler->mode32 = 0;
+#endif /* SLJIT_CONFIG_X86_64 */
+ FAIL_IF(emit_groupf(compiler, MOVD_x_rm, EX86_PREF_66 | EX86_SSE2_OP1, freg, src, srcw));
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif /* SLJIT_CONFIG_X86_64 */
src = freg;
srcw = 0;
}
- inst = emit_x86_instruction(compiler, 2 | (elem_size == 3 ? EX86_PREF_F2 : 0) | EX86_SSE2, freg, 0, src, srcw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm;
-
- if (elem_size == 2)
- return emit_byte(compiler, 0);
- return SLJIT_SUCCESS;
- }
-
- if (src & SLJIT_IMM) {
- if (elem_size == 0) {
- srcw = (sljit_u8)srcw;
- srcw |= srcw << 8;
- srcw |= srcw << 16;
- elem_size = 2;
- } else if (elem_size == 1) {
- srcw = (sljit_u16)srcw;
- srcw |= srcw << 16;
- elem_size = 2;
- }
-
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (elem_size == 2 && (sljit_s32)srcw == -1)
- srcw = -1;
-#endif /* SLJIT_CONFIG_X86_64 */
-
- if (srcw == 0 || srcw == -1) {
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = srcw == 0 ? PXOR_x_xm : PCMPEQB_x_xm;
- return SLJIT_SUCCESS;
- }
-
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (elem_size == 3)
- FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
- else {
-#endif /* SLJIT_CONFIG_X86_64 */
- EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
- src = TMP_REG1;
- srcw = 0;
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- }
-#endif /* SLJIT_CONFIG_X86_64 */
- }
-
+ switch (elem_size) {
+ case 0:
+ size = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+ break;
+ case 1:
+ size = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+ break;
+ case 2:
+ size = ((type & SLJIT_SIMD_FLOAT) ? VBROADCASTSS_x_xm : VPBROADCASTD_x_xm) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+ break;
+ default:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- if (elem_size > 2)
- return SLJIT_ERR_UNSUPPORTED;
+ size = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+#else /* !SLJIT_CONFIG_X86_32 */
+ size = ((type & SLJIT_SIMD_FLOAT) ? VBROADCASTSD_x_xm : VPBROADCASTQ_x_xm) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
#endif /* SLJIT_CONFIG_X86_32 */
-
- if (type & SLJIT_SIMD_TEST)
- return SLJIT_SUCCESS;
-
- size = 2;
- opcode = MOVD_x_rm;
-
- switch (elem_size) {
- case 0:
- if (!FAST_IS_REG(src)) {
- opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */;
- size = 3;
+ break;
}
- break;
- case 1:
- if (!FAST_IS_REG(src))
- opcode = PINSRW_x_rm_i8;
- break;
- case 2:
- break;
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- case 3:
- /* MOVQ */
- compiler->mode32 = 0;
- break;
-#endif /* SLJIT_CONFIG_X86_64 */
+
+ if (reg_size == 5)
+ size |= VEX_256;
+
+ return emit_vex_instruction(compiler, size, freg, 0, src, srcw);
+ }
+ } else if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (src == SLJIT_IMM) {
+ if (reg_size == 5)
+ return emit_vex_instruction(compiler, XORPD_x_xm | VEX_256 | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0);
+
+ return emit_groupf(compiler, XORPD_x_xm, (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, freg, freg, 0);
}
- inst = emit_x86_instruction(compiler, size | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = opcode;
-
- if (size == 3) {
- SLJIT_ASSERT(opcode == 0x3a);
- inst[2] = PINSRB_x_rm_i8;
+ if (elem_size == 2 && freg != src) {
+ FAIL_IF(emit_sse2_load(compiler, 1, freg, src, srcw));
+ src = freg;
+ srcw = 0;
}
- if (opcode != MOVD_x_rm)
- FAIL_IF(emit_byte(compiler, 0));
+ FAIL_IF(emit_groupf(compiler, elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm, (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2, freg, src, srcw));
- switch (elem_size) {
- case 0:
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, TMP_FREG, 0, TMP_FREG, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = PXOR_x_xm;
-
- inst = emit_x86_instruction(compiler, 3 | EX86_PREF_66 | EX86_SSE2, freg, 0, TMP_FREG, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = 0x38;
- inst[2] = PSHUFB_x_xm;
- return SLJIT_SUCCESS;
- case 1:
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, freg, 0, freg, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = PSHUFLW_x_xm;
-
- FAIL_IF(emit_byte(compiler, 0));
- /* fallthrough */
- case 2:
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = PSHUFD_x_xm;
-
+ if (elem_size == 2)
return emit_byte(compiler, 0);
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- case 3:
- compiler->mode32 = 1;
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = PSHUFD_x_xm;
-
- return emit_byte(compiler, 0x44);
-#endif /* SLJIT_CONFIG_X86_64 */
- }
+ return SLJIT_SUCCESS;
}
- /* TODO: Support VEX prefix and longer reg types. */
- return SLJIT_ERR_UNSUPPORTED;
+ if (src == SLJIT_IMM) {
+ if (elem_size == 0) {
+ srcw = (sljit_u8)srcw;
+ srcw |= srcw << 8;
+ srcw |= srcw << 16;
+ elem_size = 2;
+ } else if (elem_size == 1) {
+ srcw = (sljit_u16)srcw;
+ srcw |= srcw << 16;
+ elem_size = 2;
+ }
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (elem_size == 2 && (sljit_s32)srcw == -1)
+ srcw = -1;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ if (srcw == 0 || srcw == -1) {
+ if (reg_size == 5)
+ return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQB_x_xm) | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0);
+
+ return emit_groupf(compiler, srcw == 0 ? PXOR_x_xm : PCMPEQB_x_xm, EX86_PREF_66 | EX86_SSE2, freg, freg, 0);
+ }
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (elem_size == 3)
+ FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
+ else
+#endif /* SLJIT_CONFIG_X86_64 */
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
+
+ src = TMP_REG1;
+ srcw = 0;
+ }
+
+ size = 2;
+ opcode = MOVD_x_rm;
+
+ switch (elem_size) {
+ case 0:
+ if (!FAST_IS_REG(src)) {
+ opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */;
+ size = 3;
+ }
+ break;
+ case 1:
+ if (!FAST_IS_REG(src))
+ opcode = PINSRW_x_rm_i8;
+ break;
+ case 2:
+ break;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ case 3:
+ /* MOVQ */
+ compiler->mode32 = 0;
+ break;
+#endif /* SLJIT_CONFIG_X86_64 */
+ }
+
+ inst = emit_x86_instruction(compiler, size | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw);
+ FAIL_IF(!inst);
+ inst[0] = GROUP_0F;
+ inst[1] = opcode;
+
+ if (reg_size == 5) {
+ SLJIT_ASSERT(opcode == MOVD_x_rm);
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ size = VPBROADCASTD_x_xm;
+#else /* !SLJIT_CONFIG_X86_32 */
+ size = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm;
+#endif /* SLJIT_CONFIG_X86_32 */
+ return emit_vex_instruction(compiler, size | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
+ }
+
+ if (size == 3) {
+ SLJIT_ASSERT(opcode == 0x3a);
+ inst[2] = PINSRB_x_rm_i8;
+ }
+
+ if (opcode != MOVD_x_rm)
+ FAIL_IF(emit_byte(compiler, 0));
+
+ switch (elem_size) {
+ case 0:
+ FAIL_IF(emit_groupf(compiler, PXOR_x_xm, EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
+ return emit_groupf_ext(compiler, PSHUFB_x_xm, EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0);
+ case 1:
+ FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm, EX86_PREF_F2 | EX86_SSE2, freg, freg, 0));
+ FAIL_IF(emit_byte(compiler, 0));
+ /* fallthrough */
+ default:
+ FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm, EX86_PREF_66 | EX86_SSE2, freg, freg, 0));
+ return emit_byte(compiler, 0);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ case 3:
+ compiler->mode32 = 1;
+ FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm, EX86_PREF_66 | EX86_SSE2, freg, freg, 0));
+ return emit_byte(compiler, 0x44);
+#endif /* SLJIT_CONFIG_X86_64 */
+ }
}
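
Note: the integer-immediate path above first widens an 8- or 16-bit immediate into a full 32-bit pattern, so the zero/-1 fast paths and the later broadcast only ever see 32-bit (or, on x86-64, 64-bit) elements. A standalone model of the widening:

#include <stdint.h>

static uint32_t splat_imm32(uint32_t w, int elem_size /* 0 = byte, 1 = half */)
{
	if (elem_size == 0) {
		w = (uint8_t)w;
		w |= w << 8;
		w |= w << 16; /* 0x000000ab -> 0xabababab */
	} else if (elem_size == 1) {
		w = (uint16_t)w;
		w |= w << 16; /* 0x0000abcd -> 0xabcdabcd */
	}
	return w;
}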
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
@@ -3764,83 +3768,155 @@
sljit_u8 *inst;
sljit_u8 opcode = 0;
sljit_uw size;
+ sljit_s32 freg_orig = freg;
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ sljit_s32 srcdst_is_ereg = 0;
+ sljit_s32 srcdst_orig = 0;
+ sljit_sw srcdstw_orig = 0;
+#endif /* SLJIT_CONFIG_X86_32 */
CHECK_ERROR();
CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
- CHECK_EXTRA_REGS(srcdst, srcdstw, (void)0);
+
+ if (reg_size == 5) {
+ if (!(cpu_feature_list & CPU_FEATURE_AVX2))
+ return SLJIT_ERR_UNSUPPORTED;
+ } else if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2)
+ return SLJIT_ERR_UNSUPPORTED;
+#else /* !SLJIT_CONFIG_X86_32 */
+ if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
+ return SLJIT_ERR_UNSUPPORTED;
+#endif /* SLJIT_CONFIG_X86_32 */
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 1;
+#else /* !SLJIT_CONFIG_X86_64 */
+ if (!(type & SLJIT_SIMD_FLOAT)) {
+ CHECK_EXTRA_REGS(srcdst, srcdstw, srcdst_is_ereg = 1);
+
+ if ((type & SLJIT_SIMD_STORE) && ((srcdst_is_ereg && elem_size < 2) || (elem_size == 0 && (type & SLJIT_SIMD_LANE_SIGNED) && FAST_IS_REG(srcdst) && reg_map[srcdst] >= 4))) {
+ srcdst_orig = srcdst;
+ srcdstw_orig = srcdstw;
+ srcdst = TMP_REG1;
+ srcdstw = 0;
+ }
+ }
#endif /* SLJIT_CONFIG_X86_64 */
- if (reg_size == 4) {
- if (type & SLJIT_SIMD_FLOAT) {
- if (elem_size == 3) {
- if (type & SLJIT_SIMD_TEST)
- return SLJIT_SUCCESS;
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ if (lane_index == 0) {
+ if (!(type & SLJIT_SIMD_FLOAT)) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (elem_size == 3) {
+ compiler->mode32 = 0;
+ elem_size = 2;
+ }
+#endif /* SLJIT_CONFIG_X86_64 */
+ if (srcdst == SLJIT_IMM) {
+ if (elem_size == 0)
+ srcdstw = (sljit_u8)srcdstw;
+ else if (elem_size == 1)
+ srcdstw = (sljit_u16)srcdstw;
- if (srcdst & SLJIT_MEM) {
- if (type & SLJIT_SIMD_STORE)
- opcode = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x;
- else
- opcode = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m;
-
- return emit_sse2_logic(compiler, opcode, 1, freg, srcdst, srcdstw);
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
+ srcdst = TMP_REG1;
+ srcdstw = 0;
+ elem_size = 2;
}
- if (type & SLJIT_SIMD_STORE) {
- if (lane_index == 1)
- return emit_sse2_logic(compiler, MOVHLPS_x_x, 0, srcdst, freg, 0);
- return emit_sse2_load(compiler, 0, srcdst, freg, 0);
+ if (elem_size == 2) {
+ if (reg_size == 4)
+ return emit_groupf(compiler, MOVD_x_rm, EX86_PREF_66 | EX86_SSE2_OP1, freg, srcdst, srcdstw);
+ return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw);
}
+ } else if (srcdst & SLJIT_MEM) {
+ SLJIT_ASSERT(elem_size == 2 || elem_size == 3);
+ if (reg_size == 4)
+ return emit_groupf(compiler, MOVSD_x_xm, (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, srcdst, srcdstw);
+ return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, 0, srcdst, srcdstw);
+ } else if (elem_size == 3) {
+ if (reg_size == 4)
+ return emit_groupf(compiler, MOVQ_x_xm, EX86_PREF_F3 | EX86_SSE2, freg, srcdst, 0);
+ return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, 0, srcdst, 0);
+ }
+ }
+
+ if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
+ freg = TMP_FREG;
+ lane_index -= (1 << (4 - elem_size));
+ } else if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
+ FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw));
+ srcdst = TMP_FREG;
+ srcdstw = 0;
+ }
+
+ size = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0);
+
+ if (reg_size == 5)
+ FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm) | VEX_256 | size | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0));
+ else
+ FAIL_IF(emit_groupf(compiler, (type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm, size | EX86_SSE2, freg, freg, 0));
+ } else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
+ FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0));
+ FAIL_IF(emit_byte(compiler, 1));
+
+ freg = TMP_FREG;
+ lane_index -= (1 << (4 - elem_size));
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (elem_size == 3) {
+ if (srcdst & SLJIT_MEM) {
+ if (type & SLJIT_SIMD_STORE)
+ opcode = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x;
+ else
+ opcode = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m;
+
+ FAIL_IF(emit_groupf(compiler, opcode, EX86_PREF_66 | EX86_SSE2, freg, srcdst, srcdstw));
+
+ /* In case of store, freg is not TMP_FREG. */
+ } else if (type & SLJIT_SIMD_STORE) {
if (lane_index == 1)
- return emit_sse2_logic(compiler, MOVLHPS_x_x, 0, freg, srcdst, 0);
- return emit_sse2_store(compiler, 0, freg, 0, srcdst);
+ return emit_groupf(compiler, MOVHLPS_x_x, EX86_SSE2, srcdst, freg, 0);
+ return emit_sse2_load(compiler, 0, srcdst, freg, 0);
+ } else {
+ if (lane_index == 1)
+ FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x, EX86_SSE2, freg, srcdst, 0));
+ else
+ FAIL_IF(emit_sse2_store(compiler, 0, freg, 0, srcdst));
}
-
- if (elem_size != 2)
- return SLJIT_ERR_UNSUPPORTED;
-
- if (type & SLJIT_SIMD_TEST)
- return SLJIT_SUCCESS;
-
- if (!(type & SLJIT_SIMD_STORE)) {
- if (lane_index == 0 && !(srcdst & SLJIT_MEM))
- return emit_sse2_store(compiler, 1, freg, 0, srcdst);
-
- inst = emit_x86_instruction(compiler, 3 | EX86_PREF_66 | EX86_SSE2, freg, 0, srcdst, srcdstw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = 0x3a;
- inst[2] = INSERTPS_x_xm;
-
- return emit_byte(compiler, U8(lane_index << 4));
- }
-
+ } else if (type & SLJIT_SIMD_STORE) {
if (lane_index == 0)
return emit_sse2_store(compiler, 1, srcdst, srcdstw, freg);
if (srcdst & SLJIT_MEM) {
- inst = emit_x86_instruction(compiler, 3 | EX86_PREF_66 | EX86_SSE2, freg, 0, srcdst, srcdstw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = 0x3a;
- inst[2] = EXTRACTPS_x_xm;
-
+ FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm, EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw));
return emit_byte(compiler, U8(lane_index));
}
- size = 2 | EX86_SSE2;
+ size = EX86_SSE2;
if (srcdst == freg)
opcode = SHUFPS_x_xm;
else {
+ if (cpu_feature_list & CPU_FEATURE_AVX) {
+ FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, freg, freg, 0));
+ return emit_byte(compiler, U8(lane_index));
+ }
+
switch (lane_index) {
case 1:
opcode = MOVSHDUP_x_xm;
- size = 2 | EX86_PREF_F3 | EX86_SSE2;
+ size = EX86_PREF_F3 | EX86_SSE2;
break;
case 2:
opcode = MOVHLPS_x_x;
@@ -3848,78 +3924,140 @@
default:
SLJIT_ASSERT(lane_index == 3);
opcode = PSHUFD_x_xm;
- size = 2 | EX86_PREF_66 | EX86_SSE2;
+ size = EX86_PREF_66 | EX86_SSE2;
break;
}
}
- inst = emit_x86_instruction(compiler, size, srcdst, 0, freg, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = opcode;
+ FAIL_IF(emit_groupf(compiler, opcode, size, srcdst, freg, 0));
if (opcode == SHUFPS_x_xm || opcode == PSHUFD_x_xm)
return emit_byte(compiler, U8(lane_index));
return SLJIT_SUCCESS;
- }
-
- if (srcdst & SLJIT_IMM) {
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (elem_size < 3)
- srcdstw = (sljit_s32)srcdstw;
-#endif /* SLJIT_CONFIG_X86_64 */
- EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
- srcdst = TMP_REG1;
- srcdstw = 0;
- }
-
- size = 3;
-
- switch (elem_size) {
- case 0:
- opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8;
- break;
- case 1:
- if (!(type & SLJIT_SIMD_STORE)) {
- size = 2;
- opcode = PINSRW_x_rm_i8;
+ } else {
+ if (lane_index != 0 || (srcdst & SLJIT_MEM)) {
+ FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm, EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw));
+ FAIL_IF(emit_byte(compiler, U8(lane_index << 4)));
} else
- opcode = PEXTRW_rm_x_i8;
- break;
- case 2:
- opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
- break;
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- case 3:
- /* PINSRQ / PEXTRQ */
- opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
- compiler->mode32 = 0;
- break;
-#endif /* SLJIT_CONFIG_X86_64 */
+ FAIL_IF(emit_sse2_store(compiler, 1, freg, 0, srcdst));
}
- if (opcode == 0)
- return SLJIT_ERR_UNSUPPORTED;
-
- if (type & SLJIT_SIMD_TEST)
+ if (freg != TMP_FREG || (type & SLJIT_SIMD_STORE))
return SLJIT_SUCCESS;
- inst = emit_x86_instruction(compiler, size | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
+ SLJIT_ASSERT(reg_size == 5);
- if (size == 3) {
- inst[1] = 0x3a;
- inst[2] = opcode;
- } else
- inst[1] = opcode;
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0));
+ return emit_byte(compiler, 0x4e);
+ }
- return emit_byte(compiler, U8(lane_index));
+ FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0));
+ return emit_byte(compiler, 1);
}
- /* TODO: Support VEX prefix and longer reg types. */
- return SLJIT_ERR_UNSUPPORTED;
+ if (srcdst == SLJIT_IMM) {
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
+ srcdst = TMP_REG1;
+ srcdstw = 0;
+ }
+
+ size = 3;
+
+ switch (elem_size) {
+ case 0:
+ opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8;
+ break;
+ case 1:
+ if (!(type & SLJIT_SIMD_STORE)) {
+ size = 2;
+ opcode = PINSRW_x_rm_i8;
+ } else
+ opcode = PEXTRW_rm_x_i8;
+ break;
+ case 2:
+ opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
+ break;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ case 3:
+ /* PINSRQ / PEXTRQ */
+ opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
+ compiler->mode32 = 0;
+ break;
+#endif /* SLJIT_CONFIG_X86_64 */
+ }
+
+ inst = emit_x86_instruction(compiler, size | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw);
+ FAIL_IF(!inst);
+ inst[0] = GROUP_0F;
+
+ if (size == 3) {
+ inst[1] = 0x3a;
+ inst[2] = opcode;
+ } else
+ inst[1] = opcode;
+
+ FAIL_IF(emit_byte(compiler, U8(lane_index)));
+
+ if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) {
+ if (freg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) {
+ SLJIT_ASSERT(reg_size == 5);
+
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0));
+ return emit_byte(compiler, 0x4e);
+ }
+
+ FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0));
+ return emit_byte(compiler, 1);
+ }
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ if (srcdst_orig & SLJIT_MEM)
+ return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
+#endif /* SLJIT_CONFIG_X86_32 */
+ return SLJIT_SUCCESS;
+ }
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (elem_size >= 3)
+ return SLJIT_SUCCESS;
+
+ compiler->mode32 = (type & SLJIT_32);
+
+ size = 2;
+
+ if (elem_size == 0)
+ size |= EX86_REX;
+
+ if (elem_size == 2) {
+ if (type & SLJIT_32)
+ return SLJIT_SUCCESS;
+
+ SLJIT_ASSERT(!(compiler->mode32));
+ size = 1;
+ }
+
+ inst = emit_x86_instruction(compiler, size, srcdst, 0, srcdst, 0);
+ FAIL_IF(!inst);
+
+ if (size != 1) {
+ inst[0] = GROUP_0F;
+ inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16);
+ } else
+ inst[0] = MOVSXD_r_rm;
+#else /* !SLJIT_CONFIG_X86_64 */
+ if (elem_size >= 2)
+ return SLJIT_SUCCESS;
+
+ FAIL_IF(emit_groupf(compiler, (elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16, 0,
+ (srcdst_orig != 0 && FAST_IS_REG(srcdst_orig)) ? srcdst_orig : srcdst, srcdst, 0));
+
+ if (srcdst_orig & SLJIT_MEM)
+ return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
+#endif /* SLJIT_CONFIG_X86_64 */
+ return SLJIT_SUCCESS;
}
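
Note: a 256-bit register holds twice as many lanes as a 128-bit one, and `(1 << (4 - elem_size))` above is exactly the lane count of one 128-bit half (16 bytes divided by the element size). Lanes in the upper half are reached by extracting that half into TMP_FREG and renumbering; a standalone sketch of the renumbering:

static int fold_lane_to_half(int lane_index, int elem_size /* log2 of bytes */)
{
	int half = 1 << (4 - elem_size); /* lanes per 128-bit half */
	return lane_index >= half ? lane_index - half : lane_index;
}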
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
@@ -3928,101 +4066,103 @@
{
sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
- sljit_u8 *inst;
sljit_uw pref;
sljit_u8 byte;
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ sljit_s32 opcode3 = TMP_REG1;
+#else /* !SLJIT_CONFIG_X86_32 */
+ sljit_s32 opcode3 = SLJIT_S0;
+#endif /* SLJIT_CONFIG_X86_32 */
CHECK_ERROR();
CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
- if (reg_size == 4) {
- if (type & SLJIT_SIMD_FLOAT) {
- pref = 0;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif /* SLJIT_CONFIG_X86_64 */
+ SLJIT_ASSERT(reg_map[opcode3] == 3);
- if (elem_size == 3) {
- if (type & SLJIT_SIMD_TEST)
- return SLJIT_SUCCESS;
+ if (reg_size == 5) {
+ if (!(cpu_feature_list & CPU_FEATURE_AVX2))
+ return SLJIT_ERR_UNSUPPORTED;
+ } else if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
- if (src_lane_index == 0) {
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, freg, 0, src, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = MOVDDUP_x_xm;
- return SLJIT_SUCCESS;
- }
+ if (type & SLJIT_SIMD_FLOAT) {
+ pref = 0;
+ byte = U8(src_lane_index);
- pref = EX86_PREF_66;
- } else if (elem_size != 2)
- return SLJIT_ERR_UNSUPPORTED;
- else if (type & SLJIT_SIMD_TEST)
+ if (elem_size == 3) {
+ if (type & SLJIT_SIMD_TEST)
return SLJIT_SUCCESS;
- if (freg != src) {
- inst = emit_x86_instruction(compiler, 2 | pref | EX86_SSE2, freg, 0, src, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = MOVAPS_x_xm;
+ if (reg_size == 5) {
+ if (src_lane_index == 0)
+ return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
+
+ FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
+
+ byte = U8(byte | (byte << 2));
+ return emit_byte(compiler, U8(byte | (byte << 4)));
}
- inst = emit_x86_instruction(compiler, 2 | pref | EX86_SSE2, freg, 0, freg, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = SHUFPS_x_xm;
+ if (src_lane_index == 0)
+ return emit_groupf(compiler, MOVDDUP_x_xm, EX86_PREF_F2 | EX86_SSE2, freg, src, 0);
- byte = U8(src_lane_index);
-
- if (elem_size == 2) {
- byte = U8(byte | (byte << 2));
- byte = U8(byte | (byte << 4));
- } else
- byte = U8(byte | (byte << 1));
-
- return emit_byte(compiler, U8(byte));
- }
-
- if (type & SLJIT_SIMD_TEST)
+ /* The 66 prefix changes SHUFPS into SHUFPD_x_xm. */
+ pref = EX86_PREF_66;
+ } else if (elem_size != 2)
+ return SLJIT_ERR_UNSUPPORTED;
+ else if (type & SLJIT_SIMD_TEST)
return SLJIT_SUCCESS;
- if (elem_size >= 1) {
- if (elem_size == 1) {
- if (src_lane_index >= 4) {
- byte = U8(src_lane_index - 4);
- src_lane_index = 2;
- pref = EX86_PREF_F3;
- } else {
- byte = U8(src_lane_index);
- src_lane_index = 0;
- pref = EX86_PREF_F2;
- }
+ if (reg_size == 5) {
+ SLJIT_ASSERT(elem_size == 2);
- inst = emit_x86_instruction(compiler, 2 | pref | EX86_SSE2, freg, 0, src, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = PSHUFLW_x_xm;
+ if (src_lane_index == 0)
+ return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
- byte = U8(byte | (byte << 2));
- FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
+ FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
- src = freg;
+ byte = 0x44;
+ if (src_lane_index >= 4) {
+ byte = 0xee;
+ src_lane_index -= 4;
}
- if (elem_size == 3)
- src_lane_index <<= 1;
-
+ FAIL_IF(emit_byte(compiler, byte));
+ FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0));
byte = U8(src_lane_index);
- byte = U8(byte | (byte << 2));
+ } else if (freg != src && (cpu_feature_list & CPU_FEATURE_AVX)) {
+ FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0));
+ } else {
+ if (freg != src)
+ FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm, pref | EX86_SSE2, freg, src, 0));
- if (elem_size == 3)
- byte |= 0x4;
-
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, freg, 0, src, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = PSHUFD_x_xm;
- return emit_byte(compiler, U8(byte | (byte << 4)));
+ FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm, pref | EX86_SSE2, freg, freg, 0));
}
- if (freg != src || src_lane_index != 0) {
+ if (elem_size == 2) {
+ byte = U8(byte | (byte << 2));
+ byte = U8(byte | (byte << 4));
+ } else
+ byte = U8(byte | (byte << 1));
+
+ return emit_byte(compiler, U8(byte));
+ }
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (elem_size == 0) {
+ if (reg_size == 5 && src_lane_index >= 16) {
+ FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
+ FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa));
+ src_lane_index &= 0x7;
+ src = freg;
+ }
+
+ if ((freg != src && !(cpu_feature_list & CPU_FEATURE_AVX2)) || src_lane_index != 0) {
pref = 0;
if ((src_lane_index & 0x3) == 0) {
@@ -4032,50 +4172,281 @@
pref = EX86_PREF_F2;
byte = U8(src_lane_index >> 1);
} else {
- if (freg != src) {
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, freg, 0, src, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = MOVDQA_x_xm;
- }
+ if (freg == src || !(cpu_feature_list & CPU_FEATURE_AVX2)) {
+ if (freg != src)
+ FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm, EX86_PREF_66 | EX86_SSE2, freg, src, 0));
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP2, 0, 0, freg, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = PSRLDQ_x;
- inst[2] |= (0x3 << 3);
+ FAIL_IF(emit_groupf(compiler, PSRLDQ_x, 2 | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, freg, 0));
+ } else
+ FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, freg, src, 0));
FAIL_IF(emit_byte(compiler, U8(src_lane_index)));
}
if (pref != 0) {
- inst = emit_x86_instruction(compiler, 2 | pref | EX86_SSE2, freg, 0, src, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = PSHUFLW_x_xm;
-
- byte = U8(byte | (byte << 2));
- FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
+ FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm, pref | EX86_SSE2, freg, src, 0));
+ FAIL_IF(emit_byte(compiler, byte));
}
src = freg;
}
- inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, TMP_FREG, 0, TMP_FREG, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = PXOR_x_xm;
+ if (cpu_feature_list & CPU_FEATURE_AVX2)
+ return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
- inst = emit_x86_instruction(compiler, 3 | EX86_PREF_66 | EX86_SSE2, freg, 0, TMP_FREG, 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = 0x38;
- inst[2] = PSHUFB_x_xm;
+ SLJIT_ASSERT(reg_size == 4);
+ FAIL_IF(emit_groupf(compiler, PXOR_x_xm, EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
+ return emit_groupf_ext(compiler, PSHUFB_x_xm, EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0);
+ }
+
+ if ((cpu_feature_list & CPU_FEATURE_AVX2) && src_lane_index == 0 && elem_size <= 3) {
+ switch (elem_size) {
+ case 1:
+ pref = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+ break;
+ case 2:
+ pref = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+ break;
+ default:
+ pref = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
+ break;
+ }
+
+ if (reg_size == 5)
+ pref |= VEX_256;
+
+ return emit_vex_instruction(compiler, pref, freg, 0, src, 0);
+ }
+
+ if (reg_size == 5) {
+ switch (elem_size) {
+ case 1:
+ byte = U8(src_lane_index & 0x3);
+ src_lane_index >>= 2;
+ pref = PSHUFLW_x_xm | VEX_256 | ((src_lane_index & 1) == 0 ? EX86_PREF_F2 : EX86_PREF_F3) | EX86_SSE2;
+ break;
+ case 2:
+ byte = U8(src_lane_index & 0x3);
+ src_lane_index >>= 1;
+ pref = PSHUFD_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2;
+ break;
+ case 3:
+ pref = 0;
+ break;
+ default:
+ FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
+ return emit_byte(compiler, U8(src_lane_index == 0 ? 0x44 : 0xee));
+ }
+
+ if (pref != 0) {
+ FAIL_IF(emit_vex_instruction(compiler, pref, freg, 0, src, 0));
+ byte = U8(byte | (byte << 2));
+ FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
+
+ if (src_lane_index == 0)
+ return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
+
+ src = freg;
+ }
+
+ FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
+ byte = U8(src_lane_index);
+ byte = U8(byte | (byte << 2));
+ return emit_byte(compiler, U8(byte | (byte << 4)));
+ }
+
+ switch (elem_size) {
+ case 1:
+ byte = U8(src_lane_index & 0x3);
+ src_lane_index >>= 1;
+ pref = (src_lane_index & 2) == 0 ? EX86_PREF_F2 : EX86_PREF_F3;
+
+ FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm, pref | EX86_SSE2, freg, src, 0));
+ byte = U8(byte | (byte << 2));
+ FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
+
+ if ((cpu_feature_list & CPU_FEATURE_AVX2) && pref == EX86_PREF_F2)
+ return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
+
+ src = freg;
+ /* fallthrough */
+ case 2:
+ byte = U8(src_lane_index);
+ byte = U8(byte | (byte << 2));
+ break;
+ default:
+ byte = U8(src_lane_index << 1);
+ byte = U8(byte | (byte << 2) | 0x4);
+ break;
+ }
+
+ FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm, EX86_PREF_66 | EX86_SSE2, freg, src, 0));
+ return emit_byte(compiler, U8(byte | (byte << 4)));
+}
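+
Note: several of the replicate paths above build the same shuffle immediate: PSHUFD's imm8 holds four 2-bit source-lane selectors, so broadcasting one 32-bit lane means repeating its index four times. A standalone sketch:

#include <stdint.h>

static uint8_t pshufd_splat_imm(uint8_t lane) /* lane in 0..3 */
{
	uint8_t b = (uint8_t)(lane | (lane << 2)); /* two copies */
	return (uint8_t)(b | (b << 4)); /* four copies; lane 2 -> 0xaa */
}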
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+ sljit_u8 opcode;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ if (reg_size == 5) {
+ if (!(cpu_feature_list & CPU_FEATURE_AVX2))
+ return SLJIT_ERR_UNSUPPORTED;
+ } else if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (elem_size != 2 || elem2_size != 3)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ return emit_groupf(compiler, CVTPS2PD_x_xm, EX86_SSE2, freg, src, srcw);
+ return emit_vex_instruction(compiler, CVTPS2PD_x_xm | VEX_256 | EX86_SSE2, freg, 0, src, srcw);
+ }
+
+ switch (elem_size) {
+ case 0:
+ if (elem2_size == 1)
+ opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBW_x_xm : PMOVZXBW_x_xm;
+ else if (elem2_size == 2)
+ opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBD_x_xm : PMOVZXBD_x_xm;
+ else if (elem2_size == 3)
+ opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBQ_x_xm : PMOVZXBQ_x_xm;
+ else
+ return SLJIT_ERR_UNSUPPORTED;
+ break;
+ case 1:
+ if (elem2_size == 2)
+ opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWD_x_xm : PMOVZXWD_x_xm;
+ else if (elem2_size == 3)
+ opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWQ_x_xm : PMOVZXWQ_x_xm;
+ else
+ return SLJIT_ERR_UNSUPPORTED;
+ break;
+ case 2:
+ if (elem2_size == 3)
+ opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXDQ_x_xm : PMOVZXDQ_x_xm;
+ else
+ return SLJIT_ERR_UNSUPPORTED;
+ break;
+ default:
+ return SLJIT_ERR_UNSUPPORTED;
+ }
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ return emit_groupf_ext(compiler, opcode, EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, src, srcw);
+ return emit_vex_instruction(compiler, opcode | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, srcw);
+}
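+
Note: the opcode table above is the SSE4.1 PMOVSX/PMOVZX family, indexed by source and destination element widths. Per lane it behaves like an ordinary widening conversion; a scalar model for the byte-to-word case:

#include <stdint.h>

static void extend_i8_to_i16(const int8_t *src, int16_t *dst, int n, int is_signed)
{
	for (int i = 0; i < n; i++)
		dst[i] = is_signed ? (int16_t)src[i] : (int16_t)(uint8_t)src[i];
}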
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 dst_r;
+ sljit_uw pref;
+ sljit_u8 *inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
+
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ CHECK_EXTRA_REGS(dst, dstw, (void)0);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (reg_size == 4) {
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ pref = EX86_PREF_66 | EX86_SSE2_OP2;
+
+ switch (elem_size) {
+ case 1:
+ FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm, EX86_PREF_66 | EX86_SSE2, TMP_FREG, freg, 0));
+ freg = TMP_FREG;
+ break;
+ case 2:
+ pref = EX86_SSE2_OP2;
+ break;
+ }
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ FAIL_IF(emit_groupf(compiler, elem_size < 2 ? PMOVMSKB_r_x : MOVMSKPS_r_x, pref, dst_r, freg, 0));
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = type & SLJIT_32;
+#endif /* SLJIT_CONFIG_X86_64 */
+
+ if (elem_size == 1) {
+ inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 8, dst_r, 0);
+ FAIL_IF(!inst);
+ inst[1] |= SHR;
+ }
+
+ if (dst_r == TMP_REG1)
+ return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+
return SLJIT_SUCCESS;
}
- /* TODO: Support VEX prefix and longer reg types. */
- return SLJIT_ERR_UNSUPPORTED;
+ if (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+ if (elem_size == 1) {
+ FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0));
+ FAIL_IF(emit_byte(compiler, 1));
+ FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, TMP_FREG, 0));
+ FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x, EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0));
+ } else {
+ pref = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2;
+
+ if (elem_size == 0)
+ pref = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2;
+ else if (elem_size == 3)
+ pref |= EX86_PREF_66;
+
+ FAIL_IF(emit_vex_instruction(compiler, pref, dst_r, 0, freg, 0));
+ }
+
+ if (dst_r == TMP_REG1) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = type & SLJIT_32;
+#endif /* SLJIT_CONFIG_X86_64 */
+ return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+ }
+
+ return SLJIT_SUCCESS;
}
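
Note: PMOVMSKB/MOVMSKPS/MOVMSKPD gather the top bit of each lane into a general-purpose register, and there is no 16-bit variant, which is why the elem_size == 1 paths above first narrow the words with PACKSSWB and then correct the resulting byte mask with a shift. A scalar model of the gathering step:

#include <stdint.h>

static uint32_t sign_mask_u8(const uint8_t *lanes, int n)
{
	uint32_t mask = 0;
	for (int i = 0; i < n; i++)
		mask |= (uint32_t)(lanes[i] >> 7) << i; /* top bit of lane i */
	return mask;
}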
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
@@ -4094,8 +4465,7 @@
sljit_s32 mem_reg,
sljit_s32 temp_reg)
{
- sljit_u8 *inst;
- sljit_uw size;
+ sljit_uw pref;
sljit_s32 free_reg = TMP_REG1;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
sljit_sw srcw = 0;
@@ -4163,18 +4533,15 @@
/* Lock prefix. */
FAIL_IF(emit_byte(compiler, GROUP_LOCK));
- size = 2;
+ pref = 0;
if (op == SLJIT_MOV_U16)
- size |= EX86_HALF_ARG | EX86_PREF_66;
+ pref = EX86_HALF_ARG | EX86_PREF_66;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (op == SLJIT_MOV_U8)
- size |= EX86_REX;
+ pref = EX86_REX;
#endif /* SLJIT_CONFIG_X86_64 */
- inst = emit_x86_instruction(compiler, size, src_reg, 0, SLJIT_MEM1(mem_reg), 0);
- FAIL_IF(!inst);
- inst[0] = GROUP_0F;
- inst[1] = U8(op == SLJIT_MOV_U8 ? CMPXCHG_rm8_r : CMPXCHG_rm_r);
+ FAIL_IF(emit_groupf(compiler, U8(op == SLJIT_MOV_U8 ? CMPXCHG_rm8_r : CMPXCHG_rm_r), pref, src_reg, SLJIT_MEM1(mem_reg), 0));
if (temp_reg != SLJIT_R0) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)