target-arm/op_helper.c - platform/external/qemu - Git at Google

 /*
  *  ARM helper routines
  *
  *  Copyright (c) 2005-2007 CodeSourcery, LLC
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2 of the License, or (at your option) any later version.
  *
  * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 #include "exec.h"
 #include "helpers.h"

 #define SIGNBIT (uint32_t)0x80000000
 #define SIGNBIT64 ((uint64_t)1 << 63)

 /* Store tt in env->exception_index and then call cpu_loop_exit().
    NOTE(review): cpu_loop_exit() presumably does not return to the
    caller -- confirm against its definition.  */
 void raise_exception(int tt)
 {
     env->exception_index = tt;
     cpu_loop_exit();
 }

 /* thread support */

 /* Single lock shared by cpu_lock() and cpu_unlock(); starts unlocked.  */
 spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;

 /* Acquire global_cpu_lock with spin_lock().  */
 void cpu_lock(void)
 {
     spin_lock(&global_cpu_lock);
 }

 /* Release global_cpu_lock with spin_unlock().  */
 void cpu_unlock(void)
 {
     spin_unlock(&global_cpu_lock);
 }

 /*
  * Byte table lookup.  Each of the four bytes of ireg is an index into the
  * byte table that starts at env->vfp.regs[rn].  An index below maxindex
  * selects the corresponding table byte; any other index keeps the byte of
  * def that sits in the same lane.  Returns the four result bytes packed in
  * lane order.
  */
 uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def,
                           uint32_t rn, uint32_t maxindex)
 {
     uint32_t val;
     uint32_t tmp;
     uint32_t index;
     int shift;
     uint64_t *table;
     table = (uint64_t *)&env->vfp.regs[rn];
     val = 0;
     for (shift = 0; shift < 32; shift += 8) {
         index = (ireg >> shift) & 0xff;
         if (index < maxindex) {
             /* Byte (index & 7) of a 64-bit word starts at bit
                (index & 7) * 8, so scale the byte offset to a bit offset.  */
             tmp = (table[index >> 3] >> ((index & 7) << 3)) & 0xff;
             val |= tmp << shift;
         } else {
             /* Unsigned mask: 0xff << 24 does not fit in a signed int.  */
             val |= def & (0xffu << shift);
         }
     }
     return val;
 }

 #if !defined(CONFIG_USER_ONLY)

 /* Forward declaration; the definition follows the softmmu_template.h
    inclusions below, where the third parameter is named mmu_idx.  */
 static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr);

 #define MMUSUFFIX _mmu
 #define ALIGNED_ONLY  1

 #define SHIFT 0
 #include "softmmu_template.h"

 #define SHIFT 1
 #include "softmmu_template.h"

 #define SHIFT 2
 #include "softmmu_template.h"

 #define SHIFT 3
 #include "softmmu_template.h"

 /*
  * Report an unaligned access at addr as a data abort, but only when
  * mmu_idx is non-zero; with mmu_idx == 0 the function returns without
  * doing anything.  is_write and retaddr are not used.
  */
 static void do_unaligned_access (target_ulong addr, int is_write, int mmu_idx, void *retaddr)
 {
     if (!mmu_idx) {
         return;
     }

     env = cpu_single_env;
     /* 0x00000001 corresponds to an alignment fault */
     env->cp15.c5_data = 0x00000001;
     env->cp15.c6_data = addr;
     env->exception_index = EXCP_DATA_ABORT;
     cpu_loop_exit();
 }

 /* Try to fill the TLB for addr; if cpu_arm_handle_mmu_fault() reports an
    error, raise the exception recorded in env->exception_index.  A NULL
    retaddr means the function was called from C code (i.e. not from
    generated code or from helper.c).  */
 /* XXX: fix it to restore all registers */
 void tlb_fill (target_ulong addr, int is_write, int mmu_idx, void *retaddr)
 {
     CPUState *caller_env = env;
     int fault;

     /* XXX: hack to restore env in all cases, even if not called from
        generated code */
     env = cpu_single_env;
     fault = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx, 1);
     if (unlikely(fault)) {
         if (retaddr) {
             /* now we have a real cpu fault */
             unsigned long host_pc = (unsigned long)retaddr;
             TranslationBlock *tb = tb_find_pc(host_pc);

             if (tb) {
                 /* the PC is inside the translated code, so this is a
                    virtual CPU fault: restore the CPU state first */
                 cpu_restore_state(tb, env, host_pc, NULL);
             }
         }
         raise_exception(env->exception_index);
     }
     env = caller_env;
 }

 #if 1
 #include <string.h>
 /*
  * The following functions are address translation helper functions
  * for fast memory access in QEMU.
  */
 /* Slow path of v2p(): look up the read TLB entry for addr and, while it
    does not match, call tlb_fill() and look again.  Returns addr plus the
    addend of the matching entry.  */
 static target_phys_addr_t v2p_mmu(target_ulong addr, int mmu_idx)
 {
     const int slot = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);

     for (;;) {
         target_ulong tag = env->tlb_table[mmu_idx][slot].addr_read;
         void *caller_pc;

         if ((addr & TARGET_PAGE_MASK) ==
             (tag & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
             target_phys_addr_t translated =
                 addr + env->tlb_table[mmu_idx][slot].addend;
             return translated;
         }
         /* the page is not in the TLB : fill it */
         caller_pc = GETPC();
         tlb_fill(addr, 0, mmu_idx, caller_pc);
     }
 }

 /*
  * Translate a virtual address of the simulated OS into an address of
  * the simulation host (not the physical address of the simulated OS).
  * env is switched to cpu_single_env for the lookup and restored before
  * returning.
  */
 target_phys_addr_t v2p(target_ulong ptr, int mmu_idx)
 {
     CPUState *caller_env = env;
     target_phys_addr_t translated;
     int slot;

     env = cpu_single_env;
     slot = (ptr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
     if (__builtin_expect(env->tlb_table[mmu_idx][slot].addr_read ==
                 (ptr & TARGET_PAGE_MASK), 1)) {
         /* TLB hit: apply the cached addend directly */
         translated = (target_phys_addr_t)ptr + env->tlb_table[mmu_idx][slot].addend;
     } else {
         translated = v2p_mmu(ptr, mmu_idx);
     }
     env = caller_env;
     return translated;
 }

 #define MINSIZE(x,y)    ((x) < (y) ? (x) : (y))
 /*
  * Copy size bytes from the simulated virtual address space, starting at
  * ptr, into the QEMU buffer buf.  The copy is split at target page
  * boundaries and each piece is translated with v2p(ptr, 0).  Nothing is
  * copied when buf is NULL or size is not positive; the copy stops early
  * if v2p() yields NULL.
  */
 void vmemcpy(target_ulong ptr, char *buf, int size)
 {
     if (buf == NULL) return;
     /* "size > 0" rather than "size": a negative size would otherwise
        reach memcpy() as a huge unsigned length.  */
     while (size > 0) {
         int page_remain = TARGET_PAGE_SIZE - (ptr & ~TARGET_PAGE_MASK);
         int to_copy = MINSIZE(size, page_remain);
         char *phys = (char *)v2p(ptr, 0);
         if (phys == NULL) return;
         memcpy(buf, phys, to_copy);
         ptr += to_copy;
         buf += to_copy;
         size -= to_copy;
     }
 }

 /*
  * Copy size bytes from the QEMU buffer buf into the simulated virtual
  * address space starting at ptr.  The copy is split at target page
  * boundaries and each piece is translated with v2p(ptr, 0).  Nothing is
  * copied when buf is NULL or size is not positive; the copy stops early
  * if v2p() yields NULL.
  */
 void pmemcpy(target_ulong ptr, const char *buf, int size)
 {
     if (buf == NULL) return;
     /* "size > 0" rather than "size": a negative size would otherwise
        reach memcpy() as a huge unsigned length.  */
     while (size > 0) {
         int page_remain = TARGET_PAGE_SIZE - (ptr & ~TARGET_PAGE_MASK);
         int to_copy = MINSIZE(size, page_remain);
         char *phys = (char *)v2p(ptr, 0);
         if (phys == NULL) return;
         memcpy(phys, buf, to_copy);
         ptr += to_copy;
         buf += to_copy;
         size -= to_copy;
     }
 }

 /*
  * Copy a string from the simulated virtual address space, starting at
  * ptr, into the QEMU buffer buf.  At most max bytes are written; copying
  * stops after the terminating '\0' has been stored.  If max bytes are
  * written without meeting a '\0', buf is left unterminated.  The address
  * is re-translated with v2p(ptr, 0) each time ptr enters a new target
  * page.  Nothing is done when buf is NULL or max is not positive.
  */
 void vstrcpy(target_ulong ptr, char *buf, int max)
 {
     char *phys = NULL;
     target_ulong page = 0;
     /* Explicit flag: page value 0 is a valid page, so it cannot double
        as the "nothing translated yet" marker.  */
     int have_page = 0;

     if (buf == NULL) return;

     while (max > 0) {
         if (!have_page || (ptr & TARGET_PAGE_MASK) != page) {
             phys = (char *)v2p(ptr, 0);
             page = ptr & TARGET_PAGE_MASK;
             have_page = 1;
         }
         *buf = *phys;
         if (*phys == '\0')
             return;
         ptr ++;
         buf ++;
         phys ++;
         max --;
     }
 }
 #endif
 #endif

 /* FIXME: Pass an explicit pointer to QF to CPUState, and move saturating
    instructions into helper.c  */
 /* Return a + b (wrapping) and set env->QF when both operands have the
    same sign but the sum's sign differs from a's.  */
 uint32_t HELPER(add_setq)(uint32_t a, uint32_t b)
 {
     const uint32_t sum = a + b;
     const int sign_flipped = ((sum ^ a) & SIGNBIT) != 0;
     const int same_sign_inputs = ((a ^ b) & SIGNBIT) == 0;

     if (sign_flipped && same_sign_inputs) {
         env->QF = 1;
     }
     return sum;
 }

 /* Signed saturating add: on overflow set env->QF and return 0x7fffffff
    when a is non-negative or 0x80000000 when a is negative; otherwise
    return a + b.  */
 uint32_t HELPER(add_saturate)(uint32_t a, uint32_t b)
 {
     const uint32_t sum = a + b;

     if (!((sum ^ a) & SIGNBIT) || ((a ^ b) & SIGNBIT)) {
         return sum;
     }
     env->QF = 1;
     return ~(((int32_t)a >> 31) ^ SIGNBIT);
 }

 /* Signed saturating subtract: on overflow set env->QF and return
    0x7fffffff when a is non-negative or 0x80000000 when a is negative;
    otherwise return a - b.  */
 uint32_t HELPER(sub_saturate)(uint32_t a, uint32_t b)
 {
     const uint32_t diff = a - b;

     if (!((diff ^ a) & SIGNBIT) || !((a ^ b) & SIGNBIT)) {
         return diff;
     }
     env->QF = 1;
     return ~(((int32_t)a >> 31) ^ SIGNBIT);
 }

 /* Return val * 2 with signed saturation.  Values at or above 0x40000000
    clamp to 0x7fffffff and values at or below 0xc0000000 (as a signed
    number) clamp to 0x80000000; either clamp sets env->QF.  */
 uint32_t HELPER(double_saturate)(int32_t val)
 {
     uint32_t res;
     if (val >= 0x40000000) {
         res = ~SIGNBIT;
         env->QF = 1;
     } else if (val <= (int32_t)0xc0000000) {
         res = SIGNBIT;
         env->QF = 1;
     } else {
         /* Shift as unsigned: left-shifting a negative signed value is
            undefined behaviour in C.  */
         res = (uint32_t)val << 1;
     }
     return res;
 }

 /* Unsigned saturating add: when a + b wraps, set env->QF and return the
    all-ones value; otherwise return the sum.  */
 uint32_t HELPER(add_usaturate)(uint32_t a, uint32_t b)
 {
     const uint32_t sum = a + b;

     if (sum >= a) {
         return sum;
     }
     env->QF = 1;
     return 0xffffffffu;
 }

 /* Unsigned saturating subtract: when a - b wraps, set env->QF and return
    0; otherwise return the difference.  */
 uint32_t HELPER(sub_usaturate)(uint32_t a, uint32_t b)
 {
     const uint32_t diff = a - b;

     if (diff <= a) {
         return diff;
     }
     env->QF = 1;
     return 0;
 }

 /* Signed saturation.  Looks at the bits of val above position shift: if
    they are greater than 0 the result is the mask of the low shift bits,
    if they are less than -1 the result is the complement of that mask;
    both cases set env->QF.  Otherwise val is returned unchanged.  */
 static inline uint32_t do_ssat(int32_t val, int shift)
 {
     const int32_t high_bits = val >> shift;
     const uint32_t low_mask = (1u << shift) - 1;

     if (high_bits > 0) {
         env->QF = 1;
         return low_mask;
     }
     if (high_bits < -1) {
         env->QF = 1;
         return ~low_mask;
     }
     return val;
 }

 /* Unsigned saturation.  Negative values clamp to 0 and values above
    (1 << shift) - 1 clamp to that limit; both cases set env->QF.
    Otherwise val is returned unchanged.  */
 static inline uint32_t do_usat(int32_t val, int shift)
 {
     const uint32_t limit = (1u << shift) - 1;

     if (val < 0) {
         env->QF = 1;
         return 0;
     }
     if (val > limit) {
         env->QF = 1;
         return limit;
     }
     return val;
 }

 /* Signed saturate: forwards x (reinterpreted as a signed 32-bit value by
    the parameter conversion) and shift to do_ssat().  */
 uint32_t HELPER(ssat)(uint32_t x, uint32_t shift)
 {
     return do_ssat(x, shift);
 }

 /* Dual halfword signed saturate: the low and high 16-bit halves of x are
    each sign-extended, saturated with do_ssat(), and recombined.  */
 uint32_t HELPER(ssat16)(uint32_t x, uint32_t shift)
 {
     const uint32_t low_half = (uint16_t)do_ssat((int16_t)x, shift);
     const uint32_t high_half = do_ssat(((int32_t)x) >> 16, shift) << 16;

     return low_half | high_half;
 }

 /* Unsigned saturate: forwards x (reinterpreted as a signed 32-bit value
    by the parameter conversion) and shift to do_usat().  */
 uint32_t HELPER(usat)(uint32_t x, uint32_t shift)
 {
     return do_usat(x, shift);
 }

 /* Dual halfword unsigned saturate: the low and high 16-bit halves of x
    are each sign-extended, saturated with do_usat(), and recombined.  */
 uint32_t HELPER(usat16)(uint32_t x, uint32_t shift)
 {
     const uint32_t low_half = (uint16_t)do_usat((int16_t)x, shift);
     const uint32_t high_half = do_usat(((int32_t)x) >> 16, shift) << 16;

     return low_half | high_half;
 }

 /* Set env->exception_index to EXCP_HLT, mark the CPU as halted and call
    cpu_loop_exit().  */
 void HELPER(wfi)(void)
 {
     env->exception_index = EXCP_HLT;
     env->halted = 1;
     cpu_loop_exit();
 }

 /* Store excp in env->exception_index and call cpu_loop_exit().  */
 void HELPER(exception)(uint32_t excp)
 {
     env->exception_index = excp;
     cpu_loop_exit();
 }

 /* Return cpsr_read(env) with the CPSR_EXEC bits cleared.  */
 uint32_t HELPER(cpsr_read)(void)
 {
     return cpsr_read(env) & ~CPSR_EXEC;
 }

 /* Forward val and mask to cpsr_write() for the current env.  */
 void HELPER(cpsr_write)(uint32_t val, uint32_t mask)
 {
     cpsr_write(env, val, mask);
 }

 /* Access to user mode registers from privileged modes.  */
 /* Read user-mode register regno: r13 and r14 come from bank 0 of the
    banked registers, r8 and above come from env->usr_regs while the CPU
    is in FIQ mode, and everything else comes from env->regs.  */
 uint32_t HELPER(get_user_reg)(uint32_t regno)
 {
     const int in_fiq = (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ;

     switch (regno) {
     case 13:
         return env->banked_r13[0];
     case 14:
         return env->banked_r14[0];
     default:
         if (regno >= 8 && in_fiq) {
             return env->usr_regs[regno - 8];
         }
         return env->regs[regno];
     }
 }

 /* Write val to user-mode register regno: r13 and r14 go to bank 0 of
    the banked registers, r8 and above go to env->usr_regs while the CPU
    is in FIQ mode, and everything else goes to env->regs.  */
 void HELPER(set_user_reg)(uint32_t regno, uint32_t val)
 {
     const int in_fiq = (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ;

     switch (regno) {
     case 13:
         env->banked_r13[0] = val;
         break;
     case 14:
         env->banked_r14[0] = val;
         break;
     default:
         if (regno >= 8 && in_fiq) {
             env->usr_regs[regno - 8] = val;
         } else {
             env->regs[regno] = val;
         }
         break;
     }
 }

 /* ??? Flag setting arithmetic is awkward because we need to do comparisons.
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */

 /* Return a + b and update the flags: NF and ZF hold the result, CF is
    the unsigned carry out, VF has its top bit set on signed overflow.  */
 uint32_t HELPER (add_cc)(uint32_t a, uint32_t b)
 {
     uint32_t result;
     /* Add the parameters, not the T0/T1 globals: the flag computations
        below are all expressed in terms of a and b.  */
     result = a + b;
     env->NF = env->ZF = result;
     env->CF = result < a;
     env->VF = (a ^ b ^ -1) & (a ^ result);
     return result;
 }

 /* Return a + b + carry-in (env->CF) and update the flags: CF is the
    unsigned carry out, VF has its top bit set on signed overflow, NF and
    ZF hold the result.  */
 uint32_t HELPER(adc_cc)(uint32_t a, uint32_t b)
 {
     uint32_t sum;

     if (env->CF) {
         sum = a + b + 1;
         env->CF = sum <= a;
     } else {
         sum = a + b;
         env->CF = sum < a;
     }
     env->VF = ~(a ^ b) & (a ^ sum);
     env->NF = env->ZF = sum;
     return sum;
 }

 /* Return a - b and update the flags: NF and ZF hold the result, CF is
    set when no borrow occurred (a >= b), VF has its top bit set on signed
    overflow.  */
 uint32_t HELPER(sub_cc)(uint32_t a, uint32_t b)
 {
     const uint32_t diff = a - b;

     env->NF = env->ZF = diff;
     env->CF = (a >= b);
     env->VF = (a ^ b) & (a ^ diff);
     return diff;
 }

 /* Subtract with carry: returns a - b when env->CF is set and a - b - 1
    when it is clear, then updates CF (no borrow), VF (signed overflow in
    the top bit) and NF/ZF (the result).  */
 uint32_t HELPER(sbc_cc)(uint32_t a, uint32_t b)
 {
     uint32_t diff;

     if (env->CF) {
         diff = a - b;
         env->CF = a >= b;
     } else {
         diff = a - b - 1;
         env->CF = a > b;
     }
     env->VF = (a ^ b) & (a ^ diff);
     env->NF = env->ZF = diff;
     return diff;
 }

 /* Similarly for variable shift instructions.  */

 /* Logical shift left by the low byte of i; amounts of 32 or more give 0.  */
 uint32_t HELPER(shl)(uint32_t x, uint32_t i)
 {
     const int amount = i & 0xff;

     return (amount < 32) ? (x << amount) : 0;
 }

 /* Logical shift right by the low byte of i; amounts of 32 or more give 0.  */
 uint32_t HELPER(shr)(uint32_t x, uint32_t i)
 {
     const int amount = i & 0xff;

     return (amount < 32) ? (x >> amount) : 0;
 }

 /* Arithmetic shift right by the low byte of i; amounts of 32 or more
    are treated as a shift by 31.  */
 uint32_t HELPER(sar)(uint32_t x, uint32_t i)
 {
     const int requested = i & 0xff;
     const int amount = (requested >= 32) ? 31 : requested;

     return (int32_t)x >> amount;
 }

 /* Rotate x right by the low byte of i.  A 32-bit rotation is periodic
    with period 32, so only the amount modulo 32 matters; multiples of 32
    (including 0) return x unchanged.  */
 uint32_t HELPER(ror)(uint32_t x, uint32_t i)
 {
     /* Reduce modulo 32 first: shifting a 32-bit value by 32 or more is
        undefined behaviour in C.  */
     int shift = i & 0x1f;
     if (shift == 0)
         return x;
     return (x >> shift) | (x << (32 - shift));
 }

 /* Logical shift left by the low byte of i, updating env->CF with the
    last bit shifted out.  A zero amount leaves x and CF untouched; an
    amount of 32 gives 0 with CF = bit 0 of x; larger amounts give 0 with
    CF cleared.  */
 uint32_t HELPER(shl_cc)(uint32_t x, uint32_t i)
 {
     const int amount = i & 0xff;

     if (amount == 0) {
         return x;
     }
     if (amount < 32) {
         env->CF = (x >> (32 - amount)) & 1;
         return x << amount;
     }
     env->CF = (amount == 32) ? (x & 1) : 0;
     return 0;
 }

 /* Logical shift right by the low byte of i, updating env->CF with the
    last bit shifted out.  A zero amount leaves x and CF untouched; an
    amount of 32 gives 0 with CF = bit 31 of x; larger amounts give 0 with
    CF cleared.  */
 uint32_t HELPER(shr_cc)(uint32_t x, uint32_t i)
 {
     const int amount = i & 0xff;

     if (amount == 0) {
         return x;
     }
     if (amount < 32) {
         env->CF = (x >> (amount - 1)) & 1;
         return x >> amount;
     }
     env->CF = (amount == 32) ? ((x >> 31) & 1) : 0;
     return 0;
 }

 /* Arithmetic shift right by the low byte of i, updating env->CF with
    the last bit shifted out.  A zero amount leaves x and CF untouched;
    amounts of 32 or more behave like a shift that leaves only copies of
    the sign bit, with CF = bit 31 of x.  */
 uint32_t HELPER(sar_cc)(uint32_t x, uint32_t i)
 {
     const int amount = i & 0xff;

     if (amount == 0) {
         return x;
     }
     if (amount < 32) {
         env->CF = (x >> (amount - 1)) & 1;
         return (int32_t)x >> amount;
     }
     env->CF = (x >> 31) & 1;
     return (int32_t)x >> 31;
 }

 /* Rotate right by the low byte of i, updating env->CF.  The rotation
    uses the amount modulo 32.  When that is zero x is returned unchanged,
    and CF becomes bit 31 of x only if the full amount was non-zero.  */
 uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i)
 {
     const int requested = i & 0xff;
     const int rotation = requested & 0x1f;

     if (rotation != 0) {
         env->CF = (x >> (rotation - 1)) & 1;
         return (x >> rotation) | (x << (32 - rotation));
     }
     if (requested != 0) {
         env->CF = (x >> 31) & 1;
     }
     return x;
 }

 /* 64-bit signed saturating add: on overflow set env->QF and return the
    largest positive value when src1 is non-negative or the most negative
    value when src1 is negative; otherwise return the sum.  */
 uint64_t HELPER(neon_add_saturate_s64)(uint64_t src1, uint64_t src2)
 {
     const uint64_t sum = src1 + src2;

     if (!((sum ^ src1) & SIGNBIT64) || ((src1 ^ src2) & SIGNBIT64)) {
         return sum;
     }
     env->QF = 1;
     return ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
 }

 /* 64-bit unsigned saturating add: when the sum wraps, set env->QF and
    return the all-ones value; otherwise return the sum.  */
 uint64_t HELPER(neon_add_saturate_u64)(uint64_t src1, uint64_t src2)
 {
     const uint64_t sum = src1 + src2;

     if (sum >= src1) {
         return sum;
     }
     env->QF = 1;
     return ~(uint64_t)0;
 }

 /* 64-bit signed saturating subtract: on overflow set env->QF and return
    the largest positive value when src1 is non-negative or the most
    negative value when src1 is negative; otherwise return the difference.  */
 uint64_t HELPER(neon_sub_saturate_s64)(uint64_t src1, uint64_t src2)
 {
     const uint64_t diff = src1 - src2;

     if (!((diff ^ src1) & SIGNBIT64) || !((src1 ^ src2) & SIGNBIT64)) {
         return diff;
     }
     env->QF = 1;
     return ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
 }

 /* 64-bit unsigned saturating subtract: when src1 < src2, set env->QF
    and return 0; otherwise return src1 - src2.  */
 uint64_t HELPER(neon_sub_saturate_u64)(uint64_t src1, uint64_t src2)
 {
     if (src1 >= src2) {
         return src1 - src2;
     }
     env->QF = 1;
     return 0;
 }

 /* These need to return a pair of values, so still use T0/T1.  */
 /* Transpose.  Argument order is rather strange to avoid special casing
    the translation code.
    On input T0 = rm, T1 = rd.  On output T0 = rd, T1 = rm  */
 void HELPER(neon_trn_u8)(void)
 {
     const uint32_t in0 = T0;
     const uint32_t in1 = T1;

     /* Even bytes of in0 move up into the odd byte positions.  */
     T0 = ((in0 & 0x00ff00ff) << 8) | (in1 & 0x00ff00ff);
     /* Odd bytes of in1 move down into the even byte positions.  */
     T1 = ((in1 & 0xff00ff00) >> 8) | (in0 & 0xff00ff00);
     FORCE_RET();
 }

 /* 16-bit transpose of T0/T1: new T0 is the low half of old T1 with the
    low half of old T0 above it; new T1 is the high half of old T1 with
    the high half of old T0 above it.  */
 void HELPER(neon_trn_u16)(void)
 {
     const uint32_t in0 = T0;
     const uint32_t in1 = T1;

     T0 = (in0 << 16) | (in1 & 0xffff);
     T1 = (in1 >> 16) | (in0 & 0xffff0000);
     FORCE_RET();
 }

 /* Worker routines for zip and unzip.  */
 /* Byte unzip of T0/T1: new T0 collects bytes 0 and 2 of old T0 followed
    by bytes 0 and 2 of old T1; new T1 collects bytes 1 and 3 of old T0
    followed by bytes 1 and 3 of old T1.  */
 void HELPER(neon_unzip_u8)(void)
 {
     const uint32_t in0 = T0;
     const uint32_t in1 = T1;
     const uint32_t even_bytes = (in0 & 0xff) | ((in0 >> 8) & 0xff00)
         | ((in1 << 16) & 0xff0000) | ((in1 << 8) & 0xff000000);
     const uint32_t odd_bytes = ((in0 >> 8) & 0xff) | ((in0 >> 16) & 0xff00)
         | ((in1 << 8) & 0xff0000) | (in1 & 0xff000000);

     T0 = even_bytes;
     T1 = odd_bytes;
     FORCE_RET();
 }

 /* Byte zip (interleave) of T0/T1: new T0 is built from bytes 0 and 1 of
    both inputs (T0.b0, T1.b0, T0.b1, T1.b1 from low to high) and new T1
    from bytes 2 and 3 (T0.b2, T1.b2, T0.b3, T1.b3).  */
 void HELPER(neon_zip_u8)(void)
 {
     uint32_t rd;
     uint32_t rm;
     /* Byte 1 of each input has to land in bytes 2 and 3 of rd; shifting
        by 16/24 here would duplicate byte 0 and drop byte 1 entirely.  */
     rd = (T0 & 0xff) | ((T1 << 8) & 0xff00)
          | ((T0 << 8) & 0xff0000) | ((T1 << 16) & 0xff000000);
     rm = ((T0 >> 16) & 0xff) | ((T1 >> 8) & 0xff00)
          | ((T0 >> 8) & 0xff0000) | (T1 & 0xff000000);
     T0 = rd;
     T1 = rm;
     FORCE_RET();
 }

 /* 16-bit zip of T0/T1: new T0 pairs the low halves (old T0 low, old T1
    low above it); new T1 pairs the high halves (old T0 high, old T1 high
    above it).  */
 void HELPER(neon_zip_u16)(void)
 {
     const uint32_t in0 = T0;
     const uint32_t in1 = T1;

     T0 = (in0 & 0xffff) | (in1 << 16);
     T1 = (in1 & 0xffff0000) | (in0 >> 16);
     FORCE_RET();
 }
	/*
	* ARM helper routines
	*
	* Copyright (c) 2005-2007 CodeSourcery, LLC
	*
	* This library is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2 of the License, or (at your option) any later version.
	*
	* This library is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with this library; if not, write to the Free Software
	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	*/
	#include "exec.h"
	#include "helpers.h"

	#define SIGNBIT (uint32_t)0x80000000
	#define SIGNBIT64 ((uint64_t)1 << 63)

	/* Store tt in env->exception_index and then call cpu_loop_exit().
	   NOTE(review): cpu_loop_exit() presumably does not return to the
	   caller -- confirm against its definition.  */
	void raise_exception(int tt)
	{
	env->exception_index = tt;
	cpu_loop_exit();
	}

	/* thread support */

	/* Single lock shared by cpu_lock() and cpu_unlock(); starts unlocked.  */
	spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;

	/* Acquire global_cpu_lock with spin_lock().  */
	void cpu_lock(void)
	{
	spin_lock(&global_cpu_lock);
	}

	/* Release global_cpu_lock with spin_unlock().  */
	void cpu_unlock(void)
	{
	spin_unlock(&global_cpu_lock);
	}

	/*
	 * Byte table lookup.  Each of the four bytes of ireg is an index into the
	 * byte table that starts at env->vfp.regs[rn].  An index below maxindex
	 * selects the corresponding table byte; any other index keeps the byte of
	 * def that sits in the same lane.  Returns the four result bytes packed in
	 * lane order.
	 */
	uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def,
	                          uint32_t rn, uint32_t maxindex)
	{
	    uint32_t val;
	    uint32_t tmp;
	    uint32_t index;
	    int shift;
	    uint64_t *table;
	    table = (uint64_t *)&env->vfp.regs[rn];
	    val = 0;
	    for (shift = 0; shift < 32; shift += 8) {
	        index = (ireg >> shift) & 0xff;
	        if (index < maxindex) {
	            /* Byte (index & 7) of a 64-bit word starts at bit
	               (index & 7) * 8, so scale the byte offset to a bit offset.  */
	            tmp = (table[index >> 3] >> ((index & 7) << 3)) & 0xff;
	            val |= tmp << shift;
	        } else {
	            /* Unsigned mask: 0xff << 24 does not fit in a signed int.  */
	            val |= def & (0xffu << shift);
	        }
	    }
	    return val;
	}

	#if !defined(CONFIG_USER_ONLY)

	/* Forward declaration; the definition follows the softmmu_template.h
	   inclusions below, where the third parameter is named mmu_idx.  */
	static void do_unaligned_access (target_ulong addr, int is_write, int is_user, void *retaddr);

	#define MMUSUFFIX _mmu
	#define ALIGNED_ONLY 1

	#define SHIFT 0
	#include "softmmu_template.h"

	#define SHIFT 1
	#include "softmmu_template.h"

	#define SHIFT 2
	#include "softmmu_template.h"

	#define SHIFT 3
	#include "softmmu_template.h"

	/*
	 * Report an unaligned access at addr as a data abort, but only when
	 * mmu_idx is non-zero; with mmu_idx == 0 the function returns without
	 * doing anything.  is_write and retaddr are not used.
	 */
	static void do_unaligned_access (target_ulong addr, int is_write, int mmu_idx, void *retaddr)
	{
	    if (!mmu_idx) {
	        return;
	    }

	    env = cpu_single_env;
	    /* 0x00000001 corresponds to an alignment fault */
	    env->cp15.c5_data = 0x00000001;
	    env->cp15.c6_data = addr;
	    env->exception_index = EXCP_DATA_ABORT;
	    cpu_loop_exit();
	}

	/* Try to fill the TLB for addr; if cpu_arm_handle_mmu_fault() reports an
	   error, raise the exception recorded in env->exception_index.  A NULL
	   retaddr means the function was called from C code (i.e. not from
	   generated code or from helper.c).  */
	/* XXX: fix it to restore all registers */
	void tlb_fill (target_ulong addr, int is_write, int mmu_idx, void *retaddr)
	{
	    CPUState *caller_env = env;
	    int fault;

	    /* XXX: hack to restore env in all cases, even if not called from
	       generated code */
	    env = cpu_single_env;
	    fault = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx, 1);
	    if (unlikely(fault)) {
	        if (retaddr) {
	            /* now we have a real cpu fault */
	            unsigned long host_pc = (unsigned long)retaddr;
	            TranslationBlock *tb = tb_find_pc(host_pc);

	            if (tb) {
	                /* the PC is inside the translated code, so this is a
	                   virtual CPU fault: restore the CPU state first */
	                cpu_restore_state(tb, env, host_pc, NULL);
	            }
	        }
	        raise_exception(env->exception_index);
	    }
	    env = caller_env;
	}

	#if 1
	#include <string.h>
	/*
	* The following functions are address translation helper functions
	* for fast memory access in QEMU.
	*/
	/* Slow path of v2p(): look up the read TLB entry for addr and, while it
	   does not match, call tlb_fill() and look again.  Returns addr plus the
	   addend of the matching entry.  */
	static target_phys_addr_t v2p_mmu(target_ulong addr, int mmu_idx)
	{
	    const int slot = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);

	    for (;;) {
	        target_ulong tag = env->tlb_table[mmu_idx][slot].addr_read;
	        void *caller_pc;

	        /* Plain bitwise OR of the two masks (the "\|" escape that was
	           here is not valid C).  */
	        if ((addr & TARGET_PAGE_MASK) ==
	            (tag & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
	            target_phys_addr_t translated =
	                addr + env->tlb_table[mmu_idx][slot].addend;
	            return translated;
	        }
	        /* the page is not in the TLB : fill it */
	        caller_pc = GETPC();
	        tlb_fill(addr, 0, mmu_idx, caller_pc);
	    }
	}

	/*
	 * Translate a virtual address of the simulated OS into an address of
	 * the simulation host (not the physical address of the simulated OS).
	 * env is switched to cpu_single_env for the lookup and restored before
	 * returning.
	 */
	target_phys_addr_t v2p(target_ulong ptr, int mmu_idx)
	{
	    CPUState *caller_env = env;
	    target_phys_addr_t translated;
	    int slot;

	    env = cpu_single_env;
	    slot = (ptr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
	    if (__builtin_expect(env->tlb_table[mmu_idx][slot].addr_read ==
	                (ptr & TARGET_PAGE_MASK), 1)) {
	        /* TLB hit: apply the cached addend directly */
	        translated = (target_phys_addr_t)ptr + env->tlb_table[mmu_idx][slot].addend;
	    } else {
	        translated = v2p_mmu(ptr, mmu_idx);
	    }
	    env = caller_env;
	    return translated;
	}

	#define MINSIZE(x,y) ((x) < (y) ? (x) : (y))
	/*
	 * Copy size bytes from the simulated virtual address space, starting at
	 * ptr, into the QEMU buffer buf.  The copy is split at target page
	 * boundaries and each piece is translated with v2p(ptr, 0).  Nothing is
	 * copied when buf is NULL or size is not positive; the copy stops early
	 * if v2p() yields NULL.
	 */
	void vmemcpy(target_ulong ptr, char *buf, int size)
	{
	    if (buf == NULL) return;
	    /* "size > 0" rather than "size": a negative size would otherwise
	       reach memcpy() as a huge unsigned length.  */
	    while (size > 0) {
	        int page_remain = TARGET_PAGE_SIZE - (ptr & ~TARGET_PAGE_MASK);
	        int to_copy = MINSIZE(size, page_remain);
	        /* phys must be a pointer: it is compared with NULL and handed
	           to memcpy() as the source.  */
	        char *phys = (char *)v2p(ptr, 0);
	        if (phys == NULL) return;
	        memcpy(buf, phys, to_copy);
	        ptr += to_copy;
	        buf += to_copy;
	        size -= to_copy;
	    }
	}

	/*
	 * Copy size bytes from the QEMU buffer buf into the simulated virtual
	 * address space starting at ptr.  The copy is split at target page
	 * boundaries and each piece is translated with v2p(ptr, 0).  Nothing is
	 * copied when buf is NULL or size is not positive; the copy stops early
	 * if v2p() yields NULL.
	 */
	void pmemcpy(target_ulong ptr, const char *buf, int size)
	{
	    if (buf == NULL) return;
	    /* "size > 0" rather than "size": a negative size would otherwise
	       reach memcpy() as a huge unsigned length.  */
	    while (size > 0) {
	        int page_remain = TARGET_PAGE_SIZE - (ptr & ~TARGET_PAGE_MASK);
	        int to_copy = MINSIZE(size, page_remain);
	        /* phys must be a pointer: it is compared with NULL and handed
	           to memcpy() as the destination.  */
	        char *phys = (char *)v2p(ptr, 0);
	        if (phys == NULL) return;
	        memcpy(phys, buf, to_copy);
	        ptr += to_copy;
	        buf += to_copy;
	        size -= to_copy;
	    }
	}

	/*
	 * Copy a string from the simulated virtual address space, starting at
	 * ptr, into the QEMU buffer buf.  At most max bytes are written; copying
	 * stops after the terminating '\0' has been stored.  If max bytes are
	 * written without meeting a '\0', buf is left unterminated.  The address
	 * is re-translated with v2p(ptr, 0) each time ptr enters a new target
	 * page.  Nothing is done when buf is NULL or max is not positive.
	 */
	void vstrcpy(target_ulong ptr, char *buf, int max)
	{
	    char *phys = NULL;
	    target_ulong page = 0;
	    /* Explicit flag: page value 0 is a valid page, so it cannot double
	       as the "nothing translated yet" marker.  */
	    int have_page = 0;

	    if (buf == NULL) return;

	    while (max > 0) {
	        if (!have_page || (ptr & TARGET_PAGE_MASK) != page) {
	            phys = (char *)v2p(ptr, 0);
	            page = ptr & TARGET_PAGE_MASK;
	            have_page = 1;
	        }
	        /* Copy the byte itself; assigning the pointers would copy
	           nothing into the caller's buffer.  */
	        *buf = *phys;
	        if (*phys == '\0')
	            return;
	        ptr ++;
	        buf ++;
	        phys ++;
	        max --;
	    }
	}
	#endif
	#endif

	/* FIXME: Pass an explicit pointer to QF to CPUState, and move saturating
	   instructions into helper.c */
	/* Return a + b (wrapping) and set env->QF when both operands have the
	   same sign but the sum's sign differs from a's.  */
	uint32_t HELPER(add_setq)(uint32_t a, uint32_t b)
	{
	    const uint32_t sum = a + b;
	    const int sign_flipped = ((sum ^ a) & SIGNBIT) != 0;
	    const int same_sign_inputs = ((a ^ b) & SIGNBIT) == 0;

	    if (sign_flipped && same_sign_inputs) {
	        env->QF = 1;
	    }
	    return sum;
	}

	/* Signed saturating add: on overflow set env->QF and return 0x7fffffff
	   when a is non-negative or 0x80000000 when a is negative; otherwise
	   return a + b.  */
	uint32_t HELPER(add_saturate)(uint32_t a, uint32_t b)
	{
	    const uint32_t sum = a + b;

	    if (!((sum ^ a) & SIGNBIT) || ((a ^ b) & SIGNBIT)) {
	        return sum;
	    }
	    env->QF = 1;
	    return ~(((int32_t)a >> 31) ^ SIGNBIT);
	}

	/* Signed saturating subtract: on overflow set env->QF and return
	   0x7fffffff when a is non-negative or 0x80000000 when a is negative;
	   otherwise return a - b.  */
	uint32_t HELPER(sub_saturate)(uint32_t a, uint32_t b)
	{
	    const uint32_t diff = a - b;

	    if (!((diff ^ a) & SIGNBIT) || !((a ^ b) & SIGNBIT)) {
	        return diff;
	    }
	    env->QF = 1;
	    return ~(((int32_t)a >> 31) ^ SIGNBIT);
	}

	/* Return val * 2 with signed saturation.  Values at or above 0x40000000
	   clamp to 0x7fffffff and values at or below 0xc0000000 (as a signed
	   number) clamp to 0x80000000; either clamp sets env->QF.  */
	uint32_t HELPER(double_saturate)(int32_t val)
	{
	    uint32_t res;
	    if (val >= 0x40000000) {
	        res = ~SIGNBIT;
	        env->QF = 1;
	    } else if (val <= (int32_t)0xc0000000) {
	        res = SIGNBIT;
	        env->QF = 1;
	    } else {
	        /* Shift as unsigned: left-shifting a negative signed value is
	           undefined behaviour in C.  */
	        res = (uint32_t)val << 1;
	    }
	    return res;
	}

	/* Unsigned saturating add: when a + b wraps, set env->QF and return the
	   all-ones value; otherwise return the sum.  */
	uint32_t HELPER(add_usaturate)(uint32_t a, uint32_t b)
	{
	    const uint32_t sum = a + b;

	    if (sum >= a) {
	        return sum;
	    }
	    env->QF = 1;
	    return 0xffffffffu;
	}

	/* Unsigned saturating subtract: when a - b wraps, set env->QF and return
	   0; otherwise return the difference.  */
	uint32_t HELPER(sub_usaturate)(uint32_t a, uint32_t b)
	{
	    const uint32_t diff = a - b;

	    if (diff <= a) {
	        return diff;
	    }
	    env->QF = 1;
	    return 0;
	}

	/* Signed saturation.  Looks at the bits of val above position shift: if
	   they are greater than 0 the result is the mask of the low shift bits,
	   if they are less than -1 the result is the complement of that mask;
	   both cases set env->QF.  Otherwise val is returned unchanged.  */
	static inline uint32_t do_ssat(int32_t val, int shift)
	{
	    const int32_t high_bits = val >> shift;
	    const uint32_t low_mask = (1u << shift) - 1;

	    if (high_bits > 0) {
	        env->QF = 1;
	        return low_mask;
	    }
	    if (high_bits < -1) {
	        env->QF = 1;
	        return ~low_mask;
	    }
	    return val;
	}

	/* Unsigned saturation.  Negative values clamp to 0 and values above
	   (1 << shift) - 1 clamp to that limit; both cases set env->QF.
	   Otherwise val is returned unchanged.  */
	static inline uint32_t do_usat(int32_t val, int shift)
	{
	    const uint32_t limit = (1u << shift) - 1;

	    if (val < 0) {
	        env->QF = 1;
	        return 0;
	    }
	    if (val > limit) {
	        env->QF = 1;
	        return limit;
	    }
	    return val;
	}

	/* Signed saturate: forwards x (reinterpreted as a signed 32-bit value by
	   the parameter conversion) and shift to do_ssat().  */
	uint32_t HELPER(ssat)(uint32_t x, uint32_t shift)
	{
	return do_ssat(x, shift);
	}

	/* Dual halfword signed saturate: the low and high 16-bit halves of x are
	   each sign-extended, saturated with do_ssat(), and recombined.  */
	uint32_t HELPER(ssat16)(uint32_t x, uint32_t shift)
	{
	    uint32_t res;

	    res = (uint16_t)do_ssat((int16_t)x, shift);
	    /* Merge the high half with "|=" (the "\|=" escape is not valid C).  */
	    res |= do_ssat(((int32_t)x) >> 16, shift) << 16;
	    return res;
	}

	/* Unsigned saturate: forwards x (reinterpreted as a signed 32-bit value
	   by the parameter conversion) and shift to do_usat().  */
	uint32_t HELPER(usat)(uint32_t x, uint32_t shift)
	{
	return do_usat(x, shift);
	}

	/* Dual halfword unsigned saturate: the low and high 16-bit halves of x
	   are each sign-extended, saturated with do_usat(), and recombined.  */
	uint32_t HELPER(usat16)(uint32_t x, uint32_t shift)
	{
	    uint32_t res;

	    res = (uint16_t)do_usat((int16_t)x, shift);
	    /* Merge the high half with "|=" (the "\|=" escape is not valid C).  */
	    res |= do_usat(((int32_t)x) >> 16, shift) << 16;
	    return res;
	}

	/* Set env->exception_index to EXCP_HLT, mark the CPU as halted and call
	   cpu_loop_exit().  */
	void HELPER(wfi)(void)
	{
	env->exception_index = EXCP_HLT;
	env->halted = 1;
	cpu_loop_exit();
	}

	/* Store excp in env->exception_index and call cpu_loop_exit().  */
	void HELPER(exception)(uint32_t excp)
	{
	env->exception_index = excp;
	cpu_loop_exit();
	}

	/* Return cpsr_read(env) with the CPSR_EXEC bits cleared.  */
	uint32_t HELPER(cpsr_read)(void)
	{
	return cpsr_read(env) & ~CPSR_EXEC;
	}

	/* Forward val and mask to cpsr_write() for the current env.  */
	void HELPER(cpsr_write)(uint32_t val, uint32_t mask)
	{
	cpsr_write(env, val, mask);
	}

	/* Access to user mode registers from privileged modes. */
	/* Read user-mode register regno: r13 and r14 come from bank 0 of the
	   banked registers, r8 and above come from env->usr_regs while the CPU
	   is in FIQ mode, and everything else comes from env->regs.  */
	uint32_t HELPER(get_user_reg)(uint32_t regno)
	{
	    const int in_fiq = (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ;

	    switch (regno) {
	    case 13:
	        return env->banked_r13[0];
	    case 14:
	        return env->banked_r14[0];
	    default:
	        if (regno >= 8 && in_fiq) {
	            return env->usr_regs[regno - 8];
	        }
	        return env->regs[regno];
	    }
	}

	/* Write val to user-mode register regno: r13 and r14 go to bank 0 of
	   the banked registers, r8 and above go to env->usr_regs while the CPU
	   is in FIQ mode, and everything else goes to env->regs.  */
	void HELPER(set_user_reg)(uint32_t regno, uint32_t val)
	{
	    const int in_fiq = (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ;

	    switch (regno) {
	    case 13:
	        env->banked_r13[0] = val;
	        break;
	    case 14:
	        env->banked_r14[0] = val;
	        break;
	    default:
	        if (regno >= 8 && in_fiq) {
	            env->usr_regs[regno - 8] = val;
	        } else {
	            env->regs[regno] = val;
	        }
	        break;
	    }
	}

	/* ??? Flag setting arithmetic is awkward because we need to do comparisons.
	   The only way to do that in TCG is a conditional branch, which clobbers
	   all our temporaries. For now implement these as helper functions. */

	/* Return a + b and update the flags: NF and ZF hold the result, CF is
	   the unsigned carry out, VF has its top bit set on signed overflow.  */
	uint32_t HELPER (add_cc)(uint32_t a, uint32_t b)
	{
	    uint32_t result;
	    /* Add the parameters, not the T0/T1 globals: the flag computations
	       below are all expressed in terms of a and b.  */
	    result = a + b;
	    env->NF = env->ZF = result;
	    env->CF = result < a;
	    env->VF = (a ^ b ^ -1) & (a ^ result);
	    return result;
	}

	/* Return a + b + carry-in (env->CF) and update the flags: CF is the
	   unsigned carry out, VF has its top bit set on signed overflow, NF and
	   ZF hold the result.  */
	uint32_t HELPER(adc_cc)(uint32_t a, uint32_t b)
	{
	    uint32_t sum;

	    if (env->CF) {
	        sum = a + b + 1;
	        env->CF = sum <= a;
	    } else {
	        sum = a + b;
	        env->CF = sum < a;
	    }
	    env->VF = ~(a ^ b) & (a ^ sum);
	    env->NF = env->ZF = sum;
	    return sum;
	}

	/* Return a - b and update the flags: NF and ZF hold the result, CF is
	   set when no borrow occurred (a >= b), VF has its top bit set on signed
	   overflow.  */
	uint32_t HELPER(sub_cc)(uint32_t a, uint32_t b)
	{
	    const uint32_t diff = a - b;

	    env->NF = env->ZF = diff;
	    env->CF = (a >= b);
	    env->VF = (a ^ b) & (a ^ diff);
	    return diff;
	}

	/* Subtract with carry: returns a - b when env->CF is set and a - b - 1
	   when it is clear, then updates CF (no borrow), VF (signed overflow in
	   the top bit) and NF/ZF (the result).  */
	uint32_t HELPER(sbc_cc)(uint32_t a, uint32_t b)
	{
	    uint32_t diff;

	    if (env->CF) {
	        diff = a - b;
	        env->CF = a >= b;
	    } else {
	        diff = a - b - 1;
	        env->CF = a > b;
	    }
	    env->VF = (a ^ b) & (a ^ diff);
	    env->NF = env->ZF = diff;
	    return diff;
	}

	/* Similarly for variable shift instructions. */

	/* Logical shift left by the low byte of i; amounts of 32 or more give 0.  */
	uint32_t HELPER(shl)(uint32_t x, uint32_t i)
	{
	    const int amount = i & 0xff;

	    return (amount < 32) ? (x << amount) : 0;
	}

	/* Logical shift right by the low byte of i; amounts of 32 or more give 0.  */
	uint32_t HELPER(shr)(uint32_t x, uint32_t i)
	{
	    const int amount = i & 0xff;

	    return (amount < 32) ? (x >> amount) : 0;
	}

	/* Arithmetic shift right by the low byte of i; amounts of 32 or more
	   are treated as a shift by 31.  */
	uint32_t HELPER(sar)(uint32_t x, uint32_t i)
	{
	    const int requested = i & 0xff;
	    const int amount = (requested >= 32) ? 31 : requested;

	    return (int32_t)x >> amount;
	}

	/* Rotate x right by the low byte of i.  A 32-bit rotation is periodic
	   with period 32, so only the amount modulo 32 matters; multiples of 32
	   (including 0) return x unchanged.  */
	uint32_t HELPER(ror)(uint32_t x, uint32_t i)
	{
	    /* Reduce modulo 32 first: shifting a 32-bit value by 32 or more is
	       undefined behaviour in C.  */
	    int shift = i & 0x1f;
	    if (shift == 0)
	        return x;
	    return (x >> shift) | (x << (32 - shift));
	}

	/* Logical shift left by the low byte of i, updating env->CF with the
	   last bit shifted out.  A zero amount leaves x and CF untouched; an
	   amount of 32 gives 0 with CF = bit 0 of x; larger amounts give 0 with
	   CF cleared.  */
	uint32_t HELPER(shl_cc)(uint32_t x, uint32_t i)
	{
	    const int amount = i & 0xff;

	    if (amount == 0) {
	        return x;
	    }
	    if (amount < 32) {
	        env->CF = (x >> (32 - amount)) & 1;
	        return x << amount;
	    }
	    env->CF = (amount == 32) ? (x & 1) : 0;
	    return 0;
	}

	/* Logical shift right by the low byte of i, updating env->CF with the
	   last bit shifted out.  A zero amount leaves x and CF untouched; an
	   amount of 32 gives 0 with CF = bit 31 of x; larger amounts give 0 with
	   CF cleared.  */
	uint32_t HELPER(shr_cc)(uint32_t x, uint32_t i)
	{
	    const int amount = i & 0xff;

	    if (amount == 0) {
	        return x;
	    }
	    if (amount < 32) {
	        env->CF = (x >> (amount - 1)) & 1;
	        return x >> amount;
	    }
	    env->CF = (amount == 32) ? ((x >> 31) & 1) : 0;
	    return 0;
	}

	/* Arithmetic shift right by the low byte of i, updating env->CF with
	   the last bit shifted out.  A zero amount leaves x and CF untouched;
	   amounts of 32 or more behave like a shift that leaves only copies of
	   the sign bit, with CF = bit 31 of x.  */
	uint32_t HELPER(sar_cc)(uint32_t x, uint32_t i)
	{
	    const int amount = i & 0xff;

	    if (amount == 0) {
	        return x;
	    }
	    if (amount < 32) {
	        env->CF = (x >> (amount - 1)) & 1;
	        return (int32_t)x >> amount;
	    }
	    env->CF = (x >> 31) & 1;
	    return (int32_t)x >> 31;
	}

	/* Rotate right by the low byte of i, updating env->CF.  The rotation
	   uses the amount modulo 32.  When that is zero x is returned unchanged,
	   and CF becomes bit 31 of x only if the full amount was non-zero.  */
	uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i)
	{
	    int shift1, shift;
	    shift1 = i & 0xff;
	    shift = shift1 & 0x1f;
	    if (shift == 0) {
	        if (shift1 != 0)
	            env->CF = (x >> 31) & 1;
	        return x;
	    } else {
	        env->CF = (x >> (shift - 1)) & 1;
	        /* Combine both halves with "|" (the "\|" escape is not valid C).  */
	        return ((uint32_t)x >> shift) | (x << (32 - shift));
	    }
	}

	/* 64-bit signed saturating add: on overflow set env->QF and return the
	   largest positive value when src1 is non-negative or the most negative
	   value when src1 is negative; otherwise return the sum.  */
	uint64_t HELPER(neon_add_saturate_s64)(uint64_t src1, uint64_t src2)
	{
	    const uint64_t sum = src1 + src2;

	    if (!((sum ^ src1) & SIGNBIT64) || ((src1 ^ src2) & SIGNBIT64)) {
	        return sum;
	    }
	    env->QF = 1;
	    return ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
	}

	/* 64-bit unsigned saturating add: when the sum wraps, set env->QF and
	   return the all-ones value; otherwise return the sum.  */
	uint64_t HELPER(neon_add_saturate_u64)(uint64_t src1, uint64_t src2)
	{
	    const uint64_t sum = src1 + src2;

	    if (sum >= src1) {
	        return sum;
	    }
	    env->QF = 1;
	    return ~(uint64_t)0;
	}

	/* 64-bit signed saturating subtract: on overflow set env->QF and return
	   the largest positive value when src1 is non-negative or the most
	   negative value when src1 is negative; otherwise return the difference.  */
	uint64_t HELPER(neon_sub_saturate_s64)(uint64_t src1, uint64_t src2)
	{
	    const uint64_t diff = src1 - src2;

	    if (!((diff ^ src1) & SIGNBIT64) || !((src1 ^ src2) & SIGNBIT64)) {
	        return diff;
	    }
	    env->QF = 1;
	    return ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
	}

	/* 64-bit unsigned saturating subtract: when src1 < src2, set env->QF
	   and return 0; otherwise return src1 - src2.  */
	uint64_t HELPER(neon_sub_saturate_u64)(uint64_t src1, uint64_t src2)
	{
	    if (src1 >= src2) {
	        return src1 - src2;
	    }
	    env->QF = 1;
	    return 0;
	}

	/* These need to return a pair of values, so still use T0/T1. */
	/* Transpose. Argument order is rather strange to avoid special casing
	   the translation code.
	   On input T0 = rm, T1 = rd. On output T0 = rd, T1 = rm */
	void HELPER(neon_trn_u8)(void)
	{
	    uint32_t rd;
	    uint32_t rm;
	    /* Plain bitwise OR throughout (the "\|" escapes are not valid C).  */
	    rd = ((T0 & 0x00ff00ff) << 8) | (T1 & 0x00ff00ff);
	    rm = ((T1 & 0xff00ff00) >> 8) | (T0 & 0xff00ff00);
	    T0 = rd;
	    T1 = rm;
	    FORCE_RET();
	}

	/* 16-bit transpose of T0/T1: new T0 is the low half of old T1 with the
	   low half of old T0 above it; new T1 is the high half of old T1 with
	   the high half of old T0 above it.  */
	void HELPER(neon_trn_u16)(void)
	{
	    uint32_t rd;
	    uint32_t rm;
	    /* Plain bitwise OR throughout (the "\|" escapes are not valid C).  */
	    rd = (T0 << 16) | (T1 & 0xffff);
	    rm = (T1 >> 16) | (T0 & 0xffff0000);
	    T0 = rd;
	    T1 = rm;
	    FORCE_RET();
	}

	/* Worker routines for zip and unzip. */
	/* Byte unzip of T0/T1: new T0 collects bytes 0 and 2 of old T0 followed
	   by bytes 0 and 2 of old T1; new T1 collects bytes 1 and 3 of old T0
	   followed by bytes 1 and 3 of old T1.  */
	void HELPER(neon_unzip_u8)(void)
	{
	    uint32_t rd;
	    uint32_t rm;
	    /* Plain bitwise OR throughout (the "\|" escapes are not valid C).  */
	    rd = (T0 & 0xff) | ((T0 >> 8) & 0xff00)
	         | ((T1 << 16) & 0xff0000) | ((T1 << 8) & 0xff000000);
	    rm = ((T0 >> 8) & 0xff) | ((T0 >> 16) & 0xff00)
	         | ((T1 << 8) & 0xff0000) | (T1 & 0xff000000);
	    T0 = rd;
	    T1 = rm;
	    FORCE_RET();
	}

	/* Byte zip (interleave) of T0/T1: new T0 is built from bytes 0 and 1 of
	   both inputs (T0.b0, T1.b0, T0.b1, T1.b1 from low to high) and new T1
	   from bytes 2 and 3 (T0.b2, T1.b2, T0.b3, T1.b3).  */
	void HELPER(neon_zip_u8)(void)
	{
	    uint32_t rd;
	    uint32_t rm;
	    /* Byte 1 of each input has to land in bytes 2 and 3 of rd; shifting
	       by 16/24 here would duplicate byte 0 and drop byte 1 entirely.
	       Plain bitwise OR throughout (the "\|" escapes are not valid C).  */
	    rd = (T0 & 0xff) | ((T1 << 8) & 0xff00)
	         | ((T0 << 8) & 0xff0000) | ((T1 << 16) & 0xff000000);
	    rm = ((T0 >> 16) & 0xff) | ((T1 >> 8) & 0xff00)
	         | ((T0 >> 8) & 0xff0000) | (T1 & 0xff000000);
	    T0 = rd;
	    T1 = rm;
	    FORCE_RET();
	}

	/* 16-bit zip of T0/T1: new T0 pairs the low halves (old T0 low, old T1
	   low above it); new T1 pairs the high halves (old T0 high, old T1 high
	   above it).  */
	void HELPER(neon_zip_u16)(void)
	{
	    uint32_t tmp;

	    /* Plain bitwise OR throughout (the "\|" escapes are not valid C).  */
	    tmp = (T0 & 0xffff) | (T1 << 16);
	    T1 = (T1 & 0xffff0000) | (T0 >> 16);
	    T0 = tmp;
	    FORCE_RET();
	}