This commit is for Bugzilla 334834.  The Bugzilla contains patch 2 of 3
to add PPC64 LE support.  The other two patches can be found in Bugzillas
334384 and 334836.

POWER PC, add the functional Little Endian support, patch 2 VEX part

The IBM POWER processor now supports both Big Endian and Little Endian.
The ABI for Little Endian also changes.  Specifically, function descriptors
are not used, the stack size has changed, and the way the TOC is accessed
has changed.  Functions now have a local and a global entry point.  Register
r2 contains the TOC for local calls and register r12 contains the TOC
for global calls.  This patch makes the functional changes to the
Valgrind tool.  The patch makes the changes needed for the
none/tests/ppc32 and none/tests/ppc64 Makefile.am.  A number of the
ppc specific tests have Endian dependencies that are not fixed in
this patch.  They are fixed in the next patch.

Per Julian's comments, renamed coregrind/m_dispatch/dispatch-ppc64-linux.S
to coregrind/m_dispatch/dispatch-ppc64be-linux.S.  Created a new file for LE,
coregrind/m_dispatch/dispatch-ppc64le-linux.S.  The same was done for
coregrind/m_syswrap/syscall-ppc-linux.S.

Signed-off-by: Carl Love <carll@us.ibm.com>

git-svn-id: svn://svn.valgrind.org/vex/trunk@2914 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_ppc_defs.h b/priv/guest_ppc_defs.h
index 944989d..3d677e8 100644
--- a/priv/guest_ppc_defs.h
+++ b/priv/guest_ppc_defs.h
@@ -161,7 +161,8 @@
 
 extern void ppc64g_dirtyhelper_LVS ( VexGuestPPC64State* gst,
                                      UInt vD_idx, UInt sh,
-                                     UInt shift_right );
+                                     UInt shift_right,
+                                     UInt endness );
 
 #endif /* ndef __VEX_GUEST_PPC_DEFS_H */
 
diff --git a/priv/guest_ppc_helpers.c b/priv/guest_ppc_helpers.c
index 2db109b..682aadc 100644
--- a/priv/guest_ppc_helpers.c
+++ b/priv/guest_ppc_helpers.c
@@ -153,10 +153,12 @@
 /* CALLED FROM GENERATED CODE */
 /* DIRTY HELPER (reads guest state, writes guest mem) */
 void ppc64g_dirtyhelper_LVS ( VexGuestPPC64State* gst,
-                              UInt vD_off, UInt sh, UInt shift_right )
+                              UInt vD_off, UInt sh, UInt shift_right,
+                              UInt endness )
 {
   UChar ref[32];
   ULong i;
+  Int k;
   /* ref[] used to be a static const array, but this doesn't work on
      ppc64 because VEX doesn't load the TOC pointer for the call here,
      and so we wind up picking up some totally random other data.
@@ -179,10 +181,19 @@
   pU128_src = (U128*)&ref[sh];
   pU128_dst = (U128*)( ((UChar*)gst) + vD_off );
 
-  (*pU128_dst)[0] = (*pU128_src)[0];
-  (*pU128_dst)[1] = (*pU128_src)[1];
-  (*pU128_dst)[2] = (*pU128_src)[2];
-  (*pU128_dst)[3] = (*pU128_src)[3];
+  if ((0x1 & endness) == 0x0) {
+     /* Little endian */
+     unsigned char *srcp, *dstp;
+     srcp = (unsigned char *)pU128_src;
+     dstp = (unsigned char *)pU128_dst;
+     for (k = 15; k >= 0; k--, srcp++)
+        dstp[k] = *srcp;
+  } else {
+     (*pU128_dst)[0] = (*pU128_src)[0];
+     (*pU128_dst)[1] = (*pU128_src)[1];
+     (*pU128_dst)[2] = (*pU128_src)[2];
+     (*pU128_dst)[3] = (*pU128_src)[3];
+  }
 }
 
 
diff --git a/priv/guest_ppc_toIR.c b/priv/guest_ppc_toIR.c
index 2b4d4bd..ee11473 100644
--- a/priv/guest_ppc_toIR.c
+++ b/priv/guest_ppc_toIR.c
@@ -97,7 +97,8 @@
 
       7C210B78 (or 1,1,1)   %R3 = client_request ( %R4 )
       7C421378 (or 2,2,2)   %R3 = guest_NRADDR
-      7C631B78 (or 3,3,3)   branch-and-link-to-noredir %R11
+      7C631B78 (or 3,3,3)   branch-and-link-to-noredir %R11  Big endian
+      7C631B78 (or 3,3,3)   branch-and-link-to-noredir %R12  Little endian
       7C842378 (or 4,4,4)   %R3 = guest_NRADDR_GPR2
       7CA52B78 (or 5,5,5)   IR injection
 
@@ -107,7 +108,55 @@
    fragments designed for Valgrind to catch.
 */
 
-
+/*  Little Endian notes  */
+/*
+ * Vector operations in little Endian mode behave in non-obvious ways at times.
+ * Below is an attempt at explaining this.
+ *
+ * LE/BE vector example
+ *   With a vector of unsigned ints declared as follows:
+ *     vector unsigned int vec_inA =
+                            { 0x11111111, 0x22222222, 0x33333333, 0x44444444 };
+ *   The '0x11111111' word is word zero in both LE and BE format.  But the
+ *   loaded vector register will have word zero on the far left in BE mode and
+ *   on the far right in LE mode. The lvx and stvx instructions work naturally
+ *   for whatever endianness is in effect.  For example, in LE mode, the stvx
+ *   stores word zero (far right word) of the vector at the lowest memory
+ *   address of the EA; in BE mode, stvx still stores word zero at the lowest
+ *   memory address, but with word zero interpreted as the one at the far left
+ *   of the register.
+ *
+ *   The lxvd2x and stxvd2x instructions are not so well suited for LE mode.
+ *   When the compiler generates an lxvd2x instruction to load the
+ *   above-declared vector of unsigned integers, it loads the vector as two
+ *   double words, but they are in BE word-wise format.  To put the vector in
+ *   the right order for LE, the compiler also generates an xxswapd after the
+ *   load, which puts it in proper LE format.  Similarly, the stxvd2x
+ *   instruction has a BE bias, storing the vector in BE word-wise format. But
+ *   the compiler also generates an xxswapd prior to the store, thus ensuring
+ *   the vector is stored in memory in the correct LE order.
+ *
+ *   Vector-flavored Iops, such as Iop_V128Hito64, reference the hi and lo
+ *   parts of double words and words within a vector.  Because of the reverse
+ *   order of numbering for LE as described above, the high part refers to word
+ *   1 in LE format. When input data is saved to a guest state vector register
+ *   (e.g., via Iop_64HLtoV128), it is first saved to memory and then the
+ *   register is loaded via PPCInstr_AvLdSt, which does an lvx instruction.
+ *   The saving of the data to memory must be done in proper LE order.  For the
+ *   inverse operation of extracting data from a vector register (e.g.,
+ *   Iop_V128Hito64), the register is first saved (by PPCInstr_AvLdSt resulting
+ *   in stvx), and then integer registers are loaded from the memory location
+ *   from where the vector register was saved.  Again, this must be done in
+ *   proper LE order.  So for these various vector Iops, we have LE-specific
+ *   code in host_ppc_isel.c
+ *
+ *   Another unique behavior of vectors in LE mode is with the vector scalar
+ *   (VSX) operations that operate on "double word 0" of the source register,
+ *   storing the result in "double word 0" of the output vector register.  For
+ *   these operations, "double word 0" is interpreted as "high half of the
+ *   register" (i.e., the part on the left side).
+ *
+ */
 /* Translates PPC32/64 code to IR. */
 
 /* References
@@ -143,7 +192,6 @@
 #include "guest_generic_bb_to_IR.h"
 #include "guest_ppc_defs.h"
 
-
 /*------------------------------------------------------------*/
 /*--- Globals                                              ---*/
 /*------------------------------------------------------------*/
@@ -503,15 +551,22 @@
    return (ULong)((((Long)x) << 32) >> 32);
 }
 
-/* Do a big-endian load of a 32-bit word, regardless of the endianness
+/* Do a proper-endian load of a 32-bit word, regardless of the endianness
    of the underlying host. */
-static UInt getUIntBigendianly ( UChar* p )
+static UInt getUIntPPCendianly ( UChar* p )
 {
    UInt w = 0;
-   w = (w << 8) | p[0];
-   w = (w << 8) | p[1];
-   w = (w << 8) | p[2];
-   w = (w << 8) | p[3];
+   if (host_endness == VexEndnessBE) {
+       w = (w << 8) | p[0];
+       w = (w << 8) | p[1];
+       w = (w << 8) | p[2];
+       w = (w << 8) | p[3];
+   } else {
+       w = (w << 8) | p[3];
+       w = (w << 8) | p[2];
+       w = (w << 8) | p[1];
+       w = (w << 8) | p[0];
+   }
    return w;
 }
 
@@ -526,11 +581,15 @@
 }
 
 /* This generates a normal (non store-conditional) store. */
-static void storeBE ( IRExpr* addr, IRExpr* data )
+static void store ( IRExpr* addr, IRExpr* data )
 {
    IRType tyA = typeOfIRExpr(irsb->tyenv, addr);
    vassert(tyA == Ity_I32 || tyA == Ity_I64);
-   stmt( IRStmt_Store(Iend_BE, addr, data) );
+
+   if (host_endness == VexEndnessBE)
+      stmt( IRStmt_Store(Iend_BE, addr, data) );
+   else
+      stmt( IRStmt_Store(Iend_LE, addr, data) );
 }
 
 static IRExpr* unop ( IROp op, IRExpr* a )
@@ -586,9 +645,21 @@
 }
 
 /* This generates a normal (non load-linked) load. */
-static IRExpr* loadBE ( IRType ty, IRExpr* addr )
+static IRExpr* load ( IRType ty, IRExpr* addr )
 {
-   return IRExpr_Load(Iend_BE, ty, addr);
+   if (host_endness == VexEndnessBE)
+      return IRExpr_Load(Iend_BE, ty, addr);
+   else
+      return IRExpr_Load(Iend_LE, ty, addr);
+}
+
+static IRStmt* stmt_load ( IRTemp result,
+                           IRExpr* addr, IRExpr* storedata )
+{
+   if (host_endness == VexEndnessBE)
+      return IRStmt_LLSC(Iend_BE, result, addr, storedata);
+   else
+      return IRStmt_LLSC(Iend_LE, result, addr, storedata);
 }
 
 static IRExpr* mkOR1 ( IRExpr* arg1, IRExpr* arg2 )
@@ -1039,7 +1110,6 @@
    // jrs: probably not necessary; only matters if we reference sub-parts
    // of the ppc registers, but that isn't the case
    // later: this might affect Altivec though?
-   vassert(host_endness == VexEndnessBE);
 
    switch (archreg) {
    case  0: return offsetofPPCGuestState(guest_GPR0);
@@ -1101,40 +1171,78 @@
 {
    vassert(archreg < 32);
    
-   switch (archreg) {
-   case  0: return offsetofPPCGuestState(guest_VSR0);
-   case  1: return offsetofPPCGuestState(guest_VSR1);
-   case  2: return offsetofPPCGuestState(guest_VSR2);
-   case  3: return offsetofPPCGuestState(guest_VSR3);
-   case  4: return offsetofPPCGuestState(guest_VSR4);
-   case  5: return offsetofPPCGuestState(guest_VSR5);
-   case  6: return offsetofPPCGuestState(guest_VSR6);
-   case  7: return offsetofPPCGuestState(guest_VSR7);
-   case  8: return offsetofPPCGuestState(guest_VSR8);
-   case  9: return offsetofPPCGuestState(guest_VSR9);
-   case 10: return offsetofPPCGuestState(guest_VSR10);
-   case 11: return offsetofPPCGuestState(guest_VSR11);
-   case 12: return offsetofPPCGuestState(guest_VSR12);
-   case 13: return offsetofPPCGuestState(guest_VSR13);
-   case 14: return offsetofPPCGuestState(guest_VSR14);
-   case 15: return offsetofPPCGuestState(guest_VSR15);
-   case 16: return offsetofPPCGuestState(guest_VSR16);
-   case 17: return offsetofPPCGuestState(guest_VSR17);
-   case 18: return offsetofPPCGuestState(guest_VSR18);
-   case 19: return offsetofPPCGuestState(guest_VSR19);
-   case 20: return offsetofPPCGuestState(guest_VSR20);
-   case 21: return offsetofPPCGuestState(guest_VSR21);
-   case 22: return offsetofPPCGuestState(guest_VSR22);
-   case 23: return offsetofPPCGuestState(guest_VSR23);
-   case 24: return offsetofPPCGuestState(guest_VSR24);
-   case 25: return offsetofPPCGuestState(guest_VSR25);
-   case 26: return offsetofPPCGuestState(guest_VSR26);
-   case 27: return offsetofPPCGuestState(guest_VSR27);
-   case 28: return offsetofPPCGuestState(guest_VSR28);
-   case 29: return offsetofPPCGuestState(guest_VSR29);
-   case 30: return offsetofPPCGuestState(guest_VSR30);
-   case 31: return offsetofPPCGuestState(guest_VSR31);
-   default: break;
+   if (host_endness == VexEndnessLE) {
+      switch (archreg) {
+         case  0: return offsetofPPCGuestState(guest_VSR0 + 8);
+         case  1: return offsetofPPCGuestState(guest_VSR1 + 8);
+         case  2: return offsetofPPCGuestState(guest_VSR2 + 8);
+         case  3: return offsetofPPCGuestState(guest_VSR3 + 8);
+         case  4: return offsetofPPCGuestState(guest_VSR4 + 8);
+         case  5: return offsetofPPCGuestState(guest_VSR5 + 8);
+         case  6: return offsetofPPCGuestState(guest_VSR6 + 8);
+         case  7: return offsetofPPCGuestState(guest_VSR7 + 8);
+         case  8: return offsetofPPCGuestState(guest_VSR8 + 8);
+         case  9: return offsetofPPCGuestState(guest_VSR9 + 8);
+         case 10: return offsetofPPCGuestState(guest_VSR10 + 8);
+         case 11: return offsetofPPCGuestState(guest_VSR11 + 8);
+         case 12: return offsetofPPCGuestState(guest_VSR12 + 8);
+         case 13: return offsetofPPCGuestState(guest_VSR13 + 8);
+         case 14: return offsetofPPCGuestState(guest_VSR14 + 8);
+         case 15: return offsetofPPCGuestState(guest_VSR15 + 8);
+         case 16: return offsetofPPCGuestState(guest_VSR16 + 8);
+         case 17: return offsetofPPCGuestState(guest_VSR17 + 8);
+         case 18: return offsetofPPCGuestState(guest_VSR18 + 8);
+         case 19: return offsetofPPCGuestState(guest_VSR19 + 8);
+         case 20: return offsetofPPCGuestState(guest_VSR20 + 8);
+         case 21: return offsetofPPCGuestState(guest_VSR21 + 8);
+         case 22: return offsetofPPCGuestState(guest_VSR22 + 8);
+         case 23: return offsetofPPCGuestState(guest_VSR23 + 8);
+         case 24: return offsetofPPCGuestState(guest_VSR24 + 8);
+         case 25: return offsetofPPCGuestState(guest_VSR25 + 8);
+         case 26: return offsetofPPCGuestState(guest_VSR26 + 8);
+         case 27: return offsetofPPCGuestState(guest_VSR27 + 8);
+         case 28: return offsetofPPCGuestState(guest_VSR28 + 8);
+         case 29: return offsetofPPCGuestState(guest_VSR29 + 8);
+         case 30: return offsetofPPCGuestState(guest_VSR30 + 8);
+         case 31: return offsetofPPCGuestState(guest_VSR31 + 8);
+         default: break;
+      }
+   } else {
+      switch (archreg) {
+         case  0: return offsetofPPCGuestState(guest_VSR0);
+         case  1: return offsetofPPCGuestState(guest_VSR1);
+         case  2: return offsetofPPCGuestState(guest_VSR2);
+         case  3: return offsetofPPCGuestState(guest_VSR3);
+         case  4: return offsetofPPCGuestState(guest_VSR4);
+         case  5: return offsetofPPCGuestState(guest_VSR5);
+         case  6: return offsetofPPCGuestState(guest_VSR6);
+         case  7: return offsetofPPCGuestState(guest_VSR7);
+         case  8: return offsetofPPCGuestState(guest_VSR8);
+         case  9: return offsetofPPCGuestState(guest_VSR9);
+         case 10: return offsetofPPCGuestState(guest_VSR10);
+         case 11: return offsetofPPCGuestState(guest_VSR11);
+         case 12: return offsetofPPCGuestState(guest_VSR12);
+         case 13: return offsetofPPCGuestState(guest_VSR13);
+         case 14: return offsetofPPCGuestState(guest_VSR14);
+         case 15: return offsetofPPCGuestState(guest_VSR15);
+         case 16: return offsetofPPCGuestState(guest_VSR16);
+         case 17: return offsetofPPCGuestState(guest_VSR17);
+         case 18: return offsetofPPCGuestState(guest_VSR18);
+         case 19: return offsetofPPCGuestState(guest_VSR19);
+         case 20: return offsetofPPCGuestState(guest_VSR20);
+         case 21: return offsetofPPCGuestState(guest_VSR21);
+         case 22: return offsetofPPCGuestState(guest_VSR22);
+         case 23: return offsetofPPCGuestState(guest_VSR23);
+         case 24: return offsetofPPCGuestState(guest_VSR24);
+         case 25: return offsetofPPCGuestState(guest_VSR25);
+         case 26: return offsetofPPCGuestState(guest_VSR26);
+         case 27: return offsetofPPCGuestState(guest_VSR27);
+         case 28: return offsetofPPCGuestState(guest_VSR28);
+         case 29: return offsetofPPCGuestState(guest_VSR29);
+         case 30: return offsetofPPCGuestState(guest_VSR30);
+         case 31: return offsetofPPCGuestState(guest_VSR31);
+         default: break;
+      }
    }
    vpanic("floatGuestRegOffset(ppc)"); /*notreached*/
 }
@@ -4758,7 +4866,7 @@
    switch (opc1) {
    case 0x22: // lbz (Load B & Zero, PPC32 p433)
       DIP("lbz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = loadBE(Ity_I8, mkexpr(EA));
+      val = load(Ity_I8, mkexpr(EA));
       putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
       break;
       
@@ -4768,14 +4876,14 @@
          return False;
       }
       DIP("lbzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = loadBE(Ity_I8, mkexpr(EA));
+      val = load(Ity_I8, mkexpr(EA));
       putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
       putIReg( rA_addr, mkexpr(EA) );
       break;
       
    case 0x2A: // lha (Load HW Alg, PPC32 p445)
       DIP("lha r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = loadBE(Ity_I16, mkexpr(EA));
+      val = load(Ity_I16, mkexpr(EA));
       putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
       break;
 
@@ -4785,14 +4893,14 @@
          return False;
       }
       DIP("lhau r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = loadBE(Ity_I16, mkexpr(EA));
+      val = load(Ity_I16, mkexpr(EA));
       putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
       putIReg( rA_addr, mkexpr(EA) );
       break;
       
    case 0x28: // lhz (Load HW & Zero, PPC32 p450)
       DIP("lhz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = loadBE(Ity_I16, mkexpr(EA));
+      val = load(Ity_I16, mkexpr(EA));
       putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
       break;
       
@@ -4802,14 +4910,14 @@
          return False;
       }
       DIP("lhzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = loadBE(Ity_I16, mkexpr(EA));
+      val = load(Ity_I16, mkexpr(EA));
       putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
       putIReg( rA_addr, mkexpr(EA) );
       break;
 
    case 0x20: // lwz (Load W & Zero, PPC32 p460)
       DIP("lwz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = loadBE(Ity_I32, mkexpr(EA));
+      val = load(Ity_I32, mkexpr(EA));
       putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
       break;
       
@@ -4819,7 +4927,7 @@
          return False;
       }
       DIP("lwzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
-      val = loadBE(Ity_I32, mkexpr(EA));
+      val = load(Ity_I32, mkexpr(EA));
       putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
       putIReg( rA_addr, mkexpr(EA) );
       break;
@@ -4838,14 +4946,14 @@
             vex_printf("dis_int_load(ppc)(lwzux,rA_addr|rD_addr)\n");
             return False;
          }
-         val = loadBE(Ity_I8, mkexpr(EA));
+         val = load(Ity_I8, mkexpr(EA));
          putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
          
       case 0x057: // lbzx (Load B & Zero, Indexed, PPC32 p436)
          DIP("lbzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = loadBE(Ity_I8, mkexpr(EA));
+         val = load(Ity_I8, mkexpr(EA));
          putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
          break;
          
@@ -4855,14 +4963,14 @@
             return False;
          }
          DIP("lhaux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = loadBE(Ity_I16, mkexpr(EA));
+         val = load(Ity_I16, mkexpr(EA));
          putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
          
       case 0x157: // lhax (Load HW Alg, Indexed, PPC32 p448)
          DIP("lhax r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = loadBE(Ity_I16, mkexpr(EA));
+         val = load(Ity_I16, mkexpr(EA));
          putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
          break;
          
@@ -4872,14 +4980,14 @@
             return False;
          }
          DIP("lhzux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = loadBE(Ity_I16, mkexpr(EA));
+         val = load(Ity_I16, mkexpr(EA));
          putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
          
       case 0x117: // lhzx (Load HW & Zero, Indexed, PPC32 p453)
          DIP("lhzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = loadBE(Ity_I16, mkexpr(EA));
+         val = load(Ity_I16, mkexpr(EA));
          putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
          break;
 
@@ -4889,14 +4997,14 @@
             return False;
          }
          DIP("lwzux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = loadBE(Ity_I32, mkexpr(EA));
+         val = load(Ity_I32, mkexpr(EA));
          putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
          
       case 0x017: // lwzx (Load W & Zero, Indexed, PPC32 p463)
          DIP("lwzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         val = loadBE(Ity_I32, mkexpr(EA));
+         val = load(Ity_I32, mkexpr(EA));
          putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
          break;
 
@@ -4908,13 +5016,13 @@
             return False;
          }
          DIP("ldux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         putIReg( rD_addr, loadBE(Ity_I64, mkexpr(EA)) );
+         putIReg( rD_addr, load(Ity_I64, mkexpr(EA)) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
 
       case 0x015: // ldx (Load DWord, Indexed, PPC64 p476)
          DIP("ldx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         putIReg( rD_addr, loadBE(Ity_I64, mkexpr(EA)) );
+         putIReg( rD_addr, load(Ity_I64, mkexpr(EA)) );
          break;
 
       case 0x175: // lwaux (Load W Alg, Update Indexed, PPC64 p501)
@@ -4924,14 +5032,14 @@
          }
          DIP("lwaux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
          putIReg( rD_addr,
-                  unop(Iop_32Sto64, loadBE(Ity_I32, mkexpr(EA))) );
+                  unop(Iop_32Sto64, load(Ity_I32, mkexpr(EA))) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
 
       case 0x155: // lwax (Load W Alg, Indexed, PPC64 p502)
          DIP("lwax r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
          putIReg( rD_addr,
-                  unop(Iop_32Sto64, loadBE(Ity_I32, mkexpr(EA))) );
+                  unop(Iop_32Sto64, load(Ity_I32, mkexpr(EA))) );
          break;
 
       default:
@@ -4946,7 +5054,7 @@
       switch ((b1<<1) | b0) {
       case 0x0: // ld (Load DWord, PPC64 p472)
          DIP("ld r%u,%d(r%u)\n", rD_addr, simm16, rA_addr);
-         putIReg( rD_addr, loadBE(Ity_I64, mkexpr(EA)) );
+         putIReg( rD_addr, load(Ity_I64, mkexpr(EA)) );
          break;
 
       case 0x1: // ldu (Load DWord, Update, PPC64 p474)
@@ -4955,14 +5063,14 @@
             return False;
          }
          DIP("ldu r%u,%d(r%u)\n", rD_addr, simm16, rA_addr);
-         putIReg( rD_addr, loadBE(Ity_I64, mkexpr(EA)) );
+         putIReg( rD_addr, load(Ity_I64, mkexpr(EA)) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
 
       case 0x2: // lwa (Load Word Alg, PPC64 p499)
          DIP("lwa r%u,%d(r%u)\n", rD_addr, simm16, rA_addr);
          putIReg( rD_addr,
-                  unop(Iop_32Sto64, loadBE(Ity_I32, mkexpr(EA))) );
+                  unop(Iop_32Sto64, load(Ity_I32, mkexpr(EA))) );
          break;
 
       default:
@@ -4981,17 +5089,17 @@
        */
       // trap if EA misaligned on 16 byte address
       if (mode64) {
-         assign(high, loadBE(ty, mkexpr( EA ) ) );
-         assign(low, loadBE(ty, binop( Iop_Add64,
-                                       mkexpr( EA ),
-                                       mkU64( 8 ) ) ) );
+         assign(high, load(ty, mkexpr( EA ) ) );
+         assign(low, load(ty, binop( Iop_Add64,
+                                     mkexpr( EA ),
+                                     mkU64( 8 ) ) ) );
       } else {
-         assign(high, loadBE(ty, binop( Iop_Add32,
-                                        mkexpr( EA ),
-                                        mkU32( 4 ) ) ) );
-         assign(low, loadBE(ty, binop( Iop_Add32,
-                                        mkexpr( EA ),
-                                        mkU32( 12 ) ) ) );
+         assign(high, load(ty, binop( Iop_Add32,
+                                      mkexpr( EA ),
+                                      mkU32( 4 ) ) ) );
+         assign(low, load(ty, binop( Iop_Add32,
+                                      mkexpr( EA ),
+                                      mkU32( 12 ) ) ) );
       }
       gen_SIGBUS_if_misaligned( EA, 16 );
       putIReg( rD_addr,  mkexpr( high) );
@@ -5046,7 +5154,7 @@
    switch (opc1) {
    case 0x26: // stb (Store B, PPC32 p509)
       DIP("stb r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
-      storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
+      store( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
       break;
        
    case 0x27: // stbu (Store B, Update, PPC32 p510)
@@ -5056,12 +5164,12 @@
       }
       DIP("stbu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
       putIReg( rA_addr, mkexpr(EA) );
-      storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
+      store( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
       break;
 
    case 0x2C: // sth (Store HW, PPC32 p522)
       DIP("sth r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
-      storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
+      store( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
       break;
       
    case 0x2D: // sthu (Store HW, Update, PPC32 p524)
@@ -5071,12 +5179,12 @@
       }
       DIP("sthu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
       putIReg( rA_addr, mkexpr(EA) );
-      storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
+      store( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
       break;
 
    case 0x24: // stw (Store W, PPC32 p530)
       DIP("stw r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
-      storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
+      store( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
       break;
 
    case 0x25: // stwu (Store W, Update, PPC32 p534)
@@ -5086,7 +5194,7 @@
       }
       DIP("stwu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
       putIReg( rA_addr, mkexpr(EA) );
-      storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
+      store( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
       break;
       
    /* X Form : all these use EA_indexed */
@@ -5104,12 +5212,12 @@
          }
          DIP("stbux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          putIReg( rA_addr, mkexpr(EA) );
-         storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
+         store( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
          break;
          
       case 0x0D7: // stbx (Store B Indexed, PPC32 p512)
          DIP("stbx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
-         storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
+         store( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
          break;
          
       case 0x1B7: // sthux (Store HW, Update Indexed, PPC32 p525)
@@ -5119,12 +5227,12 @@
          }
          DIP("sthux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          putIReg( rA_addr, mkexpr(EA) );
-         storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
+         store( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
          break;
          
       case 0x197: // sthx (Store HW Indexed, PPC32 p526)
          DIP("sthx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
-         storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
+         store( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
          break;
          
       case 0x0B7: // stwux (Store W, Update Indexed, PPC32 p535)
@@ -5134,12 +5242,12 @@
          }
          DIP("stwux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          putIReg( rA_addr, mkexpr(EA) );
-         storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
+         store( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
          break;
 
       case 0x097: // stwx (Store W Indexed, PPC32 p536)
          DIP("stwx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
-         storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
+         store( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
          break;
          
 
@@ -5151,12 +5259,12 @@
          }
          DIP("stdux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          putIReg( rA_addr, mkexpr(EA) );
-         storeBE( mkexpr(EA), mkexpr(rS) );
+         store( mkexpr(EA), mkexpr(rS) );
          break;
 
       case 0x095: // stdx (Store DWord Indexed, PPC64 p585)
          DIP("stdx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
-         storeBE( mkexpr(EA), mkexpr(rS) );
+         store( mkexpr(EA), mkexpr(rS) );
          break;
 
       default:
@@ -5174,7 +5282,7 @@
             return False;
 
          DIP("std r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
-         storeBE( mkexpr(EA), mkexpr(rS) );
+         store( mkexpr(EA), mkexpr(rS) );
          break;
 
       case 0x1: // stdu (Store DWord, Update, PPC64 p583)
@@ -5183,7 +5291,7 @@
 
          DIP("stdu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
          putIReg( rA_addr, mkexpr(EA) );
-         storeBE( mkexpr(EA), mkexpr(rS) );
+         store( mkexpr(EA), mkexpr(rS) );
          break;
 
       case 0x2: { // stq (Store QuadWord, Update, PPC64 p583)
@@ -5205,9 +5313,9 @@
             assign( EA_lo, ea_rAor0_simm( rA_addr, simm16+12 ) );
          }
          putIReg( rA_addr, mkexpr(EA_hi) );
-         storeBE( mkexpr(EA_hi), mkexpr(rS) );
+         store( mkexpr(EA_hi), mkexpr(rS) );
          putIReg( rA_addr, mkexpr( EA_lo) );
-         storeBE( mkexpr(EA_lo), getIReg( rS_addr+1 ) );
+         store( mkexpr(EA_lo), getIReg( rS_addr+1 ) );
          break;
       }
       default:
@@ -5256,7 +5364,7 @@
       DIP("lmw r%u,%d(r%u)\n", rD_addr, simm16, rA_addr);
       for (r = rD_addr; r <= 31; r++) {
          irx_addr = binop(mkAdd, mkexpr(EA), mode64 ? mkU64(ea_off) : mkU32(ea_off));
-         putIReg( r, mkWidenFrom32(ty, loadBE(Ity_I32, irx_addr ),
+         putIReg( r, mkWidenFrom32(ty, load(Ity_I32, irx_addr ),
                                        False) );
          ea_off += 4;
       }
@@ -5266,7 +5374,7 @@
       DIP("stmw r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
       for (r = rS_addr; r <= 31; r++) {
          irx_addr = binop(mkAdd, mkexpr(EA), mode64 ? mkU64(ea_off) : mkU32(ea_off));
-         storeBE( irx_addr, mkNarrowTo32(ty, getIReg(r)) );
+         store( irx_addr, mkNarrowTo32(ty, getIReg(r)) );
          ea_off += 4;
       }
       break;
@@ -5321,8 +5429,9 @@
                   Iop_Shl32, 
                   unop(
                      Iop_8Uto32, 
-                     loadBE(Ity_I8, 
-                            binop(mkSzOp(ty,Iop_Add8), e_EA, mkSzImm(ty,i)))
+                     load( Ity_I8,
+                           binop( mkSzOp(ty,Iop_Add8),
+                                  e_EA, mkSzImm(ty,i)))
                   ), 
                   mkU8(toUChar(shift))
                )
@@ -5360,12 +5469,12 @@
       }
       /* *(EA+i) = 32to8(rS >> shift) */
       vassert(shift == 0 || shift == 8 || shift == 16 || shift == 24);
-      storeBE(
-         binop(mkSzOp(ty,Iop_Add8), e_EA, mkSzImm(ty,i)),
-         unop(Iop_32to8,
-              binop(Iop_Shr32,
-                    mkNarrowTo32(ty, getIReg(rS)),
-                    mkU8(toUChar(shift))))
+      store(
+            binop( mkSzOp(ty,Iop_Add8), e_EA, mkSzImm(ty,i)),
+            unop( Iop_32to8,
+                  binop( Iop_Shr32,
+                         mkNarrowTo32( ty, getIReg(rS) ),
+                         mkU8( toUChar(shift) )))
       );
       shift -= 8;
    }
@@ -5404,10 +5513,10 @@
          /* Special case hack */
          /* rD = Mem[EA]; (rD+1)%32 = Mem[EA+4] */
          putIReg( rD_addr,          
-                  loadBE(Ity_I32, mkexpr(t_EA)) );
+                  load(Ity_I32, mkexpr(t_EA)) );
          putIReg( (rD_addr+1) % 32, 
-                  loadBE(Ity_I32,
-                         binop(Iop_Add32, mkexpr(t_EA), mkU32(4))) );
+                  load(Ity_I32,
+                       binop(Iop_Add32, mkexpr(t_EA), mkU32(4))) );
       } else {
          t_nbytes = newTemp(Ity_I32);
          assign( t_nbytes, mkU32(NumBytes==0 ? 32 : NumBytes) );
@@ -5439,10 +5548,10 @@
       if (NumBytes == 8 && !mode64) {
          /* Special case hack */
          /* Mem[EA] = rD; Mem[EA+4] = (rD+1)%32 */
-         storeBE( mkexpr(t_EA), 
-                  getIReg(rD_addr) );
-         storeBE( binop(Iop_Add32, mkexpr(t_EA), mkU32(4)), 
-                  getIReg((rD_addr+1) % 32) );
+         store( mkexpr(t_EA),
+                getIReg(rD_addr) );
+         store( binop(Iop_Add32, mkexpr(t_EA), mkU32(4)),
+                getIReg((rD_addr+1) % 32) );
       } else {
          t_nbytes = newTemp(Ity_I32);
          assign( t_nbytes, mkU32(NumBytes==0 ? 32 : NumBytes) );
@@ -6143,7 +6252,7 @@
 
          // and actually do the load
          res = newTemp(Ity_I32);
-         stmt( IRStmt_LLSC(Iend_BE, res, mkexpr(EA), NULL/*this is a load*/) );
+         stmt( stmt_load(res, mkexpr(EA), NULL/*this is a load*/) );
 
          putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(res), False) );
          break;
@@ -6169,7 +6278,7 @@
 
          // Do the store, and get success/failure bit into resSC
          resSC = newTemp(Ity_I1);
-         stmt( IRStmt_LLSC(Iend_BE, resSC, mkexpr(EA), mkexpr(rS)) );
+         stmt( stmt_load( resSC, mkexpr(EA), mkexpr(rS)) );
 
          // Set CR0[LT GT EQ S0] = 0b000 || XER[SO]  on failure
          // Set CR0[LT GT EQ S0] = 0b001 || XER[SO]  on success
@@ -6236,7 +6345,7 @@
 
          // and actually do the load
          res = newTemp(Ity_I64);
-         stmt( IRStmt_LLSC(Iend_BE, res, mkexpr(EA), NULL/*this is a load*/) );
+         stmt( stmt_load( res, mkexpr(EA), NULL/*this is a load*/) );
 
          putIReg( rD_addr, mkexpr(res) );
          break;
@@ -6262,7 +6371,7 @@
 
          // Do the store, and get success/failure bit into resSC
          resSC = newTemp(Ity_I1);
-         stmt( IRStmt_LLSC(Iend_BE, resSC, mkexpr(EA), mkexpr(rS)) );
+         stmt( stmt_load( resSC, mkexpr(EA), mkexpr(rS)) );
 
          // Set CR0[LT GT EQ S0] = 0b000 || XER[SO]  on failure
          // Set CR0[LT GT EQ S0] = 0b001 || XER[SO]  on success
@@ -6292,18 +6401,18 @@
 
          // and actually do the load
          if (mode64) {
-            stmt( IRStmt_LLSC( Iend_BE, res_hi,
-                               mkexpr(EA), NULL/*this is a load*/) );
-            stmt( IRStmt_LLSC( Iend_BE, res_lo,
-                               binop(Iop_Add64, mkexpr(EA), mkU64(8) ),
-                               NULL/*this is a load*/) );
+            stmt( stmt_load( res_hi,
+                             mkexpr(EA), NULL/*this is a load*/) );
+            stmt( stmt_load( res_lo,
+                             binop(Iop_Add64, mkexpr(EA), mkU64(8) ),
+                             NULL/*this is a load*/) );
          } else {
-            stmt( IRStmt_LLSC( Iend_BE, res_hi,
-                               binop( Iop_Add32, mkexpr(EA), mkU32(4) ),
-                               NULL/*this is a load*/) );
-            stmt( IRStmt_LLSC( Iend_BE, res_lo,
-                               binop( Iop_Add32, mkexpr(EA), mkU32(12) ),
-                               NULL/*this is a load*/) );
+            stmt( stmt_load( res_hi,
+                             binop( Iop_Add32, mkexpr(EA), mkU32(4) ),
+                             NULL/*this is a load*/) );
+            stmt( stmt_load( res_lo,
+                             binop( Iop_Add32, mkexpr(EA), mkU32(12) ),
+                             NULL/*this is a load*/) );
          }
          putIReg( rD_addr,   mkexpr(res_hi) );
          putIReg( rD_addr+1, mkexpr(res_lo) );
@@ -6332,14 +6441,14 @@
          resSC = newTemp(Ity_I1);
 
          if (mode64) {
-            stmt( IRStmt_LLSC( Iend_BE, resSC, mkexpr(EA), mkexpr(rS_hi) ) );
-            storeBE(binop( Iop_Add64, mkexpr(EA), mkU64(8) ), mkexpr(rS_lo) );
+            stmt( stmt_load( resSC, mkexpr(EA), mkexpr(rS_hi) ) );
+            store( binop( Iop_Add64, mkexpr(EA), mkU64(8) ), mkexpr(rS_lo) );
          } else {
-            stmt( IRStmt_LLSC( Iend_BE, resSC, binop( Iop_Add32,
-                                                      mkexpr(EA),
-                                                      mkU32(4) ),
-                                                      mkexpr(rS_hi) ) );
-            storeBE(binop(Iop_Add32, mkexpr(EA), mkU32(12) ), mkexpr(rS_lo) );
+            stmt( stmt_load( resSC, binop( Iop_Add32,
+                                           mkexpr(EA),
+                                           mkU32(4) ),
+                                           mkexpr(rS_hi) ) );
+            store( binop(Iop_Add32, mkexpr(EA), mkU32(12) ), mkexpr(rS_lo) );
          }
 
          // Set CR0[LT GT EQ S0] = 0b000 || XER[SO]  on failure
@@ -6662,7 +6771,7 @@
 
       case 0x316: // lhbrx (Load Halfword Byte-Reverse Indexed, PPC32 p449)
          DIP("lhbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         assign( w1, unop(Iop_16Uto32, loadBE(Ity_I16, mkexpr(EA))) );
+         assign( w1, unop(Iop_16Uto32, load(Ity_I16, mkexpr(EA))) );
          assign( w2, gen_byterev16(w1) );
          putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(w2),
                                          /* Signed */False) );
@@ -6670,7 +6779,7 @@
 
       case 0x216: // lwbrx (Load Word Byte-Reverse Indexed, PPC32 p459)
          DIP("lwbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         assign( w1, loadBE(Ity_I32, mkexpr(EA)) );
+         assign( w1, load(Ity_I32, mkexpr(EA)) );
          assign( w2, gen_byterev32(w1) );
          putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(w2),
                                          /* Signed */False) );
@@ -6682,26 +6791,29 @@
          IRTemp w3 = newTemp( Ity_I32 );
          IRTemp w4 = newTemp( Ity_I32 );
          DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         assign( w1, loadBE( Ity_I32, mkexpr( EA ) ) );
+         assign( w1, load( Ity_I32, mkexpr( EA ) ) );
          assign( w2, gen_byterev32( w1 ) );
          nextAddr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                            ty == Ity_I64 ? mkU64( 4 ) : mkU32( 4 ) );
-         assign( w3, loadBE( Ity_I32, nextAddr ) );
+         assign( w3, load( Ity_I32, nextAddr ) );
          assign( w4, gen_byterev32( w3 ) );
-         putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
+         if (host_endness == VexEndnessLE)
+            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w2 ), mkexpr( w4 ) ) );
+         else
+            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
          break;
       }
 
       case 0x396: // sthbrx (Store Half Word Byte-Reverse Indexed, PPC32 p523)
          DIP("sthbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          assign( w1, mkNarrowTo32(ty, getIReg(rS_addr)) );
-         storeBE( mkexpr(EA), unop(Iop_32to16, gen_byterev16(w1)) );
+         store( mkexpr(EA), unop(Iop_32to16, gen_byterev16(w1)) );
          break;
       
       case 0x296: // stwbrx (Store Word Byte-Reverse Indxd, PPC32 p531)
          DIP("stwbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          assign( w1, mkNarrowTo32(ty, getIReg(rS_addr)) );
-         storeBE( mkexpr(EA), gen_byterev32(w1) );
+         store( mkexpr(EA), gen_byterev32(w1) );
          break;
 
       case 0x294: // stdbrx (Store Doubleword Byte-Reverse Indexed)
@@ -6713,8 +6825,9 @@
          DIP("stdbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          assign(lo, unop(Iop_64HIto32, mkexpr(rS)));
          assign(hi, unop(Iop_64to32, mkexpr(rS)));
-         storeBE( mkexpr( EA ),
-                  binop( Iop_32HLto64, gen_byterev32( hi ), gen_byterev32( lo ) ) );
+         store( mkexpr( EA ),
+                binop( Iop_32HLto64, gen_byterev32( hi ),
+                       gen_byterev32( lo ) ) );
          break;
       }
 
@@ -7232,7 +7345,7 @@
          
          for (i = 0; i < clearszB / 8; i++) {
             irx_addr = binop( Iop_Add64, mkexpr(addr), mkU64(i*8) );
-            storeBE( irx_addr, mkU64(0) );
+            store( irx_addr, mkU64(0) );
          }
       } else {
          /* Round EA down to the start of the containing block. */
@@ -7242,7 +7355,7 @@
          
          for (i = 0; i < clearszB / 4; i++) {
             irx_addr = binop( Iop_Add32, mkexpr(addr), mkU32(i*4) );
-            storeBE( irx_addr, mkU32(0) );
+            store( irx_addr, mkU32(0) );
          }
       }
       break;
@@ -7462,7 +7575,7 @@
       DIP("lfs fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
       assign( EA, ea_rAor0_simm(rA_addr, simm16) );
       putFReg( frD_addr,
-               unop(Iop_F32toF64, loadBE(Ity_F32, mkexpr(EA))) );
+               unop(Iop_F32toF64, load(Ity_F32, mkexpr(EA))) );
       break;
 
    case 0x31: // lfsu (Load Float Single, Update, PPC32 p442)
@@ -7471,14 +7584,14 @@
       DIP("lfsu fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
       assign( EA, ea_rA_simm(rA_addr, simm16) );
       putFReg( frD_addr,
-               unop(Iop_F32toF64, loadBE(Ity_F32, mkexpr(EA))) );
+               unop(Iop_F32toF64, load(Ity_F32, mkexpr(EA))) );
       putIReg( rA_addr, mkexpr(EA) );
       break;
       
    case 0x32: // lfd (Load Float Double, PPC32 p437)
       DIP("lfd fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
       assign( EA, ea_rAor0_simm(rA_addr, simm16) );
-      putFReg( frD_addr, loadBE(Ity_F64, mkexpr(EA)) );
+      putFReg( frD_addr, load(Ity_F64, mkexpr(EA)) );
       break;
 
    case 0x33: // lfdu (Load Float Double, Update, PPC32 p438)
@@ -7486,7 +7599,7 @@
          return False;
       DIP("lfdu fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
       assign( EA, ea_rA_simm(rA_addr, simm16) );
-      putFReg( frD_addr, loadBE(Ity_F64, mkexpr(EA)) );
+      putFReg( frD_addr, load(Ity_F64, mkexpr(EA)) );
       putIReg( rA_addr, mkexpr(EA) );
       break;
 
@@ -7501,7 +7614,7 @@
          DIP("lfsx fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
          assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
          putFReg( frD_addr, unop( Iop_F32toF64, 
-                                  loadBE(Ity_F32, mkexpr(EA))) );
+                                  load(Ity_F32, mkexpr(EA))) );
          break;
          
       case 0x237: // lfsux (Load Float Single, Update Indxd, PPC32 p443)
@@ -7510,14 +7623,14 @@
          DIP("lfsux fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
          assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
          putFReg( frD_addr,
-                  unop(Iop_F32toF64, loadBE(Ity_F32, mkexpr(EA))) );
+                  unop(Iop_F32toF64, load(Ity_F32, mkexpr(EA))) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
          
       case 0x257: // lfdx (Load Float Double Indexed, PPC32 p440)
          DIP("lfdx fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
          assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
-         putFReg( frD_addr, loadBE(Ity_F64, mkexpr(EA)) );
+         putFReg( frD_addr, load(Ity_F64, mkexpr(EA)) );
          break;
          
       case 0x277: // lfdux (Load Float Double, Update Indxd, PPC32 p439)
@@ -7525,14 +7638,14 @@
             return False;
          DIP("lfdux fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
          assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
-         putFReg( frD_addr, loadBE(Ity_F64, mkexpr(EA)) );
+         putFReg( frD_addr, load(Ity_F64, mkexpr(EA)) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
          
       case 0x357: // lfiwax (Load Float As Integer, Indxd, ISA 2.05 p120)
          DIP("lfiwax fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
          assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) );
-         assign( iLo, loadBE(Ity_I32, mkexpr(EA)) );
+         assign( iLo, load(Ity_I32, mkexpr(EA)) );
          assign( iHi, binop(Iop_Sub32,
                             mkU32(0),
                             binop(Iop_Shr32, mkexpr(iLo), mkU8(31)))  );
@@ -7545,7 +7658,7 @@
          IRTemp dw = newTemp( Ity_I64 );
          DIP("lfiwzx fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
          assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) );
-         assign( iLo, loadBE(Ity_I32, mkexpr(EA)) );
+         assign( iLo, load(Ity_I32, mkexpr(EA)) );
          assign( dw, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( iLo ) ) );
          putFReg( frD_addr, unop( Iop_ReinterpI64asF64, mkexpr( dw ) ) );
          break;
@@ -7604,8 +7717,7 @@
       /* Use Iop_TruncF64asF32 to truncate and possible denormalise
          the value to be stored in the correct way, without any
          rounding. */
-      storeBE( mkexpr(EA),
-               unop(Iop_TruncF64asF32, mkexpr(frS)) );
+      store( mkexpr(EA), unop(Iop_TruncF64asF32, mkexpr(frS)) );
       break;
 
    case 0x35: // stfsu (Store Float Single, Update, PPC32 p519)
@@ -7614,15 +7726,14 @@
       DIP("stfsu fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr);
       assign( EA, ea_rA_simm(rA_addr, simm16) );
       /* See comment for stfs */
-      storeBE( mkexpr(EA),
-               unop(Iop_TruncF64asF32, mkexpr(frS)) );
+      store( mkexpr(EA), unop(Iop_TruncF64asF32, mkexpr(frS)) );
       putIReg( rA_addr, mkexpr(EA) );
       break;
 
    case 0x36: // stfd (Store Float Double, PPC32 p513)
       DIP("stfd fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr);
       assign( EA, ea_rAor0_simm(rA_addr, simm16) );
-      storeBE( mkexpr(EA), mkexpr(frS) );
+      store( mkexpr(EA), mkexpr(frS) );
       break;
 
    case 0x37: // stfdu (Store Float Double, Update, PPC32 p514)
@@ -7630,7 +7741,7 @@
          return False;
       DIP("stfdu fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr);
       assign( EA, ea_rA_simm(rA_addr, simm16) );
-      storeBE( mkexpr(EA), mkexpr(frS) );
+      store( mkexpr(EA), mkexpr(frS) );
       putIReg( rA_addr, mkexpr(EA) );
       break;
 
@@ -7644,8 +7755,8 @@
          DIP("stfsx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
          assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
          /* See note for stfs */
-         storeBE( mkexpr(EA), 
-                  unop(Iop_TruncF64asF32, mkexpr(frS)) );
+         store( mkexpr(EA),
+                unop(Iop_TruncF64asF32, mkexpr(frS)) );
          break;
          
       case 0x2B7: // stfsux (Store Float Sgl, Update Indxd, PPC32 p520)
@@ -7654,15 +7765,14 @@
          DIP("stfsux fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
          assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
          /* See note for stfs */
-         storeBE( mkexpr(EA), 
-                  unop(Iop_TruncF64asF32, mkexpr(frS)) );
+         store( mkexpr(EA), unop(Iop_TruncF64asF32, mkexpr(frS)) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
 
       case 0x2D7: // stfdx (Store Float Double Indexed, PPC32 p516)
          DIP("stfdx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
          assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
-         storeBE( mkexpr(EA), mkexpr(frS) );
+         store( mkexpr(EA), mkexpr(frS) );
          break;
          
       case 0x2F7: // stfdux (Store Float Dbl, Update Indxd, PPC32 p515)
@@ -7670,7 +7780,7 @@
             return False;
          DIP("stfdux fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
          assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
-         storeBE( mkexpr(EA), mkexpr(frS) );
+         store( mkexpr(EA), mkexpr(frS) );
          putIReg( rA_addr, mkexpr(EA) );
          break;
 
@@ -7678,8 +7788,8 @@
          // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
          DIP("stfiwx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
          assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
-         storeBE( mkexpr(EA),
-                  unop(Iop_64to32, unop(Iop_ReinterpF64asI64, mkexpr(frS))) );
+         store( mkexpr(EA),
+                unop(Iop_64to32, unop(Iop_ReinterpF64asI64, mkexpr(frS))) );
          break;
 
       default:
@@ -8865,11 +8975,11 @@
    assign( frT_lo, getFReg(frT_lo_addr) );
 
    if (is_load) {
-      putFReg( frT_hi_addr, loadBE(Ity_F64, mkexpr(EA_hi)) );
-      putFReg( frT_lo_addr, loadBE(Ity_F64, mkexpr(EA_lo)) );
+      putFReg( frT_hi_addr, load(Ity_F64, mkexpr(EA_hi)) );
+      putFReg( frT_lo_addr, load(Ity_F64, mkexpr(EA_lo)) );
    } else {
-      storeBE( mkexpr(EA_hi), mkexpr(frT_hi) );
-      storeBE( mkexpr(EA_lo), mkexpr(frT_lo) );
+      store( mkexpr(EA_hi), mkexpr(frT_hi) );
+      store( mkexpr(EA_lo), mkexpr(frT_lo) );
    }
 
    return True;
@@ -14767,6 +14877,7 @@
 static Bool
 dis_vxs_misc( UInt theInstr, UInt opc2 )
 {
+#define VG_PPC_SIGN_MASK 0x7fffffffffffffffULL
    /* XX3-Form and XX2-Form */
    UChar opc1 = ifieldOPC( theInstr );
    UChar XT = ifieldRegXT ( theInstr );
@@ -14793,7 +14904,20 @@
       {
          /* Move abs val of dw 0 of VSX[XB] to dw 0 of VSX[XT]. */
          IRTemp absVal = newTemp(Ity_V128);
-         assign(absVal, binop(Iop_ShrV128, binop(Iop_ShlV128, mkexpr(vB), mkU8(1)), mkU8(1)));
+         if (host_endness == VexEndnessLE) {
+            IRTemp hi64 = newTemp(Ity_I64);
+            IRTemp lo64 = newTemp(Ity_I64);
+            assign( hi64, unop( Iop_V128HIto64, mkexpr(vB) ) );
+            assign( lo64, unop( Iop_V128to64, mkexpr(vB) ) );
+            assign( absVal, binop( Iop_64HLtoV128,
+                                   binop( Iop_And64, mkexpr(hi64),
+                                          mkU64(VG_PPC_SIGN_MASK) ),
+                                   mkexpr(lo64) ) );
+         } else {
+            assign(absVal, binop(Iop_ShrV128,
+                                 binop(Iop_ShlV128, mkexpr(vB),
+                                       mkU8(1)), mkU8(1)));
+         }
          DIP("xsabsdp v%d,v%d\n", (UInt)XT, (UInt)XB);
          putVSReg(XT, mkexpr(absVal));
          break;
@@ -14801,51 +14925,73 @@
       case 0x2C0: // xscpsgndp
       {
          /* Scalar copy sign double-precision */
-         IRTemp vecA_signbit = newTemp(Ity_V128);
-         IRTemp vecB_no_signbit = newTemp(Ity_V128);
+         IRTemp vecA_signed = newTemp(Ity_I64);
+         IRTemp vecB_unsigned = newTemp(Ity_I64);
          IRTemp vec_result = newTemp(Ity_V128);
          DIP("xscpsgndp v%d,v%d v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
-         assign( vecB_no_signbit, binop( Iop_ShrV128, binop( Iop_ShlV128,
-                                                             mkexpr( vB ),
-                                                             mkU8( 1 ) ),
-                                         mkU8( 1 ) ) );
-         assign( vecA_signbit, binop( Iop_ShlV128, binop( Iop_ShrV128,
-                                                          mkexpr( vA ),
-                                                          mkU8( 127 ) ),
-                                      mkU8( 127 ) ) );
-         assign( vec_result, binop( Iop_OrV128, mkexpr(vecA_signbit), mkexpr( vecB_no_signbit ) ) );
+         assign( vecA_signed, binop( Iop_And64,
+                                     unop( Iop_V128HIto64,
+                                           mkexpr(vA)),
+                                           mkU64(~VG_PPC_SIGN_MASK) ) );
+         assign( vecB_unsigned, binop( Iop_And64,
+                                       unop( Iop_V128HIto64,
+                                             mkexpr(vB) ),
+                                             mkU64(VG_PPC_SIGN_MASK) ) );
+         assign( vec_result, binop( Iop_64HLtoV128,
+                                    binop( Iop_Or64,
+                                           mkexpr(vecA_signed),
+                                           mkexpr(vecB_unsigned) ),
+                                    mkU64(0x0ULL)));
          putVSReg(XT, mkexpr(vec_result));
          break;
       }
       case 0x2D2: // xsnabsdp
       {
          /* Scalar negative absolute value double-precision */
-         IRTemp vec_neg_signbit = newTemp(Ity_V128);
+         IRTemp BHi_signed = newTemp(Ity_I64);
          DIP("xsnabsdp v%d,v%d\n", (UInt)XT, (UInt)XB);
-         assign( vec_neg_signbit, unop( Iop_NotV128, binop( Iop_ShrV128,
-                                                            mkV128( 0xffff ),
-                                                            mkU8( 1 ) ) ) );
-         putVSReg(XT, binop(Iop_OrV128, mkexpr(vec_neg_signbit), mkexpr(vB)));
+         assign( BHi_signed, binop( Iop_Or64,
+                                    unop( Iop_V128HIto64,
+                                          mkexpr(vB) ),
+                                          mkU64(~VG_PPC_SIGN_MASK) ) );
+         putVSReg(XT, binop( Iop_64HLtoV128,
+                             mkexpr(BHi_signed), mkU64(0x0ULL) ) );
          break;
       }
       case 0x2F2: // xsnegdp
       {
          /* Scalar negate double-precision */
-         IRTemp vecB_no_signbit = newTemp(Ity_V128);
-         IRTemp vecB_signbit_comp = newTemp(Ity_V128);
+         IRTemp BHi_signed = newTemp(Ity_I64);
+         IRTemp BHi_unsigned = newTemp(Ity_I64);
+         IRTemp BHi_negated = newTemp(Ity_I64);
+         IRTemp BHi_negated_signbit = newTemp(Ity_I1);
+         IRTemp vec_result = newTemp(Ity_V128);
          DIP("xsnabsdp v%d,v%d\n", (UInt)XT, (UInt)XB);
-         assign( vecB_no_signbit, binop( Iop_ShrV128, binop( Iop_ShlV128,
-                                                             mkexpr( vB ),
-                                                             mkU8( 1 ) ),
-                                         mkU8( 1 ) ) );
-         assign( vecB_signbit_comp, binop( Iop_ShlV128,
-                                           unop( Iop_NotV128,
-                                                 binop( Iop_ShrV128,
-                                                        mkexpr( vB ),
-                                                        mkU8( 127 ) ) ),
-                                           mkU8( 127 ) ) );
-         putVSReg( XT, binop( Iop_OrV128, mkexpr( vecB_no_signbit ),
-                              mkexpr( vecB_signbit_comp ) ) );
+         assign( BHi_signed, unop( Iop_V128HIto64, mkexpr(vB) ) );
+         assign( BHi_unsigned, binop( Iop_And64, mkexpr(BHi_signed),
+                                      mkU64(VG_PPC_SIGN_MASK) ) );
+         assign( BHi_negated_signbit,
+                 unop( Iop_Not1,
+                       unop( Iop_32to1,
+                             binop( Iop_Shr32,
+                                    unop( Iop_64HIto32,
+                                          binop( Iop_And64,
+                                                 mkexpr(BHi_signed),
+                                                 mkU64(~VG_PPC_SIGN_MASK) )
+                                          ),
+                                    mkU8(31) ) ) ) );
+         assign( BHi_negated,
+                 binop( Iop_Or64,
+                        binop( Iop_32HLto64,
+                               binop( Iop_Shl32,
+                                      unop( Iop_1Uto32,
+                                            mkexpr(BHi_negated_signbit) ),
+                                      mkU8(31) ),
+                               mkU32(0) ),
+                        mkexpr(BHi_unsigned) ) );
+         assign( vec_result, binop( Iop_64HLtoV128, mkexpr(BHi_negated),
+                                    mkU64(0x0ULL)));
+         putVSReg( XT, mkexpr(vec_result));
          break;
       }
       case 0x280: // xsmaxdp (VSX Scalar Maximum Double-Precision)
@@ -15070,7 +15216,7 @@
    {
       IRExpr * exp;
       DIP("lxsiwzx %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      exp = unop( Iop_64HIto32, loadBE( Ity_I64, mkexpr( EA ) ) );
+      exp = unop( Iop_64HIto32, load( Ity_I64, mkexpr( EA ) ) );
       putVSReg( XT, binop( Iop_64HLtoV128,
                            unop( Iop_32Uto64, exp),
                            mkU64(0) ) );
@@ -15080,7 +15226,7 @@
    {
       IRExpr * exp;
       DIP("lxsiwax %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      exp = unop( Iop_64HIto32, loadBE( Ity_I64, mkexpr( EA ) ) );
+      exp = unop( Iop_64HIto32, load( Ity_I64, mkexpr( EA ) ) );
       putVSReg( XT, binop( Iop_64HLtoV128,
                            unop( Iop_32Sto64, exp),
                            mkU64(0) ) );
@@ -15097,8 +15243,7 @@
       exp = unop( Iop_ReinterpF64asI64,
                   unop( Iop_F32toF64,
                         unop( Iop_ReinterpI32asF32,
-                              unop( Iop_64HIto32,
-                                    loadBE( Ity_I64, mkexpr( EA ) ) ) ) ) );
+                              load( Ity_I32, mkexpr( EA ) ) ) ) );
 
       putVSReg( XT, binop( Iop_64HLtoV128, exp, mkU64( 0 ) ) );
       break;
@@ -15107,7 +15252,7 @@
    {
       IRExpr * exp;
       DIP("lxsdx %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      exp = loadBE( Ity_I64, mkexpr( EA ) );
+      exp = load( Ity_I64, mkexpr( EA ) );
       // We need to pass an expression of type Ity_V128 with putVSReg, but the load
       // we just performed is only a DW.  But since the contents of VSR[XT] element 1
       // are undefined after this operation, we can just do a splat op.
@@ -15121,10 +15266,10 @@
       ULong ea_off = 8;
       IRExpr* high_addr;
       DIP("lxvd2x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      high = loadBE( Ity_I64, mkexpr( EA ) );
+      high = load( Ity_I64, mkexpr( EA ) );
       high_addr = binop( addOp, mkexpr( EA ), ty == Ity_I64 ? mkU64( ea_off )
             : mkU32( ea_off ) );
-      low = loadBE( Ity_I64, high_addr );
+      low = load( Ity_I64, high_addr );
       putVSReg( XT, binop( Iop_64HLtoV128, high, low ) );
       break;
    }
@@ -15132,7 +15277,7 @@
    {
       IRTemp data = newTemp(Ity_I64);
       DIP("lxvdsx %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      assign( data, loadBE( Ity_I64, mkexpr( EA ) ) );
+      assign( data, load( Ity_I64, mkexpr( EA ) ) );
       putVSReg( XT, binop( Iop_64HLtoV128, mkexpr( data ), mkexpr( data ) ) );
       break;
    }
@@ -15143,19 +15288,19 @@
       IRExpr* irx_addr;
 
       DIP("lxvw4x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      t3 = loadBE( Ity_I32,  mkexpr( EA ) );
+      t3 = load( Ity_I32,  mkexpr( EA ) );
       ea_off += 4;
       irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                         ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      t2 = loadBE( Ity_I32, irx_addr );
+      t2 = load( Ity_I32, irx_addr );
       ea_off += 4;
       irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                         ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      t1 = loadBE( Ity_I32, irx_addr );
+      t1 = load( Ity_I32, irx_addr );
       ea_off += 4;
       irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                         ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      t0 = loadBE( Ity_I32, irx_addr );
+      t0 = load( Ity_I32, irx_addr );
       putVSReg( XT, binop( Iop_64HLtoV128, binop( Iop_32HLto64, t3, t2 ),
                            binop( Iop_32HLto64, t1, t0 ) ) );
       break;
@@ -15203,7 +15348,7 @@
       DIP("stxsiwx %d,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr);
       high64 = unop( Iop_V128HIto64, mkexpr( vS ) );
       low32  = unop( Iop_64to32, high64 );
-      storeBE( mkexpr( EA ), low32 );
+      store( mkexpr( EA ), low32 );
       break;
    }
    case 0x28C:
@@ -15216,7 +15361,7 @@
       assign(val32, unop( Iop_ReinterpF32asI32,
                           unop( Iop_TruncF64asF32,
                                 mkexpr(high64) ) ) );
-      storeBE( mkexpr( EA ), mkexpr( val32 ) );
+      store( mkexpr( EA ), mkexpr( val32 ) );
       break;
    }
    case 0x2CC:
@@ -15224,7 +15369,7 @@
       IRExpr * high64;
       DIP("stxsdx %d,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr);
       high64 = unop( Iop_V128HIto64, mkexpr( vS ) );
-      storeBE( mkexpr( EA ), high64 );
+      store( mkexpr( EA ), high64 );
       break;
    }
    case 0x3CC:
@@ -15233,9 +15378,9 @@
       DIP("stxvd2x %d,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr);
       high64 = unop( Iop_V128HIto64, mkexpr( vS ) );
       low64 = unop( Iop_V128to64, mkexpr( vS ) );
-      storeBE( mkexpr( EA ), high64 );
-      storeBE( binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ), ty == Ity_I64 ? mkU64( 8 )
-            : mkU32( 8 ) ), low64 );
+      store( mkexpr( EA ), high64 );
+      store( binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
+                    ty == Ity_I64 ? mkU64( 8 ) : mkU32( 8 ) ), low64 );
       break;
    }
    case 0x38C:
@@ -15251,20 +15396,19 @@
       // quad-word aligned.  Therefore, do 4 individual word-size stores.
       assign( hi64, unop( Iop_V128HIto64, mkexpr( vS ) ) );
       assign( lo64, unop( Iop_V128to64, mkexpr( vS ) ) );
-
-      storeBE( mkexpr( EA ), unop( Iop_64HIto32, mkexpr( hi64 ) ) );
+      store( mkexpr( EA ), unop( Iop_64HIto32, mkexpr( hi64 ) ) );
       ea_off += 4;
       irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                         ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      storeBE( irx_addr, unop( Iop_64to32, mkexpr( hi64 ) ) );
+      store( irx_addr, unop( Iop_64to32, mkexpr( hi64 ) ) );
       ea_off += 4;
       irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                         ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      storeBE( irx_addr, unop( Iop_64HIto32, mkexpr( lo64 ) ) );
+      store( irx_addr, unop( Iop_64HIto32, mkexpr( lo64 ) ) );
       ea_off += 4;
       irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
                         ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      storeBE( irx_addr, unop( Iop_64to32, mkexpr( lo64 ) ) );
+      store( irx_addr, unop( Iop_64to32, mkexpr( lo64 ) ) );
 
       break;
    }
@@ -15426,24 +15570,39 @@
    case 0x006: { // lvsl (Load Vector for Shift Left, AV p123)
       IRDirty* d;
       UInt vD_off = vectorGuestRegOffset(vD_addr);
-      IRExpr** args = mkIRExprVec_4(
+      IRExpr** args_be = mkIRExprVec_5(
                          IRExpr_BBPTR(),
-                         mkU32(vD_off), 
+                         mkU32(vD_off),
                          binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
                                           mkU32(0xF)),
-                         mkU32(0)/*left*/ );
+                         mkU32(0)/*left*/,
+                         mkU32(1)/*Big Endian*/);
+      IRExpr** args_le = mkIRExprVec_5(
+                         IRExpr_BBPTR(),
+                         mkU32(vD_off),
+                         binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
+                                          mkU32(0xF)),
+                         mkU32(0)/*left*/,
+                         mkU32(0)/*Little Endian*/);
       if (!mode64) {
          d = unsafeIRDirty_0_N (
                         0/*regparms*/, 
                         "ppc32g_dirtyhelper_LVS",
                         fnptr_to_fnentry(vbi, &ppc32g_dirtyhelper_LVS),
-                        args );
+                        args_be );
       } else {
-         d = unsafeIRDirty_0_N (
-                        0/*regparms*/, 
-                        "ppc64g_dirtyhelper_LVS",
-                        fnptr_to_fnentry(vbi, &ppc64g_dirtyhelper_LVS),
-                        args );
+         if (host_endness == VexEndnessBE)
+            d = unsafeIRDirty_0_N (
+                           0/*regparms*/,
+                           "ppc64g_dirtyhelper_LVS",
+                           fnptr_to_fnentry(vbi, &ppc64g_dirtyhelper_LVS),
+                           args_be );
+         else
+            d = unsafeIRDirty_0_N (
+                           0/*regparms*/,
+                           "ppc64g_dirtyhelper_LVS",
+                           &ppc64g_dirtyhelper_LVS,
+                           args_le );
       }
       DIP("lvsl v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
       /* declare guest state effects */
@@ -15460,24 +15619,40 @@
    case 0x026: { // lvsr (Load Vector for Shift Right, AV p125)
       IRDirty* d;
       UInt vD_off = vectorGuestRegOffset(vD_addr);
-      IRExpr** args = mkIRExprVec_4(
-                         IRExpr_BBPTR(),
-                         mkU32(vD_off), 
-                         binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
-                                          mkU32(0xF)),
-                         mkU32(1)/*right*/ );
+      IRExpr** args_be = mkIRExprVec_5(
+                             IRExpr_BBPTR(),
+                             mkU32(vD_off),
+                             binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
+                                              mkU32(0xF)),
+                             mkU32(1)/*right*/,
+                             mkU32(1)/*Big Endian*/);
+      IRExpr** args_le = mkIRExprVec_5(
+                             IRExpr_BBPTR(),
+                             mkU32(vD_off),
+                             binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
+                                              mkU32(0xF)),
+                             mkU32(1)/*right*/,
+                             mkU32(0)/*Little Endian*/);
+
       if (!mode64) {
          d = unsafeIRDirty_0_N (
-                        0/*regparms*/, 
+                        0/*regparms*/,
                         "ppc32g_dirtyhelper_LVS",
                         fnptr_to_fnentry(vbi, &ppc32g_dirtyhelper_LVS),
-                        args );
+                        args_be );
       } else {
-         d = unsafeIRDirty_0_N (
-                        0/*regparms*/, 
-                        "ppc64g_dirtyhelper_LVS",
-                        fnptr_to_fnentry(vbi, &ppc64g_dirtyhelper_LVS),
-                        args );
+         if (host_endness == VexEndnessBE)
+            d = unsafeIRDirty_0_N (
+                           0/*regparms*/,
+                           "ppc64g_dirtyhelper_LVS",
+                           fnptr_to_fnentry(vbi, &ppc64g_dirtyhelper_LVS),
+                           args_be );
+         else
+            d = unsafeIRDirty_0_N (
+                           0/*regparms*/,
+                           "ppc64g_dirtyhelper_LVS",
+                           &ppc64g_dirtyhelper_LVS,
+                           args_le );
       }
       DIP("lvsr v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
       /* declare guest state effects */
@@ -15496,29 +15671,29 @@
       /* loads addressed byte into vector[EA[0:3]
          since all other destination bytes are undefined,
          can simply load entire vector from 16-aligned EA */
-      putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) );
+      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16)) );
       break;
 
    case 0x027: // lvehx (Load Vector Element Half Word Indexed, AV p121)
       DIP("lvehx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
       /* see note for lvebx */
-      putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) );
+      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16)) );
       break;
 
    case 0x047: // lvewx (Load Vector Element Word Indexed, AV p122)
       DIP("lvewx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
       /* see note for lvebx */
-      putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) );
+      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16)) );
       break;
 
    case 0x067: // lvx (Load Vector Indexed, AV p127)
       DIP("lvx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
-      putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) );
+      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16)) );
       break;
 
    case 0x167: // lvxl (Load Vector Indexed LRU, AV p128)
       DIP("lvxl v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
-      putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) );
+      putVReg( vD_addr, load(Ity_V128, mkexpr(EA_align16)) );
       break;
 
    default:
@@ -15562,12 +15737,16 @@
       assign( eb, binop(Iop_And8, mkU8(0xF),
                         unop(Iop_32to8,
                              mkNarrowTo32(ty, mkexpr(EA)) )) );
-      assign( idx, binop(Iop_Shl8,
-                         binop(Iop_Sub8, mkU8(15), mkexpr(eb)),
-                         mkU8(3)) );
-      storeBE( mkexpr(EA),
-               unop(Iop_32to8, unop(Iop_V128to32,
-                    binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx)))) );
+      if (host_endness == VexEndnessLE) {
+         assign( idx, binop(Iop_Shl8, mkexpr(eb), mkU8(3)) );
+      } else {
+         assign( idx, binop(Iop_Shl8,
+                            binop(Iop_Sub8, mkU8(15), mkexpr(eb)),
+                            mkU8(3)) );
+      }
+      store( mkexpr(EA),
+             unop( Iop_32to8, unop(Iop_V128to32,
+                   binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx)))) );
       break;
    }
    case 0x0A7: { // stvehx (Store Vector Half Word Indexed, AV p132)
@@ -15575,12 +15754,16 @@
       assign( addr_aligned, addr_align(mkexpr(EA), 2) );
       assign( eb, binop(Iop_And8, mkU8(0xF),
                         mkNarrowTo8(ty, mkexpr(addr_aligned) )) );
-      assign( idx, binop(Iop_Shl8,
-                         binop(Iop_Sub8, mkU8(14), mkexpr(eb)),
-                         mkU8(3)) );
-      storeBE( mkexpr(addr_aligned),
-               unop(Iop_32to16, unop(Iop_V128to32,
-                    binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx)))) );
+      if (host_endness == VexEndnessLE) {
+         assign( idx, binop(Iop_Shl8, mkexpr(eb), mkU8(3)) );
+      } else {
+         assign( idx, binop(Iop_Shl8,
+                            binop(Iop_Sub8, mkU8(14), mkexpr(eb)),
+                            mkU8(3)) );
+      }
+      store( mkexpr(addr_aligned),
+             unop( Iop_32to16, unop(Iop_V128to32,
+                   binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx)))) );
       break;
    }
    case 0x0C7: { // stvewx (Store Vector Word Indexed, AV p133)
@@ -15588,23 +15771,27 @@
       assign( addr_aligned, addr_align(mkexpr(EA), 4) );
       assign( eb, binop(Iop_And8, mkU8(0xF),
                         mkNarrowTo8(ty, mkexpr(addr_aligned) )) );
-      assign( idx, binop(Iop_Shl8,
-                         binop(Iop_Sub8, mkU8(12), mkexpr(eb)),
-                         mkU8(3)) );
-      storeBE( mkexpr(addr_aligned),
-               unop(Iop_V128to32,
-                    binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx))) );
+      if (host_endness == VexEndnessLE) {
+         assign( idx, binop(Iop_Shl8, mkexpr(eb), mkU8(3)) );
+      } else {
+         assign( idx, binop(Iop_Shl8,
+                            binop(Iop_Sub8, mkU8(12), mkexpr(eb)),
+                            mkU8(3)) );
+      }
+      store( mkexpr( addr_aligned),
+             unop( Iop_V128to32,
+                   binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx))) );
       break;
    }
 
    case 0x0E7: // stvx (Store Vector Indexed, AV p134)
       DIP("stvx v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr);
-      storeBE( addr_align( mkexpr(EA), 16 ), mkexpr(vS) );
+      store( addr_align( mkexpr(EA), 16 ), mkexpr(vS) );
       break;
 
    case 0x1E7: // stvxl (Store Vector Indexed LRU, AV p135)
       DIP("stvxl v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr);
-      storeBE( addr_align( mkexpr(EA), 16 ), mkexpr(vS) );
+      store( addr_align( mkexpr(EA), 16 ), mkexpr(vS) );
       break;
 
    default:
@@ -18533,7 +18720,7 @@
    /* At least this is simple on PPC32: insns are all 4 bytes long, and
       4-aligned.  So just fish the whole thing out of memory right now
       and have done. */
-   theInstr = getUIntBigendianly( (UChar*)(&guest_code[delta]) );
+   theInstr = getUIntPPCendianly( (UChar*)(&guest_code[delta]) );
 
    if (0) vex_printf("insn: 0x%x\n", theInstr);
 
@@ -18558,12 +18745,12 @@
       UInt word2 = mode64 ? 0x78006800 : 0x5400683E;
       UInt word3 = mode64 ? 0x7800E802 : 0x5400E83E;
       UInt word4 = mode64 ? 0x78009802 : 0x5400983E;
-      if (getUIntBigendianly(code+ 0) == word1 &&
-          getUIntBigendianly(code+ 4) == word2 &&
-          getUIntBigendianly(code+ 8) == word3 &&
-          getUIntBigendianly(code+12) == word4) {
+      if (getUIntPPCendianly(code+ 0) == word1 &&
+          getUIntPPCendianly(code+ 4) == word2 &&
+          getUIntPPCendianly(code+ 8) == word3 &&
+          getUIntPPCendianly(code+12) == word4) {
          /* Got a "Special" instruction preamble.  Which one is it? */
-         if (getUIntBigendianly(code+16) == 0x7C210B78 /* or 1,1,1 */) {
+         if (getUIntPPCendianly(code+16) == 0x7C210B78 /* or 1,1,1 */) {
             /* %R3 = client_request ( %R4 ) */
             DIP("r3 = client_request ( %%r4 )\n");
             delta += 20;
@@ -18573,7 +18760,7 @@
             goto decode_success;
          }
          else
-         if (getUIntBigendianly(code+16) == 0x7C421378 /* or 2,2,2 */) {
+         if (getUIntPPCendianly(code+16) == 0x7C421378 /* or 2,2,2 */) {
             /* %R3 = guest_NRADDR */
             DIP("r3 = guest_NRADDR\n");
             delta += 20;
@@ -18582,18 +18769,27 @@
             goto decode_success;
          }
          else
-         if (getUIntBigendianly(code+16) == 0x7C631B78 /* or 3,3,3 */) {
-            /*  branch-and-link-to-noredir %R11 */
-            DIP("branch-and-link-to-noredir r11\n");
+         if (getUIntPPCendianly(code+16) == 0x7C631B78 /* or 3,3,3 */) {
             delta += 20;
-            putGST( PPC_GST_LR, mkSzImm(ty, guest_CIA_bbstart + (Long)delta) );
-            putGST( PPC_GST_CIA, getIReg(11));
+            if (host_endness == VexEndnessLE) {
+                /*  branch-and-link-to-noredir %R12 */
+                DIP("branch-and-link-to-noredir r12\n");
+                putGST( PPC_GST_LR,
+                        mkSzImm(ty, guest_CIA_bbstart + (Long)delta) );
+                putGST( PPC_GST_CIA, getIReg(12));
+            } else {
+                /*  branch-and-link-to-noredir %R11 */
+                DIP("branch-and-link-to-noredir r11\n");
+                putGST( PPC_GST_LR,
+                        mkSzImm(ty, guest_CIA_bbstart + (Long)delta) );
+                putGST( PPC_GST_CIA, getIReg(11));
+            }
             dres.jk_StopHere = Ijk_NoRedir;
             dres.whatNext    = Dis_StopHere;
             goto decode_success;
          }
          else
-         if (getUIntBigendianly(code+16) == 0x7C842378 /* or 4,4,4 */) {
+         if (getUIntPPCendianly(code+16) == 0x7C842378 /* or 4,4,4 */) {
             /* %R3 = guest_NRADDR_GPR2 */
             DIP("r3 = guest_NRADDR_GPR2\n");
             delta += 20;
@@ -18602,10 +18798,12 @@
             goto decode_success;
          }
          else
-         if (getUIntBigendianly(code+16) == 0x7CA52B78 /* or 5,5,5 */) {
+         if (getUIntPPCendianly(code+16) == 0x7CA52B78 /* or 5,5,5 */) {
             DIP("IR injection\n");
-
-            vex_inject_ir(irsb, Iend_BE);
+            if (host_endness == VexEndnessBE)
+               vex_inject_ir(irsb, Iend_BE);
+            else
+               vex_inject_ir(irsb, Iend_LE);
 
             delta += 20;
             dres.len = 20;
@@ -18625,7 +18823,7 @@
          }
          /* We don't know what it is.  Set opc1/opc2 so decode_failure
             can print the insn following the Special-insn preamble. */
-         theInstr = getUIntBigendianly(code+16);
+         theInstr = getUIntPPCendianly(code+16);
          opc1     = ifieldOPC(theInstr);
          opc2     = ifieldOPClo10(theInstr);
          goto decode_failure;
@@ -19321,7 +19519,7 @@
       case 0x32E: case 0x34E: case 0x36E: // tabortdc., tabortwci., tabortdci.
       case 0x38E: case 0x3AE: case 0x3EE: // tabort., treclaim., trechkpt.
       if (dis_transactional_memory( theInstr,
-                                    getUIntBigendianly( (UChar*)(&guest_code[delta + 4])),
+                                    getUIntPPCendianly( (UChar*)(&guest_code[delta + 4])),
                                     abiinfo, &dres,
                                     resteerOkFn, callback_opaque))
             goto decode_success;
@@ -19437,7 +19635,7 @@
       case 0x2F6: case 0x056: case 0x036: // dcba, dcbf,   dcbst
       case 0x116: case 0x0F6: case 0x3F6: // dcbt, dcbtst, dcbz
       case 0x3D6:                         // icbi
-         if (dis_cache_manage( theInstr, &dres, archinfo )) 
+         if (dis_cache_manage( theInstr, &dres, archinfo ))
             goto decode_success;
          goto decode_failure;
 
@@ -19527,7 +19725,7 @@
         // if allow_V is not set, we'll skip trying to decode.
         if (!allow_V) goto decode_noV;
 
-    	  if (dis_vx_load( theInstr )) goto decode_success;
+        if (dis_vx_load( theInstr )) goto decode_success;
           goto decode_failure;
 
       /* VSX Store */
@@ -19540,14 +19738,14 @@
         // if allow_V is not set, we'll skip trying to decode.
         if (!allow_V) goto decode_noV;
 
-    	  if (dis_vx_store( theInstr )) goto decode_success;
+        if (dis_vx_store( theInstr )) goto decode_success;
     	  goto decode_failure;
 
       /* Miscellaneous ISA 2.06 instructions */
       case 0x1FA: // popcntd
       case 0x17A: // popcntw
       case 0x7A:  // popcntb
-    	  if (dis_int_logic( theInstr )) goto decode_success;
+          if (dis_int_logic( theInstr )) goto decode_success;
     	  goto decode_failure;
 
       case 0x0FC: // bpermd
@@ -19954,6 +20152,13 @@
    /* global -- ick */
    mode64 = guest_arch == VexArchPPC64;
    ty = mode64 ? Ity_I64 : Ity_I32;
+   if (!mode64 && (host_endness_IN == VexEndnessLE)) {
+      vex_printf("disInstr(ppc): Little Endian 32-bit mode is not supported\n");
+      dres.whatNext    = Dis_StopHere;
+      dres.jk_StopHere = Ijk_NoDecode;
+      dres.len         = 0;
+      return dres;
+   }
 
    /* do some sanity checks */
    mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
@@ -19979,7 +20184,7 @@
    guest_CIA_bbstart    = mkSzAddr(ty, guest_IP - delta);
 
    dres = disInstr_PPC_WRK ( resteerOkFn, resteerCisOk, callback_opaque,
-                             delta, archinfo, abiinfo, sigill_diag_IN );
+                             delta, archinfo, abiinfo, sigill_diag_IN);
 
    return dres;
 }
diff --git a/priv/host_ppc_defs.c b/priv/host_ppc_defs.c
index 43101b0..70b65fc 100644
--- a/priv/host_ppc_defs.c
+++ b/priv/host_ppc_defs.c
@@ -3115,24 +3115,38 @@
    return n;
 }
 
-/* Emit an instruction big-endianly */
-static UChar* emit32 ( UChar* p, UInt w32 )
+/* Emit a 32-bit instruction in the byte order given by endness_host */
+static UChar* emit32 ( UChar* p, UInt w32, VexEndness endness_host )
 {
-   *p++ = toUChar((w32 >> 24) & 0x000000FF);
-   *p++ = toUChar((w32 >> 16) & 0x000000FF);
-   *p++ = toUChar((w32 >>  8) & 0x000000FF);
-   *p++ = toUChar((w32)       & 0x000000FF);
+   if (endness_host == VexEndnessBE) {
+      *p++ = toUChar((w32 >> 24) & 0x000000FF);
+      *p++ = toUChar((w32 >> 16) & 0x000000FF);
+      *p++ = toUChar((w32 >>  8) & 0x000000FF);
+      *p++ = toUChar((w32)       & 0x000000FF);
+   } else {
+      *p++ = toUChar((w32)       & 0x000000FF);
+      *p++ = toUChar((w32 >>  8) & 0x000000FF);
+      *p++ = toUChar((w32 >> 16) & 0x000000FF);
+      *p++ = toUChar((w32 >> 24) & 0x000000FF);
+   }
    return p;
 }
 
-/* Fetch an instruction big-endianly */
-static UInt fetch32 ( UChar* p )
+/* Fetch a 32-bit instruction stored in the byte order given by endness_host */
+static UInt fetch32 ( UChar* p, VexEndness endness_host )
 {
    UInt w32 = 0;
-   w32 |= ((0xFF & (UInt)p[0]) << 24);
-   w32 |= ((0xFF & (UInt)p[1]) << 16);
-   w32 |= ((0xFF & (UInt)p[2]) <<  8);
-   w32 |= ((0xFF & (UInt)p[3]) <<  0);
+   if (endness_host == VexEndnessBE) {
+      w32 |= ((0xFF & (UInt)p[0]) << 24);
+      w32 |= ((0xFF & (UInt)p[1]) << 16);
+      w32 |= ((0xFF & (UInt)p[2]) <<  8);
+      w32 |= ((0xFF & (UInt)p[3]) <<  0);
+   } else {
+      w32 |= ((0xFF & (UInt)p[3]) << 24);
+      w32 |= ((0xFF & (UInt)p[2]) << 16);
+      w32 |= ((0xFF & (UInt)p[1]) <<  8);
+      w32 |= ((0xFF & (UInt)p[0]) <<  0);
+   }
    return w32;
 }
 
@@ -3141,7 +3155,7 @@
  */
 
 static UChar* mkFormD ( UChar* p, UInt opc1,
-                        UInt r1, UInt r2, UInt imm )
+                        UInt r1, UInt r2, UInt imm, VexEndness endness_host )
 {
    UInt theInstr;
    vassert(opc1 < 0x40);
@@ -3149,11 +3163,12 @@
    vassert(r2   < 0x20);
    imm = imm & 0xFFFF;
    theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) | (imm));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 static UChar* mkFormMD ( UChar* p, UInt opc1, UInt r1, UInt r2,
-                         UInt imm1, UInt imm2, UInt opc2 )
+                         UInt imm1, UInt imm2, UInt opc2,
+                         VexEndness endness_host )
 {
    UInt theInstr;
    vassert(opc1 < 0x40);
@@ -3166,11 +3181,11 @@
    theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
                ((imm1 & 0x1F)<<11) | (imm2<<5) |
                (opc2<<2) | ((imm1 >> 5)<<1));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 static UChar* mkFormX ( UChar* p, UInt opc1, UInt r1, UInt r2,
-                        UInt r3, UInt opc2, UInt b0 )
+                        UInt r3, UInt opc2, UInt b0, VexEndness endness_host )
 {
    UInt theInstr;
    vassert(opc1 < 0x40);
@@ -3181,11 +3196,12 @@
    vassert(b0   < 0x2);
    theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
                (r3<<11) | (opc2<<1) | (b0));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 static UChar* mkFormXO ( UChar* p, UInt opc1, UInt r1, UInt r2,
-                         UInt r3, UInt b10, UInt opc2, UInt b0 )
+                         UInt r3, UInt b10, UInt opc2, UInt b0,
+                         VexEndness endness_host )
 {
    UInt theInstr;
    vassert(opc1 < 0x40);
@@ -3197,11 +3213,11 @@
    vassert(b0   < 0x2);
    theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
                (r3<<11) | (b10 << 10) | (opc2<<1) | (b0));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 static UChar* mkFormXL ( UChar* p, UInt opc1, UInt f1, UInt f2,
-                         UInt f3, UInt opc2, UInt b0 )
+                         UInt f3, UInt opc2, UInt b0, VexEndness endness_host )
 {
    UInt theInstr;
    vassert(opc1 < 0x40);
@@ -3212,11 +3228,12 @@
    vassert(b0   < 0x2);
    theInstr = ((opc1<<26) | (f1<<21) | (f2<<16) |
                (f3<<11) | (opc2<<1) | (b0));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 // Note: for split field ops, give mnemonic arg
-static UChar* mkFormXFX ( UChar* p, UInt r1, UInt f2, UInt opc2 )
+static UChar* mkFormXFX ( UChar* p, UInt r1, UInt f2, UInt opc2,
+                          VexEndness endness_host )
 {
    UInt theInstr;
    vassert(r1   < 0x20);
@@ -3237,21 +3254,23 @@
    default: vpanic("mkFormXFX(ppch)");
    }
    theInstr = ((31<<26) | (r1<<21) | (f2<<11) | (opc2<<1));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 // Only used by mtfsf
-static UChar* mkFormXFL ( UChar* p, UInt FM, UInt freg, UInt dfp_rm )
+static UChar* mkFormXFL ( UChar* p, UInt FM, UInt freg, UInt dfp_rm,
+                          VexEndness endness_host )
 {
    UInt theInstr;
    vassert(FM   < 0x100);
    vassert(freg < 0x20);
    theInstr = ((63<<26) | (FM<<17) | (dfp_rm<<16) | (freg<<11) | (711<<1));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 static UChar* mkFormXS ( UChar* p, UInt opc1, UInt r1, UInt r2,
-                         UInt imm, UInt opc2, UInt b0 )
+                         UInt imm, UInt opc2, UInt b0,
+                         VexEndness endness_host )
 {
    UInt theInstr;
    vassert(opc1 < 0x40);
@@ -3262,26 +3281,27 @@
    vassert(b0   < 0x2);
    theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
                ((imm & 0x1F)<<11) | (opc2<<2) | ((imm>>5)<<1) | (b0));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 
 #if 0
 // 'b'
-static UChar* mkFormI ( UChar* p, UInt LI, UInt AA, UInt LK )
+static UChar* mkFormI ( UChar* p, UInt LI, UInt AA, UInt LK,
+                        VexEndness endness_host )
 {
    UInt theInstr;
    vassert(LI  < 0x1000000);
    vassert(AA  < 0x2);
    vassert(LK  < 0x2);
    theInstr = ((18<<26) | (LI<<2) | (AA<<1) | (LK));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 #endif
 
 // 'bc'
 static UChar* mkFormB ( UChar* p, UInt BO, UInt BI,
-                        UInt BD, UInt AA, UInt LK )
+                        UInt BD, UInt AA, UInt LK, VexEndness endness_host )
 {
    UInt theInstr;
    vassert(BO  < 0x20);
@@ -3291,12 +3311,13 @@
    vassert(LK  < 0x2);
    theInstr = ((16<<26) | (BO<<21) | (BI<<16) |
                (BD<<2) | (AA<<1) | (LK));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 // rotates
 static UChar* mkFormM ( UChar* p, UInt opc1, UInt r1, UInt r2,
-                        UInt f3, UInt MB, UInt ME, UInt Rc )
+                        UInt f3, UInt MB, UInt ME, UInt Rc,
+                        VexEndness endness_host )
 {
    UInt theInstr;
    vassert(opc1 < 0x40);
@@ -3308,11 +3329,12 @@
    vassert(Rc   < 0x2);
    theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
                (f3<<11) | (MB<<6) | (ME<<1) | (Rc));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 static UChar* mkFormA ( UChar* p, UInt opc1, UInt r1, UInt r2,
-                        UInt r3, UInt r4, UInt opc2, UInt b0 )
+                        UInt r3, UInt r4, UInt opc2, UInt b0,
+                        VexEndness endness_host )
 {
    UInt theInstr;
    vassert(opc1 < 0x40);
@@ -3324,11 +3346,12 @@
    vassert(b0   < 0x2 );
    theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) | (r3<<11) |
                (r4<<6) | (opc2<<1) | (b0));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 static UChar* mkFormZ22 ( UChar* p, UInt opc1, UInt r1, UInt r2,
-                          UInt constant, UInt opc2, UInt b0 )
+                          UInt constant, UInt opc2, UInt b0,
+                          VexEndness endness_host)
 {
    UInt theInstr;
    vassert(opc1     < 0x40);
@@ -3339,11 +3362,12 @@
    vassert(b0       < 0x2);
    theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
                (constant<<10) | (opc2<<1) | (b0));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 static UChar* mkFormZ23 ( UChar* p, UInt opc1, UInt r1, UInt r2,
-                          UInt r3, UInt rmc, UInt opc2, UInt b0 )
+                          UInt r3, UInt rmc, UInt opc2, UInt b0,
+                          VexEndness endness_host)
 {
    UInt theInstr;
    vassert(opc1 < 0x40);
@@ -3355,11 +3379,11 @@
    vassert(b0   < 0x2);
    theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
                (r3<<11) | (rmc<<9) | (opc2<<1) | (b0));
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 static UChar* doAMode_IR ( UChar* p, UInt opc1, UInt rSD,
-                           PPCAMode* am, Bool mode64 )
+                           PPCAMode* am, Bool mode64, VexEndness endness_host )
 {
    UInt rA, idx;
    vassert(am->tag == Pam_IR);
@@ -3374,12 +3398,13 @@
          should be guaranteed to us by iselWordExpr_AMode. */
       vassert(0 == (idx & 3));
    }
-   p = mkFormD(p, opc1, rSD, rA, idx);
+   p = mkFormD(p, opc1, rSD, rA, idx, endness_host);
    return p;
 }
 
 static UChar* doAMode_RR ( UChar* p, UInt opc1, UInt opc2,
-                           UInt rSD, PPCAMode* am, Bool mode64 )
+                           UInt rSD, PPCAMode* am, Bool mode64,
+                           VexEndness endness_host )
 {
    UInt rA, rB;
    vassert(am->tag == Pam_RR);
@@ -3387,13 +3412,14 @@
    rA  = iregNo(am->Pam.RR.base, mode64);
    rB  = iregNo(am->Pam.RR.index, mode64);
    
-   p = mkFormX(p, opc1, rSD, rA, rB, opc2, 0);
+   p = mkFormX(p, opc1, rSD, rA, rB, opc2, 0, endness_host);
    return p;
 }
 
 
 /* Load imm to r_dst */
-static UChar* mkLoadImm ( UChar* p, UInt r_dst, ULong imm, Bool mode64 )
+static UChar* mkLoadImm ( UChar* p, UInt r_dst, ULong imm, Bool mode64,
+                          VexEndness endness_host )
 {
    vassert(r_dst < 0x20);
 
@@ -3411,15 +3437,15 @@
       // sign-extendable from 16 bits
 
       // addi r_dst,0,imm  => li r_dst,imm
-      p = mkFormD(p, 14, r_dst, 0, imm & 0xFFFF);
+      p = mkFormD(p, 14, r_dst, 0, imm & 0xFFFF, endness_host);
    } else {
       if (imm >= 0xFFFFFFFF80000000ULL || imm < 0x80000000ULL) {
          // sign-extendable from 32 bits
 
          // addis r_dst,r0,(imm>>16) => lis r_dst, (imm>>16)
-         p = mkFormD(p, 15, r_dst, 0, (imm>>16) & 0xFFFF);
+         p = mkFormD(p, 15, r_dst, 0, (imm>>16) & 0xFFFF, endness_host);
          // ori r_dst, r_dst, (imm & 0xFFFF)
-         p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+         p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF, endness_host);
       } else {
          // full 64bit immediate load: 5 (five!) insns.
          vassert(mode64);
@@ -3427,24 +3453,24 @@
          // load high word
 
          // lis r_dst, (imm>>48) & 0xFFFF
-         p = mkFormD(p, 15, r_dst, 0, (imm>>48) & 0xFFFF);
+         p = mkFormD(p, 15, r_dst, 0, (imm>>48) & 0xFFFF, endness_host);
 
          // ori r_dst, r_dst, (imm>>32) & 0xFFFF
          if ((imm>>32) & 0xFFFF)
-            p = mkFormD(p, 24, r_dst, r_dst, (imm>>32) & 0xFFFF);
+            p = mkFormD(p, 24, r_dst, r_dst, (imm>>32) & 0xFFFF, endness_host);
          
          // shift r_dst low word to high word => rldicr
-         p = mkFormMD(p, 30, r_dst, r_dst, 32, 31, 1);
+         p = mkFormMD(p, 30, r_dst, r_dst, 32, 31, 1, endness_host);
 
          // load low word
 
          // oris r_dst, r_dst, (imm>>16) & 0xFFFF
          if ((imm>>16) & 0xFFFF)
-            p = mkFormD(p, 25, r_dst, r_dst, (imm>>16) & 0xFFFF);
+            p = mkFormD(p, 25, r_dst, r_dst, (imm>>16) & 0xFFFF, endness_host);
 
          // ori r_dst, r_dst, (imm) & 0xFFFF
          if (imm & 0xFFFF)
-            p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+            p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF, endness_host);
       }
    }
    return p;
@@ -3455,7 +3481,8 @@
    fewer.  This is needed for generating fixed sized patchable
    sequences. */
 static UChar* mkLoadImm_EXACTLY2or5 ( UChar* p,
-                                      UInt r_dst, ULong imm, Bool mode64 )
+                                      UInt r_dst, ULong imm, Bool mode64,
+                                      VexEndness endness_host )
 {
    vassert(r_dst < 0x20);
 
@@ -3470,29 +3497,29 @@
 
    if (!mode64) {
       // addis r_dst,r0,(imm>>16) => lis r_dst, (imm>>16)
-      p = mkFormD(p, 15, r_dst, 0, (imm>>16) & 0xFFFF);
+      p = mkFormD(p, 15, r_dst, 0, (imm>>16) & 0xFFFF, endness_host);
       // ori r_dst, r_dst, (imm & 0xFFFF)
-      p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+      p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF, endness_host);
 
    } else {
       // full 64bit immediate load: 5 (five!) insns.
 
       // load high word
       // lis r_dst, (imm>>48) & 0xFFFF
-      p = mkFormD(p, 15, r_dst, 0, (imm>>48) & 0xFFFF);
+      p = mkFormD(p, 15, r_dst, 0, (imm>>48) & 0xFFFF, endness_host);
 
       // ori r_dst, r_dst, (imm>>32) & 0xFFFF
-      p = mkFormD(p, 24, r_dst, r_dst, (imm>>32) & 0xFFFF);
+      p = mkFormD(p, 24, r_dst, r_dst, (imm>>32) & 0xFFFF, endness_host);
          
       // shift r_dst low word to high word => rldicr
-      p = mkFormMD(p, 30, r_dst, r_dst, 32, 31, 1);
+      p = mkFormMD(p, 30, r_dst, r_dst, 32, 31, 1, endness_host);
 
       // load low word
       // oris r_dst, r_dst, (imm>>16) & 0xFFFF
-      p = mkFormD(p, 25, r_dst, r_dst, (imm>>16) & 0xFFFF);
+      p = mkFormD(p, 25, r_dst, r_dst, (imm>>16) & 0xFFFF, endness_host);
 
       // ori r_dst, r_dst, (imm) & 0xFFFF
-      p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+      p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF, endness_host);
    }
    return p;
 }
@@ -3500,7 +3527,8 @@
 /* Checks whether the sequence of bytes at p was indeed created
    by mkLoadImm_EXACTLY2or5 with the given parameters. */
 static Bool isLoadImm_EXACTLY2or5 ( UChar* p_to_check,
-                                    UInt r_dst, ULong imm, Bool mode64 )
+                                    UInt r_dst, ULong imm, Bool mode64,
+                                    VexEndness endness_host )
 {
    vassert(r_dst < 0x20);
 
@@ -3517,13 +3545,13 @@
       UInt   expect[2] = { 0, 0 };
       UChar* p         = (UChar*)&expect[0];
       // addis r_dst,r0,(imm>>16) => lis r_dst, (imm>>16)
-      p = mkFormD(p, 15, r_dst, 0, (imm>>16) & 0xFFFF);
+      p = mkFormD(p, 15, r_dst, 0, (imm>>16) & 0xFFFF, endness_host);
       // ori r_dst, r_dst, (imm & 0xFFFF)
-      p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+      p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF, endness_host);
       vassert(p == (UChar*)&expect[2]);
 
-      return fetch32(p_to_check + 0) == expect[0]
-             && fetch32(p_to_check + 4) == expect[1];
+      return fetch32(p_to_check + 0, endness_host) == expect[0]
+             && fetch32(p_to_check + 4, endness_host) == expect[1];
 
    } else {
       UInt   expect[5] = { 0, 0, 0, 0, 0 };
@@ -3532,28 +3560,28 @@
 
       // load high word
       // lis r_dst, (imm>>48) & 0xFFFF
-      p = mkFormD(p, 15, r_dst, 0, (imm>>48) & 0xFFFF);
+      p = mkFormD(p, 15, r_dst, 0, (imm>>48) & 0xFFFF, endness_host);
 
       // ori r_dst, r_dst, (imm>>32) & 0xFFFF
-      p = mkFormD(p, 24, r_dst, r_dst, (imm>>32) & 0xFFFF);
+      p = mkFormD(p, 24, r_dst, r_dst, (imm>>32) & 0xFFFF, endness_host);
          
       // shift r_dst low word to high word => rldicr
-      p = mkFormMD(p, 30, r_dst, r_dst, 32, 31, 1);
+      p = mkFormMD(p, 30, r_dst, r_dst, 32, 31, 1, endness_host);
 
       // load low word
       // oris r_dst, r_dst, (imm>>16) & 0xFFFF
-      p = mkFormD(p, 25, r_dst, r_dst, (imm>>16) & 0xFFFF);
+      p = mkFormD(p, 25, r_dst, r_dst, (imm>>16) & 0xFFFF, endness_host);
 
       // ori r_dst, r_dst, (imm) & 0xFFFF
-      p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+      p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF, endness_host);
 
       vassert(p == (UChar*)&expect[5]);
 
-      return fetch32(p_to_check + 0) == expect[0]
-             && fetch32(p_to_check + 4) == expect[1]
-             && fetch32(p_to_check + 8) == expect[2]
-             && fetch32(p_to_check + 12) == expect[3]
-             && fetch32(p_to_check + 16) == expect[4];
+      return fetch32(p_to_check + 0, endness_host) == expect[0]
+             && fetch32(p_to_check + 4,  endness_host) == expect[1]
+             && fetch32(p_to_check + 8,  endness_host) == expect[2]
+             && fetch32(p_to_check + 12, endness_host) == expect[3]
+             && fetch32(p_to_check + 16, endness_host) == expect[4];
    }
 }
 
@@ -3562,7 +3590,7 @@
    the Pin_Load and Pin_Store cases below. */
 static UChar* do_load_or_store_machine_word ( 
                  UChar* p, Bool isLoad,
-                 UInt reg, PPCAMode* am, Bool mode64 )
+                 UInt reg, PPCAMode* am, Bool mode64, VexEndness endness_host )
 {
    if (isLoad) {
       UInt opc1, sz = mode64 ? 8 : 4;
@@ -3576,7 +3604,7 @@
                case 8:  opc1 = 58; vassert(mode64);  break;
                default: vassert(0);
             }
-            p = doAMode_IR(p, opc1, reg, am, mode64);
+            p = doAMode_IR(p, opc1, reg, am, mode64, endness_host);
             break;
          case Pam_RR:
             /* we could handle this case, but we don't expect to ever
@@ -3597,7 +3625,7 @@
                case 8:  opc1 = 62; vassert(mode64);  break;
                default: vassert(0);
             }
-            p = doAMode_IR(p, opc1, reg, am, mode64);
+            p = doAMode_IR(p, opc1, reg, am, mode64, endness_host);
             break;
          case Pam_RR:
             /* we could handle this case, but we don't expect to ever
@@ -3614,7 +3642,7 @@
    do_load_or_store_machine_word above. */
 static UChar* do_load_or_store_word32 ( 
                  UChar* p, Bool isLoad,
-                 UInt reg, PPCAMode* am, Bool mode64 )
+                 UInt reg, PPCAMode* am, Bool mode64, VexEndness endness_host )
 {
    if (isLoad) {
       UInt opc1;
@@ -3624,7 +3652,7 @@
                vassert(0 == (am->Pam.IR.index & 3));
             }
             opc1 = 32;
-            p = doAMode_IR(p, opc1, reg, am, mode64);
+            p = doAMode_IR(p, opc1, reg, am, mode64, endness_host);
             break;
          case Pam_RR:
             /* we could handle this case, but we don't expect to ever
@@ -3641,7 +3669,7 @@
                vassert(0 == (am->Pam.IR.index & 3));
             }
             opc1 = 36;
-            p = doAMode_IR(p, opc1, reg, am, mode64);
+            p = doAMode_IR(p, opc1, reg, am, mode64, endness_host);
             break;
          case Pam_RR:
             /* we could handle this case, but we don't expect to ever
@@ -3655,20 +3683,21 @@
 }
 
 /* Move r_dst to r_src */
-static UChar* mkMoveReg ( UChar* p, UInt r_dst, UInt r_src )
+static UChar* mkMoveReg ( UChar* p, UInt r_dst, UInt r_src,
+                          VexEndness endness_host )
 {
    vassert(r_dst < 0x20);
    vassert(r_src < 0x20);
 
    if (r_dst != r_src) {
       /* or r_dst, r_src, r_src */
-      p = mkFormX(p, 31, r_src, r_dst, r_src, 444, 0 );
+      p = mkFormX(p, 31, r_src, r_dst, r_src, 444, 0, endness_host );
    }
    return p;
 }
 
 static UChar* mkFormVX ( UChar* p, UInt opc1, UInt r1, UInt r2,
-                         UInt r3, UInt opc2 )
+                         UInt r3, UInt opc2, VexEndness endness_host )
 {
    UInt theInstr;
    vassert(opc1 < 0x40);
@@ -3677,11 +3706,12 @@
    vassert(r3   < 0x20);
    vassert(opc2 < 0x800);
    theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) | (r3<<11) | opc2);
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 static UChar* mkFormVXR ( UChar* p, UInt opc1, UInt r1, UInt r2,
-                          UInt r3, UInt Rc, UInt opc2 )
+                          UInt r3, UInt Rc, UInt opc2,
+                          VexEndness endness_host )
 {
    UInt theInstr;
    vassert(opc1 < 0x40);
@@ -3692,11 +3722,11 @@
    vassert(opc2 < 0x400);
    theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
                (r3<<11) | (Rc<<10) | opc2);
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 static UChar* mkFormVA ( UChar* p, UInt opc1, UInt r1, UInt r2,
-                         UInt r3, UInt r4, UInt opc2 )
+                         UInt r3, UInt r4, UInt opc2, VexEndness endness_host )
 {
    UInt theInstr;
    vassert(opc1 < 0x40);
@@ -3707,7 +3737,7 @@
    vassert(opc2 < 0x40);
    theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
                (r3<<11) | (r4<<6) | opc2);
-   return emit32(p, theInstr);
+   return emit32(p, theInstr, endness_host);
 }
 
 
@@ -3724,7 +3754,7 @@
                     void* disp_cp_chain_me_to_slowEP,
                     void* disp_cp_chain_me_to_fastEP,
                     void* disp_cp_xindir,
-                    void* disp_cp_xassisted )
+                    void* disp_cp_xassisted)
 {
    UChar* p = &buf[0];
    vassert(nbuf >= 32);
@@ -3737,7 +3767,7 @@
 
    case Pin_LI:
       p = mkLoadImm(p, iregNo(i->Pin.LI.dst, mode64),
-                    i->Pin.LI.imm64, mode64);
+                    i->Pin.LI.imm64, mode64, endness_host);
       goto done;
 
    case Pin_Alu: {
@@ -3754,10 +3784,10 @@
             /* addi (PPC32 p350) */
             vassert(srcR->Prh.Imm.syned);
             vassert(srcR->Prh.Imm.imm16 != 0x8000);
-            p = mkFormD(p, 14, r_dst, r_srcL, srcR->Prh.Imm.imm16);
+            p = mkFormD(p, 14, r_dst, r_srcL, srcR->Prh.Imm.imm16, endness_host);
          } else {
             /* add (PPC32 p347) */
-            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 266, 0);
+            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 266, 0, endness_host);
          }
          break;
 
@@ -3766,10 +3796,11 @@
             /* addi (PPC32 p350), but with negated imm */
             vassert(srcR->Prh.Imm.syned);
             vassert(srcR->Prh.Imm.imm16 != 0x8000);
-            p = mkFormD(p, 14, r_dst, r_srcL, (- srcR->Prh.Imm.imm16));
+            p = mkFormD(p, 14, r_dst, r_srcL, (- srcR->Prh.Imm.imm16),
+                        endness_host);
          } else {
             /* subf (PPC32 p537), with args the "wrong" way round */
-            p = mkFormXO(p, 31, r_dst, r_srcR, r_srcL, 0, 40, 0);
+            p = mkFormXO(p, 31, r_dst, r_srcR, r_srcL, 0, 40, 0, endness_host);
          }
          break;
 
@@ -3777,10 +3808,10 @@
          if (immR) {
             /* andi. (PPC32 p358) */
             vassert(!srcR->Prh.Imm.syned);
-            p = mkFormD(p, 28, r_srcL, r_dst, srcR->Prh.Imm.imm16);
+            p = mkFormD(p, 28, r_srcL, r_dst, srcR->Prh.Imm.imm16, endness_host);
          } else {
             /* and (PPC32 p356) */
-            p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 28, 0);
+            p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 28, 0, endness_host);
          }
          break;
 
@@ -3788,10 +3819,10 @@
          if (immR) {
             /* ori (PPC32 p497) */
             vassert(!srcR->Prh.Imm.syned);
-            p = mkFormD(p, 24, r_srcL, r_dst, srcR->Prh.Imm.imm16);
+            p = mkFormD(p, 24, r_srcL, r_dst, srcR->Prh.Imm.imm16, endness_host);
          } else {
             /* or (PPC32 p495) */
-            p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 444, 0);
+            p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 444, 0, endness_host);
          }
          break;
 
@@ -3799,10 +3830,10 @@
          if (immR) {
             /* xori (PPC32 p550) */
             vassert(!srcR->Prh.Imm.syned);
-            p = mkFormD(p, 26, r_srcL, r_dst, srcR->Prh.Imm.imm16);
+            p = mkFormD(p, 26, r_srcL, r_dst, srcR->Prh.Imm.imm16, endness_host);
          } else {
             /* xor (PPC32 p549) */
-            p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 316, 0);
+            p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 316, 0, endness_host);
          }
          break;
 
@@ -3834,10 +3865,10 @@
                UInt n = srcR->Prh.Imm.imm16;
                vassert(!srcR->Prh.Imm.syned);
                vassert(n > 0 && n < 32);
-               p = mkFormM(p, 21, r_srcL, r_dst, n, 0, 31-n, 0);
+               p = mkFormM(p, 21, r_srcL, r_dst, n, 0, 31-n, 0, endness_host);
             } else {
                /* slw (PPC32 p505) */
-               p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 24, 0);
+               p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 24, 0, endness_host);
             }
          } else {
             if (immR) {
@@ -3848,10 +3879,10 @@
                UInt n = srcR->Prh.Imm.imm16;
                vassert(!srcR->Prh.Imm.syned);
                vassert(n > 0 && n < 64);
-               p = mkFormMD(p, 30, r_srcL, r_dst, n, 63-n, 1);
+               p = mkFormMD(p, 30, r_srcL, r_dst, n, 63-n, 1, endness_host);
             } else {
                /* sld (PPC64 p568) */
-               p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 27, 0);
+               p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 27, 0, endness_host);
             }
          }
          break;
@@ -3866,10 +3897,10 @@
                UInt n = srcR->Prh.Imm.imm16;
                vassert(!srcR->Prh.Imm.syned);
                vassert(n > 0 && n < 32);
-               p = mkFormM(p, 21, r_srcL, r_dst, 32-n, n, 31, 0);
+               p = mkFormM(p, 21, r_srcL, r_dst, 32-n, n, 31, 0, endness_host);
             } else {
                /* srw (PPC32 p508) */
-               p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 536, 0);
+               p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 536, 0, endness_host);
             }
          } else {
             if (immR) {
@@ -3880,10 +3911,10 @@
                UInt n = srcR->Prh.Imm.imm16;
                vassert(!srcR->Prh.Imm.syned);
                vassert(n > 0 && n < 64);
-               p = mkFormMD(p, 30, r_srcL, r_dst, 64-n, n, 0);
+               p = mkFormMD(p, 30, r_srcL, r_dst, 64-n, n, 0, endness_host);
             } else {
                /* srd (PPC64 p574) */
-               p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 539, 0);
+               p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 539, 0, endness_host);
             }
          }
          break;
@@ -3901,10 +3932,10 @@
                   vassert(n >= 0 && n < 32);
                else 
                   vassert(n > 0 && n < 32);
-               p = mkFormX(p, 31, r_srcL, r_dst, n, 824, 0);
+               p = mkFormX(p, 31, r_srcL, r_dst, n, 824, 0, endness_host);
             } else {
                /* sraw (PPC32 p506) */
-               p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 792, 0);
+               p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 792, 0, endness_host);
             }
          } else {
             if (immR) {
@@ -3912,10 +3943,10 @@
                UInt n = srcR->Prh.Imm.imm16;
                vassert(!srcR->Prh.Imm.syned);
                vassert(n > 0 && n < 64);
-               p = mkFormXS(p, 31, r_srcL, r_dst, n, 413, 0);
+               p = mkFormXS(p, 31, r_srcL, r_dst, n, 413, 0, endness_host);
             } else {
                /* srad (PPC32 p570) */
-               p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 794, 0);
+               p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 794, 0, endness_host);
             }
          }
          break;
@@ -3935,15 +3966,15 @@
       
       if (isAdd) {
          if (setC) /* addc (PPC32 p348) */
-            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 10, 0);
+            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 10, 0, endness_host);
          else          /* adde (PPC32 p349) */
-            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 138, 0);
+            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 138, 0, endness_host);
       } else {
          /* subfX, with args the "wrong" way round */
          if (setC) /* subfc (PPC32 p538) */
-            p = mkFormXO(p, 31, r_dst, r_srcR, r_srcL, 0, 8, 0);
+            p = mkFormXO(p, 31, r_dst, r_srcR, r_srcL, 0, 8, 0, endness_host);
          else          /* subfe (PPC32 p539) */
-            p = mkFormXO(p, 31, r_dst, r_srcR, r_srcL, 0, 136, 0);
+            p = mkFormXO(p, 31, r_dst, r_srcR, r_srcL, 0, 136, 0, endness_host);
       }
       goto done;
    }
@@ -3967,17 +3998,17 @@
          imm_srcR = srcR->Prh.Imm.imm16;
          if (syned) {  // cmpw/di  (signed)   (PPC32 p368)
             vassert(imm_srcR != 0x8000);
-            p = mkFormD(p, 11, fld1, r_srcL, imm_srcR);
+            p = mkFormD(p, 11, fld1, r_srcL, imm_srcR, endness_host);
          } else {      // cmplw/di (unsigned) (PPC32 p370)
-            p = mkFormD(p, 10, fld1, r_srcL, imm_srcR);
+            p = mkFormD(p, 10, fld1, r_srcL, imm_srcR, endness_host);
          }
          break;
       case Prh_Reg:
          r_srcR = iregNo(srcR->Prh.Reg.reg, mode64);
          if (syned)  // cmpwi  (signed)   (PPC32 p367)
-            p = mkFormX(p, 31, fld1, r_srcL, r_srcR, 0, 0);
+            p = mkFormX(p, 31, fld1, r_srcL, r_srcR, 0, 0, endness_host);
          else        // cmplwi (unsigned) (PPC32 p379)
-            p = mkFormX(p, 31, fld1, r_srcL, r_srcR, 32, 0);
+            p = mkFormX(p, 31, fld1, r_srcL, r_srcR, 32, 0, endness_host);
          break;
       default: 
          goto bad;
@@ -3991,21 +4022,21 @@
 
       switch (i->Pin.Unary.op) {
       case Pun_NOT:  // nor r_dst,r_src,r_src
-         p = mkFormX(p, 31, r_src, r_dst, r_src, 124, 0);
+         p = mkFormX(p, 31, r_src, r_dst, r_src, 124, 0, endness_host);
          break;
       case Pun_NEG:  // neg r_dst,r_src
-         p = mkFormXO(p, 31, r_dst, r_src, 0, 0, 104, 0);
+         p = mkFormXO(p, 31, r_dst, r_src, 0, 0, 104, 0, endness_host);
          break;
       case Pun_CLZ32:  // cntlzw r_dst, r_src
-         p = mkFormX(p, 31, r_src, r_dst, 0, 26, 0);
+         p = mkFormX(p, 31, r_src, r_dst, 0, 26, 0, endness_host);
          break;
       case Pun_CLZ64:  // cntlzd r_dst, r_src
          vassert(mode64);
-         p = mkFormX(p, 31, r_src, r_dst, 0, 58, 0);
+         p = mkFormX(p, 31, r_src, r_dst, 0, 58, 0, endness_host);
          break;
       case Pun_EXTSW:  // extsw r_dst, r_src
          vassert(mode64);
-         p = mkFormX(p, 31, r_src, r_dst, 0, 986, 0);
+         p = mkFormX(p, 31, r_src, r_dst, 0, 986, 0, endness_host);
          break;
       default: goto bad;
       }
@@ -4026,22 +4057,25 @@
          // mul hi words, must consider sign
          if (sz32) {
             if (syned)  // mulhw r_dst,r_srcL,r_srcR
-               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 75, 0);
+               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 75, 0,
+                            endness_host);
             else        // mulhwu r_dst,r_srcL,r_srcR
-               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 11, 0);
+               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 11, 0,
+                            endness_host);
          } else {
             if (syned)  // mulhd r_dst,r_srcL,r_srcR
-               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 73, 0);
+               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 73, 0,
+                            endness_host);
             else        // mulhdu r_dst,r_srcL,r_srcR
-               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 9, 0);
+               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 9, 0, endness_host);
          }
       } else {
          // mul low word, sign is irrelevant
          vassert(!i->Pin.MulL.syned);
          if (sz32)      // mullw r_dst,r_srcL,r_srcR
-            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 235, 0);
+            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 235, 0, endness_host);
          else           // mulld r_dst,r_srcL,r_srcR
-            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 233, 0);
+            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 233, 0, endness_host);
       }
       goto done;
    }
@@ -4060,28 +4094,32 @@
          if (sz32) {
             if (syned)
                // divwe r_dst,r_srcL,r_srcR
-               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 427, 0);
+               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 427, 0,
+                            endness_host);
             else
                // divweu r_dst,r_srcL,r_srcR
-               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 395, 0);
+               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 395, 0,
+                            endness_host);
          } else {
             if (syned)
                // divde r_dst,r_srcL,r_srcR
-               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 425, 0);
+               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 425, 0,
+                            endness_host);
             else
                // divdeu r_dst,r_srcL,r_srcR
-               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 393, 0);
+               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 393, 0,
+                            endness_host);
          }
       } else if (sz32) {
          if (syned)  // divw r_dst,r_srcL,r_srcR
-            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 491, 0);
+            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 491, 0, endness_host);
          else        // divwu r_dst,r_srcL,r_srcR
-            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 459, 0);
+            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 459, 0, endness_host);
       } else {
          if (syned)  // divd r_dst,r_srcL,r_srcR
-            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 489, 0);
+            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 489, 0, endness_host);
          else        // divdu r_dst,r_srcL,r_srcR
-            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 457, 0);
+            p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 457, 0, endness_host);
       }
       goto done;
    }
@@ -4113,13 +4151,13 @@
       }
 
       /* load target to r_dst */                          // p += 4|8|20
-      p = mkLoadImm(p, r_dst, i->Pin.Call.target, mode64);
+      p = mkLoadImm(p, r_dst, i->Pin.Call.target, mode64, endness_host);
 
       /* mtspr 9,r_dst => move r_dst to count register */
-      p = mkFormXFX(p, r_dst, 9, 467);                    // p += 4
+      p = mkFormXFX(p, r_dst, 9, 467, endness_host);               // p += 4
       
       /* bctrl => branch to count register (and save to lr) */
-      p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 1);      // p += 4
+      p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 1, endness_host); // p += 4
 
       /* Fix up the conditional jump, if there was one. */
       if (cond.test != Pct_ALWAYS) {
@@ -4127,7 +4165,7 @@
          vassert(delta >= 16 && delta <= 32);
          /* bc !ct,cf,delta */
          mkFormB(ptmp, invertCondTest(cond.test),
-                 cond.flag, (delta>>2), 0, 0);
+                 cond.flag, (delta>>2), 0, 0, endness_host);
       }
       goto done;
    }
@@ -4156,11 +4194,12 @@
       /* Update the guest CIA. */
       /* imm32/64 r30, dstGA */
       if (!mode64) vassert(0 == (((ULong)i->Pin.XDirect.dstGA) >> 32));
-      p = mkLoadImm(p, /*r*/30, (ULong)i->Pin.XDirect.dstGA, mode64);
+      p = mkLoadImm(p, /*r*/30, (ULong)i->Pin.XDirect.dstGA, mode64,
+                    endness_host);
       /* stw/std r30, amCIA */
       p = do_load_or_store_machine_word(
              p, False/*!isLoad*/,
-             /*r*/30, i->Pin.XDirect.amCIA, mode64
+             /*r*/30, i->Pin.XDirect.amCIA, mode64, endness_host
           );
 
       /* --- FIRST PATCHABLE BYTE follows --- */
@@ -4173,11 +4212,11 @@
                = i->Pin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP 
                                          : disp_cp_chain_me_to_slowEP;
       p = mkLoadImm_EXACTLY2or5(
-             p, /*r*/30, Ptr_to_ULong(disp_cp_chain_me), mode64);
+             p, /*r*/30, Ptr_to_ULong(disp_cp_chain_me), mode64, endness_host);
       /* mtctr r30 */
-      p = mkFormXFX(p, /*r*/30, 9, 467);
+      p = mkFormXFX(p, /*r*/30, 9, 467, endness_host);
       /* bctrl */
-      p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 1);
+      p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 1, endness_host);
       /* --- END of PATCHABLE BYTES --- */
 
       /* Fix up the conditional jump, if there was one. */
@@ -4186,7 +4225,7 @@
          vassert(delta >= 16 && delta <= 64 && 0 == (delta & 3));
          /* bc !ct,cf,delta */
          mkFormB(ptmp, invertCondTest(i->Pin.XDirect.cond.test),
-                 i->Pin.XDirect.cond.flag, (delta>>2), 0, 0);
+                 i->Pin.XDirect.cond.flag, (delta>>2), 0, 0, endness_host);
       }
       goto done;
    }
@@ -4217,15 +4256,16 @@
       p = do_load_or_store_machine_word(
              p, False/*!isLoad*/,
              iregNo(i->Pin.XIndir.dstGA, mode64),
-             i->Pin.XIndir.amCIA, mode64
+             i->Pin.XIndir.amCIA, mode64, endness_host
           );
 
       /* imm32/64 r30, VG_(disp_cp_xindir) */
-      p = mkLoadImm(p, /*r*/30, (ULong)Ptr_to_ULong(disp_cp_xindir), mode64);
+      p = mkLoadImm(p, /*r*/30, (ULong)Ptr_to_ULong(disp_cp_xindir), mode64,
+                    endness_host);
       /* mtctr r30 */
-      p = mkFormXFX(p, /*r*/30, 9, 467);
+      p = mkFormXFX(p, /*r*/30, 9, 467, endness_host);
       /* bctr */
-      p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 0);
+      p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 0, endness_host);
 
       /* Fix up the conditional jump, if there was one. */
       if (i->Pin.XIndir.cond.test != Pct_ALWAYS) {
@@ -4233,7 +4273,7 @@
          vassert(delta >= 16 && delta <= 32 && 0 == (delta & 3));
          /* bc !ct,cf,delta */
          mkFormB(ptmp, invertCondTest(i->Pin.XIndir.cond.test),
-                 i->Pin.XIndir.cond.flag, (delta>>2), 0, 0);
+                 i->Pin.XIndir.cond.flag, (delta>>2), 0, 0, endness_host);
       }
       goto done;
    }
@@ -4256,7 +4296,7 @@
       p = do_load_or_store_machine_word(
              p, False/*!isLoad*/,
              iregNo(i->Pin.XIndir.dstGA, mode64),
-             i->Pin.XIndir.amCIA, mode64
+             i->Pin.XIndir.amCIA, mode64, endness_host
           );
 
       /* imm32/64 r31, $magic_number */
@@ -4285,15 +4325,16 @@
             vpanic("emit_ARMInstr.Pin_XAssisted: unexpected jump kind");
       }
       vassert(trcval != 0);
-      p = mkLoadImm(p, /*r*/31, trcval, mode64);
+      p = mkLoadImm(p, /*r*/31, trcval, mode64, endness_host);
 
       /* imm32/64 r30, VG_(disp_cp_xassisted) */
       p = mkLoadImm(p, /*r*/30,
-                       (ULong)Ptr_to_ULong(disp_cp_xassisted), mode64);
+                       (ULong)Ptr_to_ULong(disp_cp_xassisted), mode64,
+                     endness_host);
       /* mtctr r30 */
-      p = mkFormXFX(p, /*r*/30, 9, 467);
+      p = mkFormXFX(p, /*r*/30, 9, 467, endness_host);
       /* bctr */
-      p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 0);
+      p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 0, endness_host);
 
       /* Fix up the conditional jump, if there was one. */
       if (i->Pin.XAssisted.cond.test != Pct_ALWAYS) {
@@ -4301,7 +4342,7 @@
          vassert(delta >= 16 && delta <= 32 && 0 == (delta & 3));
          /* bc !ct,cf,delta */
          mkFormB(ptmp, invertCondTest(i->Pin.XAssisted.cond.test),
-                 i->Pin.XAssisted.cond.flag, (delta>>2), 0, 0);
+                 i->Pin.XAssisted.cond.flag, (delta>>2), 0, 0, endness_host);
       }
       goto done;
    }
@@ -4328,11 +4369,11 @@
       switch (i->Pin.CMov.src->tag) {
       case Pri_Imm:
          imm_src = i->Pin.CMov.src->Pri.Imm;
-         p = mkLoadImm(p, r_dst, imm_src, mode64);  // p += 4|8|20
+         p = mkLoadImm(p, r_dst, imm_src, mode64, endness_host);  // p += 4|8|20
          break;
       case Pri_Reg:
          r_src = iregNo(i->Pin.CMov.src->Pri.Reg, mode64);
-         p = mkMoveReg(p, r_dst, r_src);            // p += 4
+         p = mkMoveReg(p, r_dst, r_src, endness_host);            // p += 4
          break;
       default: goto bad;
       }
@@ -4343,7 +4384,7 @@
          vassert(delta >= 8 && delta <= 24);
          /* bc !ct,cf,delta */
          mkFormB(ptmp, invertCondTest(cond.test),
-                 cond.flag, (delta>>2), 0, 0);
+                 cond.flag, (delta>>2), 0, 0, endness_host);
       }
       goto done;
    }
@@ -4365,7 +4406,7 @@
             case 8:  opc1 = 58; vassert(mode64); break;
             default: goto bad;
          }
-         p = doAMode_IR(p, opc1, r_dst, am_addr, mode64);
+         p = doAMode_IR(p, opc1, r_dst, am_addr, mode64, endness_host);
          goto done;
       case Pam_RR:
          switch(sz) {
@@ -4375,7 +4416,7 @@
             case 8:  opc2 = 21; vassert(mode64); break;
             default: goto bad;
          }
-         p = doAMode_RR(p, 31, opc2, r_dst, am_addr, mode64);
+         p = doAMode_RR(p, 31, opc2, r_dst, am_addr, mode64, endness_host);
          goto done;
       default:
          goto bad;
@@ -4385,12 +4426,12 @@
    case Pin_LoadL: {
       if (i->Pin.LoadL.sz == 4) {
          p = mkFormX(p, 31, iregNo(i->Pin.LoadL.dst, mode64),
-                     0, iregNo(i->Pin.LoadL.src, mode64), 20, 0);
+                     0, iregNo(i->Pin.LoadL.src, mode64), 20, 0, endness_host);
          goto done;
       }
       if (i->Pin.LoadL.sz == 8 && mode64) {
          p = mkFormX(p, 31, iregNo(i->Pin.LoadL.dst, mode64),
-                     0, iregNo(i->Pin.LoadL.src, mode64), 84, 0);
+                     0, iregNo(i->Pin.LoadL.src, mode64), 84, 0, endness_host);
          goto done;
       }
       goto bad;
@@ -4405,22 +4446,22 @@
 
       if (cond.test == Pct_ALWAYS) {
          // Just load 1 to dst => li dst,1
-         p = mkFormD(p, 14, r_dst, 0, 1);
+         p = mkFormD(p, 14, r_dst, 0, 1, endness_host);
       } else {
          vassert(cond.flag != Pcf_NONE);
          rot_imm = 1 + cond.flag;
          r_tmp = 0;  // Not set in getAllocable, so no need to declare.
 
          // r_tmp = CR  => mfcr r_tmp
-         p = mkFormX(p, 31, r_tmp, 0, 0, 19, 0);
+         p = mkFormX(p, 31, r_tmp, 0, 0, 19, 0, endness_host);
 
          // r_dst = flag (rotate left and mask)
          //  => rlwinm r_dst,r_tmp,rot_imm,31,31
-         p = mkFormM(p, 21, r_tmp, r_dst, rot_imm, 31, 31, 0);
+         p = mkFormM(p, 21, r_tmp, r_dst, rot_imm, 31, 31, 0, endness_host);
 
          if (cond.test == Pct_FALSE) {
             // flip bit  => xori r_dst,r_dst,1
-            p = mkFormD(p, 26, r_dst, r_dst, 1);
+            p = mkFormD(p, 26, r_dst, r_dst, 1, endness_host);
          }
       }
       goto done;
@@ -4428,11 +4469,12 @@
 
    case Pin_MfCR:
       // mfcr dst
-      p = mkFormX(p, 31, iregNo(i->Pin.MfCR.dst, mode64), 0, 0, 19, 0);
+      p = mkFormX(p, 31, iregNo(i->Pin.MfCR.dst, mode64), 0, 0, 19, 0,
+                  endness_host);
       goto done;
 
    case Pin_MFence: {
-      p = mkFormX(p, 31, 0, 0, 0, 598, 0);   // sync, PPC32 p616
+      p = mkFormX(p, 31, 0, 0, 0, 598, 0, endness_host);   // sync, PPC32 p616
       // CAB: Should this be isync?
       //    p = mkFormXL(p, 19, 0, 0, 0, 150, 0);  // isync, PPC32 p467
       goto done;
@@ -4457,7 +4499,7 @@
          default:
             goto bad;
          }
-         p = doAMode_IR(p, opc1, r_src, am_addr, mode64);
+         p = doAMode_IR(p, opc1, r_src, am_addr, mode64, endness_host);
          goto done;
       case Pam_RR:
          switch(sz) {
@@ -4469,7 +4511,7 @@
          default:
             goto bad;
          }
-         p = doAMode_RR(p, 31, opc2, r_src, am_addr, mode64);
+         p = doAMode_RR(p, 31, opc2, r_src, am_addr, mode64, endness_host);
          goto done;
       default:
          goto bad;
@@ -4480,12 +4522,12 @@
    case Pin_StoreC: {
       if (i->Pin.StoreC.sz == 4) {
          p = mkFormX(p, 31, iregNo(i->Pin.StoreC.src, mode64),
-                     0, iregNo(i->Pin.StoreC.dst, mode64), 150, 1);
+                     0, iregNo(i->Pin.StoreC.dst, mode64), 150, 1, endness_host);
          goto done;
       }
       if (i->Pin.StoreC.sz == 8 && mode64) {
          p = mkFormX(p, 31, iregNo(i->Pin.StoreC.src, mode64),
-                     0, iregNo(i->Pin.StoreC.dst, mode64), 214, 1);
+                     0, iregNo(i->Pin.StoreC.dst, mode64), 214, 1, endness_host);
          goto done;
       }
       goto bad;
@@ -4496,34 +4538,34 @@
       UInt fr_src = fregNo(i->Pin.FpUnary.src);
       switch (i->Pin.FpUnary.op) {
       case Pfp_RSQRTE: // frsqrtre, PPC32 p424
-         p = mkFormA( p, 63, fr_dst, 0, fr_src, 0, 26, 0 );
+         p = mkFormA( p, 63, fr_dst, 0, fr_src, 0, 26, 0, endness_host );
          break;
       case Pfp_RES:   // fres, PPC32 p421
-         p = mkFormA( p, 59, fr_dst, 0, fr_src, 0, 24, 0 );
+         p = mkFormA( p, 59, fr_dst, 0, fr_src, 0, 24, 0, endness_host );
          break;
       case Pfp_SQRT:  // fsqrt, PPC32 p427
-         p = mkFormA( p, 63, fr_dst, 0, fr_src, 0, 22, 0 );
+         p = mkFormA( p, 63, fr_dst, 0, fr_src, 0, 22, 0, endness_host );
          break;
       case Pfp_ABS:   // fabs, PPC32 p399
-         p = mkFormX(p, 63, fr_dst, 0, fr_src, 264, 0);
+         p = mkFormX(p, 63, fr_dst, 0, fr_src, 264, 0, endness_host);
          break;
       case Pfp_NEG:   // fneg, PPC32 p416
-         p = mkFormX(p, 63, fr_dst, 0, fr_src, 40, 0);
+         p = mkFormX(p, 63, fr_dst, 0, fr_src, 40, 0, endness_host);
          break;
       case Pfp_MOV:   // fmr, PPC32 p410
-         p = mkFormX(p, 63, fr_dst, 0, fr_src, 72, 0);
+         p = mkFormX(p, 63, fr_dst, 0, fr_src, 72, 0, endness_host);
          break;
       case Pfp_FRIM:  // frim, PPC ISA 2.05 p137
-         p = mkFormX(p, 63, fr_dst, 0, fr_src, 488, 0);
+         p = mkFormX(p, 63, fr_dst, 0, fr_src, 488, 0, endness_host);
          break;
       case Pfp_FRIP:  // frip, PPC ISA 2.05 p137
-         p = mkFormX(p, 63, fr_dst, 0, fr_src, 456, 0);
+         p = mkFormX(p, 63, fr_dst, 0, fr_src, 456, 0, endness_host);
          break;
       case Pfp_FRIN:  // frin, PPC ISA 2.05 p137
-         p = mkFormX(p, 63, fr_dst, 0, fr_src, 392, 0);
+         p = mkFormX(p, 63, fr_dst, 0, fr_src, 392, 0, endness_host);
          break;
       case Pfp_FRIZ:  // friz, PPC ISA 2.05 p137
-         p = mkFormX(p, 63, fr_dst, 0, fr_src, 424, 0);
+         p = mkFormX(p, 63, fr_dst, 0, fr_src, 424, 0, endness_host);
          break;
       default:
          goto bad;
@@ -4537,28 +4579,28 @@
       UInt fr_srcR = fregNo(i->Pin.FpBinary.srcR);
       switch (i->Pin.FpBinary.op) {
       case Pfp_ADDD:   // fadd, PPC32 p400
-         p = mkFormA( p, 63, fr_dst, fr_srcL, fr_srcR, 0, 21, 0 );
+         p = mkFormA( p, 63, fr_dst, fr_srcL, fr_srcR, 0, 21, 0, endness_host );
          break;
       case Pfp_ADDS:   // fadds, PPC32 p401
-         p = mkFormA( p, 59, fr_dst, fr_srcL, fr_srcR, 0, 21, 0 );
+         p = mkFormA( p, 59, fr_dst, fr_srcL, fr_srcR, 0, 21, 0, endness_host );
          break;
       case Pfp_SUBD:   // fsub, PPC32 p429
-         p = mkFormA( p, 63, fr_dst, fr_srcL, fr_srcR, 0, 20, 0 );
+         p = mkFormA( p, 63, fr_dst, fr_srcL, fr_srcR, 0, 20, 0, endness_host );
          break;
       case Pfp_SUBS:   // fsubs, PPC32 p430
-         p = mkFormA( p, 59, fr_dst, fr_srcL, fr_srcR, 0, 20, 0 );
+         p = mkFormA( p, 59, fr_dst, fr_srcL, fr_srcR, 0, 20, 0, endness_host );
          break;
       case Pfp_MULD:   // fmul, PPC32 p413
-         p = mkFormA( p, 63, fr_dst, fr_srcL, 0, fr_srcR, 25, 0 );
+         p = mkFormA( p, 63, fr_dst, fr_srcL, 0, fr_srcR, 25, 0, endness_host );
          break;
       case Pfp_MULS:   // fmuls, PPC32 p414
-         p = mkFormA( p, 59, fr_dst, fr_srcL, 0, fr_srcR, 25, 0 );
+         p = mkFormA( p, 59, fr_dst, fr_srcL, 0, fr_srcR, 25, 0, endness_host );
          break;
       case Pfp_DIVD:   // fdiv, PPC32 p406
-         p = mkFormA( p, 63, fr_dst, fr_srcL, fr_srcR, 0, 18, 0 );
+         p = mkFormA( p, 63, fr_dst, fr_srcL, fr_srcR, 0, 18, 0, endness_host );
          break;
       case Pfp_DIVS:   // fdivs, PPC32 p407
-         p = mkFormA( p, 59, fr_dst, fr_srcL, fr_srcR, 0, 18, 0 );
+         p = mkFormA( p, 59, fr_dst, fr_srcL, fr_srcR, 0, 18, 0, endness_host );
          break;
       default:
          goto bad;
@@ -4573,16 +4615,20 @@
       UInt fr_srcAcc = fregNo(i->Pin.FpMulAcc.srcAcc);
       switch (i->Pin.FpMulAcc.op) {
       case Pfp_MADDD:   // fmadd, PPC32 p408
-         p = mkFormA( p, 63, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 29, 0 );
+         p = mkFormA( p, 63, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 29, 0,
+                      endness_host );
          break;
       case Pfp_MADDS:   // fmadds, PPC32 p409
-         p = mkFormA( p, 59, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 29, 0 );
+         p = mkFormA( p, 59, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 29, 0,
+                      endness_host );
          break;
       case Pfp_MSUBD:   // fmsub, PPC32 p411
-         p = mkFormA( p, 63, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 28, 0 );
+         p = mkFormA( p, 63, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 28, 0,
+                      endness_host );
          break;
       case Pfp_MSUBS:   // fmsubs, PPC32 p412
-         p = mkFormA( p, 59, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 28, 0 );
+         p = mkFormA( p, 59, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 28, 0,
+                      endness_host );
          break;
       default:
          goto bad;
@@ -4601,18 +4647,18 @@
       if (i->Pin.FpLdSt.isLoad) {   // Load from memory
          if (idxd) {  // lf[s|d]x, PPC32 p444|440
             opc = (sz == 4) ? 535 : 599;
-            p = doAMode_RR(p, 31, opc, f_reg, am_addr, mode64);
+            p = doAMode_RR(p, 31, opc, f_reg, am_addr, mode64, endness_host);
          } else {     // lf[s|d], PPC32 p441|437
             opc = (sz == 4) ? 48 : 50;
-            p = doAMode_IR(p, opc, f_reg, am_addr, mode64);
+            p = doAMode_IR(p, opc, f_reg, am_addr, mode64, endness_host);
          }
       } else {                      // Store to memory
          if (idxd) { // stf[s|d]x, PPC32 p521|516
             opc = (sz == 4) ? 663 : 727;
-            p = doAMode_RR(p, 31, opc, f_reg, am_addr, mode64);
+            p = doAMode_RR(p, 31, opc, f_reg, am_addr, mode64, endness_host);
          } else {    // stf[s|d], PPC32 p518|513
             opc = (sz == 4) ? 52 : 54;
-            p = doAMode_IR(p, opc, f_reg, am_addr, mode64);
+            p = doAMode_IR(p, opc, f_reg, am_addr, mode64, endness_host);
          }
       }
       goto done;
@@ -4623,7 +4669,7 @@
       UInt fr_data = fregNo(i->Pin.FpSTFIW.data);
       // stfiwx (store fp64[lo32] as int32), PPC32 p517
       // Use rA==0, so that EA == rB == ir_addr
-      p = mkFormX(p, 31, fr_data, 0/*rA=0*/, ir_addr, 983, 0);
+      p = mkFormX(p, 31, fr_data, 0/*rA=0*/, ir_addr, 983, 0, endness_host);
       goto done;
    }
 
@@ -4631,7 +4677,7 @@
       UInt fr_dst = fregNo(i->Pin.FpRSP.dst);
       UInt fr_src = fregNo(i->Pin.FpRSP.src);
       // frsp, PPC32 p423
-      p = mkFormX(p, 63, fr_dst, 0, fr_src, 12, 0);
+      p = mkFormX(p, 63, fr_dst, 0, fr_src, 12, 0, endness_host);
       goto done;
    }
 
@@ -4641,37 +4687,37 @@
       if (i->Pin.FpCftI.fromI == False && i->Pin.FpCftI.int32 == True) {
          if (i->Pin.FpCftI.syned == True) {
             // fctiw (conv f64 to i32), PPC32 p404
-            p = mkFormX(p, 63, fr_dst, 0, fr_src, 14, 0);
+            p = mkFormX(p, 63, fr_dst, 0, fr_src, 14, 0, endness_host);
             goto done;
          } else {
             // fctiwu (conv f64 to u32)
-            p = mkFormX(p, 63, fr_dst, 0, fr_src, 142, 0);
+            p = mkFormX(p, 63, fr_dst, 0, fr_src, 142, 0, endness_host);
             goto done;
          }
       }
       if (i->Pin.FpCftI.fromI == False && i->Pin.FpCftI.int32 == False) {
          if (i->Pin.FpCftI.syned == True) {
             // fctid (conv f64 to i64), PPC64 p437
-            p = mkFormX(p, 63, fr_dst, 0, fr_src, 814, 0);
+            p = mkFormX(p, 63, fr_dst, 0, fr_src, 814, 0, endness_host);
             goto done;
          } else {
             // fctidu (conv f64 to u64)
-            p = mkFormX(p, 63, fr_dst, 0, fr_src, 942, 0);
+            p = mkFormX(p, 63, fr_dst, 0, fr_src, 942, 0, endness_host);
             goto done;
          }
       }
       if (i->Pin.FpCftI.fromI == True && i->Pin.FpCftI.int32 == False) {
          if (i->Pin.FpCftI.syned == True) {
             // fcfid (conv i64 to f64), PPC64 p434
-            p = mkFormX(p, 63, fr_dst, 0, fr_src, 846, 0);
+            p = mkFormX(p, 63, fr_dst, 0, fr_src, 846, 0, endness_host);
             goto done;
          } else if (i->Pin.FpCftI.flt64 == True) {
             // fcfidu (conv u64 to f64)
-            p = mkFormX(p, 63, fr_dst, 0, fr_src, 974, 0);
+            p = mkFormX(p, 63, fr_dst, 0, fr_src, 974, 0, endness_host);
             goto done;
          } else {
             // fcfidus (conv u64 to f32)
-            p = mkFormX(p, 59, fr_dst, 0, fr_src, 974, 0);
+            p = mkFormX(p, 59, fr_dst, 0, fr_src, 974, 0, endness_host);
             goto done;
          }
       }
@@ -4690,17 +4736,18 @@
       /* jmp fwds if !condition */
       if (cc.test != Pct_ALWAYS) {
          /* bc !ct,cf,n_bytes>>2 */
-         p = mkFormB(p, invertCondTest(cc.test), cc.flag, 8>>2, 0, 0);
+         p = mkFormB(p, invertCondTest(cc.test), cc.flag, 8>>2, 0, 0,
+                     endness_host);
       }
 
       // fmr, PPC32 p410
-      p = mkFormX(p, 63, fr_dst, 0, fr_src, 72, 0);
+      p = mkFormX(p, 63, fr_dst, 0, fr_src, 72, 0, endness_host);
       goto done;
    }
 
    case Pin_FpLdFPSCR: {
       UInt fr_src = fregNo(i->Pin.FpLdFPSCR.src);
-      p = mkFormXFL(p, 0xFF, fr_src, i->Pin.FpLdFPSCR.dfp_rm);     // mtfsf, PPC32 p480
+      p = mkFormXFL(p, 0xFF, fr_src, i->Pin.FpLdFPSCR.dfp_rm, endness_host); // mtfsf, PPC32 p480
       goto done;
    }
 
@@ -4711,21 +4758,22 @@
       UInt  fr_srcR = fregNo(i->Pin.FpCmp.srcR);
       vassert(crfD < 8);
       // fcmpo, PPC32 p402
-      p = mkFormX(p, 63, crfD<<2, fr_srcL, fr_srcR, 32, 0);
+      p = mkFormX(p, 63, crfD<<2, fr_srcL, fr_srcR, 32, 0, endness_host);
 
       // mfcr (mv CR to r_dst), PPC32 p467
-      p = mkFormX(p, 31, r_dst, 0, 0, 19, 0);
+      p = mkFormX(p, 31, r_dst, 0, 0, 19, 0, endness_host);
       
       // rlwinm r_dst,r_dst,8,28,31, PPC32 p501
       //  => rotate field 1 to bottomw of word, masking out upper 28
-      p = mkFormM(p, 21, r_dst, r_dst, 8, 28, 31, 0);
+      p = mkFormM(p, 21, r_dst, r_dst, 8, 28, 31, 0, endness_host);
       goto done;
    }
 
    case Pin_RdWrLR: {
       UInt reg = iregNo(i->Pin.RdWrLR.gpr, mode64);
       /* wrLR==True ? mtlr r4 : mflr r4 */
-      p = mkFormXFX(p, reg, 8, (i->Pin.RdWrLR.wrLR==True) ? 467 : 339);
+      p = mkFormXFX(p, reg, 8, (i->Pin.RdWrLR.wrLR==True) ? 467 : 339,
+                    endness_host);
       goto done;
    }
 
@@ -4744,17 +4792,17 @@
       if (!idxd) {
          r_idx = 30;                       // XXX: Using r30 as temp
          p = mkLoadImm(p, r_idx,
-                       i->Pin.AvLdSt.addr->Pam.IR.index, mode64);
+                       i->Pin.AvLdSt.addr->Pam.IR.index, mode64, endness_host);
       } else {
          r_idx  = iregNo(i->Pin.AvLdSt.addr->Pam.RR.index, mode64);
       }
 
       if (i->Pin.FpLdSt.isLoad) {  // Load from memory (1,2,4,16)
          opc2 = (sz==1) ?   7 : (sz==2) ?  39 : (sz==4) ?  71 : 103;
-         p = mkFormX(p, 31, v_reg, r_idx, r_base, opc2, 0);
+         p = mkFormX(p, 31, v_reg, r_idx, r_base, opc2, 0, endness_host);
       } else {                      // Store to memory (1,2,4,16)
          opc2 = (sz==1) ? 135 : (sz==2) ? 167 : (sz==4) ? 199 : 231;
-         p = mkFormX(p, 31, v_reg, r_idx, r_base, opc2, 0);
+         p = mkFormX(p, 31, v_reg, r_idx, r_base, opc2, 0, endness_host);
       }
       goto done;
    }
@@ -4784,10 +4832,10 @@
       switch (i->Pin.AvUnary.op) {
       case Pav_MOV:
       case Pav_NOT:
-         p = mkFormVX( p, 4, v_dst, v_src, v_src, opc2 );
+         p = mkFormVX( p, 4, v_dst, v_src, v_src, opc2, endness_host );
          break;
       default:
-         p = mkFormVX( p, 4, v_dst, 0, v_src, opc2 );
+         p = mkFormVX( p, 4, v_dst, 0, v_src, opc2, endness_host );
          break;
       }
       goto done;
@@ -4799,13 +4847,13 @@
       UInt v_srcR = vregNo(i->Pin.AvBinary.srcR);
       UInt opc2;
       if (i->Pin.AvBinary.op == Pav_SHL) {
-         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1036 ); // vslo
-         p = mkFormVX( p, 4, v_dst, v_dst,  v_srcR, 452 );  // vsl
+         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1036, endness_host ); // vslo
+         p = mkFormVX( p, 4, v_dst, v_dst,  v_srcR, 452, endness_host );  // vsl
          goto done;
       }
       if (i->Pin.AvBinary.op == Pav_SHR) {
-         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1100 ); // vsro
-         p = mkFormVX( p, 4, v_dst, v_dst,  v_srcR, 708 );  // vsr
+         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1100, endness_host ); // vsro
+         p = mkFormVX( p, 4, v_dst, v_dst,  v_srcR, 708, endness_host );  // vsr
          goto done;
       }
       switch (i->Pin.AvBinary.op) {
@@ -4816,7 +4864,7 @@
       default:
          goto bad;
       }
-      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2, endness_host );
       goto done;
    }
 
@@ -4864,7 +4912,7 @@
       default:
          goto bad;
       }
-      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2, endness_host );
       goto done;
    }
 
@@ -4918,7 +4966,7 @@
       default:
          goto bad;
       }
-      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2, endness_host );
       goto done;
    }
 
@@ -4977,7 +5025,7 @@
       default:
          goto bad;
       }
-      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2, endness_host );
       goto done;
    }
 
@@ -5009,7 +5057,7 @@
       default:
          goto bad;
       }
-      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2, endness_host );
       goto done;
    }
    case Pin_AvCipherV128Unary: {
@@ -5021,7 +5069,7 @@
       default:
          goto bad;
       }
-      p = mkFormVX( p, 4, v_dst, v_src, 0, opc2 );
+      p = mkFormVX( p, 4, v_dst, v_src, 0, opc2, endness_host );
       goto done;
    }
    case Pin_AvCipherV128Binary: {
@@ -5037,7 +5085,7 @@
       default:
          goto bad;
       }
-      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+      p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2, endness_host );
       goto done;
    }
    case Pin_AvHashV128Binary: {
@@ -5051,7 +5099,7 @@
       default:
          goto bad;
       }
-      p = mkFormVX( p, 4, v_dst, v_src, s_field->Pri.Imm, opc2 );
+      p = mkFormVX( p, 4, v_dst, v_src, s_field->Pri.Imm, opc2, endness_host );
       goto done;
    }
    case Pin_AvBCDV128Trinary: {
@@ -5067,7 +5115,7 @@
          goto bad;
       }
       p = mkFormVXR( p, 4, v_dst, v_src1, v_src2,
-                     0x1, (ps->Pri.Imm << 9) | opc2 );
+                     0x1, (ps->Pri.Imm << 9) | opc2, endness_host );
       goto done;
    }
    case Pin_AvBin32Fx4: {
@@ -5077,16 +5125,16 @@
       switch (i->Pin.AvBin32Fx4.op) {
 
       case Pavfp_ADDF:
-         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 10 );   // vaddfp
+         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 10, endness_host );   // vaddfp
          break;
       case Pavfp_SUBF:
-         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 74 );   // vsubfp
+         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 74, endness_host );   // vsubfp
          break;
       case Pavfp_MAXF:
-         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1034 ); // vmaxfp
+         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1034, endness_host ); // vmaxfp
          break;
       case Pavfp_MINF:
-         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1098 ); // vminfp
+         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1098, endness_host ); // vminfp
          break;
 
       case Pavfp_MULF: {
@@ -5101,23 +5149,23 @@
 
          // Better way to load -0.0 (0x80000000) ?
          // vspltisw vB,0x1F   (0x1F => each word of vB)
-         p = mkFormVX( p, 4, vB, konst, 0, 908 );
+         p = mkFormVX( p, 4, vB, konst, 0, 908, endness_host );
 
          // vslw vB,vB,vB (each word of vB = (0x1F << 0x1F) = 0x80000000
-         p = mkFormVX( p, 4, vB, vB, vB, 388 );
+         p = mkFormVX( p, 4, vB, vB, vB, 388, endness_host );
 
          // Finally, do the multiply:
-         p = mkFormVA( p, 4, v_dst, v_srcL, vB, v_srcR, 46 );
+         p = mkFormVA( p, 4, v_dst, v_srcL, vB, v_srcR, 46, endness_host );
          break;
       }
       case Pavfp_CMPEQF:  // vcmpeqfp
-         p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 198 );
+         p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 198, endness_host);
          break;
       case Pavfp_CMPGTF:  // vcmpgtfp
-         p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 710 );
+         p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 710, endness_host );
          break;
       case Pavfp_CMPGEF:  // vcmpgefp
-         p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 454 );
+         p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 454, endness_host );
          break;
 
       default:
@@ -5144,7 +5192,7 @@
       default:
          goto bad;
       }
-      p = mkFormVX( p, 4, v_dst, 0, v_src, opc2 );
+      p = mkFormVX( p, 4, v_dst, 0, v_src, opc2, endness_host );
       goto done;
    }
 
@@ -5153,7 +5201,7 @@
       UInt v_srcL = vregNo(i->Pin.AvPerm.srcL);
       UInt v_srcR = vregNo(i->Pin.AvPerm.srcR);
       UInt v_ctl  = vregNo(i->Pin.AvPerm.ctl);
-      p = mkFormVA( p, 4, v_dst, v_srcL, v_srcR, v_ctl, 43 );
+      p = mkFormVA( p, 4, v_dst, v_srcL, v_srcR, v_ctl, 43, endness_host );
       goto done;
    }
 
@@ -5162,7 +5210,7 @@
       UInt v_dst  = vregNo(i->Pin.AvSel.dst);
       UInt v_srcL = vregNo(i->Pin.AvSel.srcL);
       UInt v_srcR = vregNo(i->Pin.AvSel.srcR);
-      p = mkFormVA( p, 4, v_dst, v_srcL, v_srcR, v_ctl, 42 );
+      p = mkFormVA( p, 4, v_dst, v_srcL, v_srcR, v_ctl, 42, endness_host );
       goto done;
    }
 
@@ -5172,7 +5220,7 @@
       UInt v_srcL = vregNo(i->Pin.AvShlDbl.srcL);
       UInt v_srcR = vregNo(i->Pin.AvShlDbl.srcR);
       vassert(shift <= 0xF);
-      p = mkFormVA( p, 4, v_dst, v_srcL, v_srcR, shift, 44 );
+      p = mkFormVA( p, 4, v_dst, v_srcL, v_srcR, shift, 44, endness_host );
       goto done;
    }
 
@@ -5189,7 +5237,7 @@
          simm5 = i->Pin.AvSplat.src->Pvi.Imm5s;
          vassert(simm5 >= -16 && simm5 <= 15);
          simm5 = simm5 & 0x1F;
-         p = mkFormVX( p, 4, v_dst, (UInt)simm5, 0, opc2 );
+         p = mkFormVX( p, 4, v_dst, (UInt)simm5, 0, opc2, endness_host );
       }
       else {  // Pri_Reg
          UInt lowest_lane;
@@ -5197,7 +5245,7 @@
          vassert(hregClass(i->Pin.AvSplat.src->Pvi.Reg) == HRcVec128);
          v_src = vregNo(i->Pin.AvSplat.src->Pvi.Reg);
          lowest_lane = (128/sz)-1;
-         p = mkFormVX( p, 4, v_dst, lowest_lane, v_src, opc2 );
+         p = mkFormVX( p, 4, v_dst, lowest_lane, v_src, opc2, endness_host );
       }
       goto done;
    }
@@ -5214,16 +5262,17 @@
       /* jmp fwds 2 insns if !condition */
       if (cc.test != Pct_ALWAYS) {
          /* bc !ct,cf,n_bytes>>2 */
-         p = mkFormB(p, invertCondTest(cc.test), cc.flag, 8>>2, 0, 0);
+         p = mkFormB(p, invertCondTest(cc.test), cc.flag, 8>>2, 0, 0,
+                     endness_host);
       }
       /* vmr */
-      p = mkFormVX( p, 4, v_dst, v_src, v_src, 1156 );
+      p = mkFormVX( p, 4, v_dst, v_src, v_src, 1156, endness_host );
       goto done;
    }
 
    case Pin_AvLdVSCR: {  // mtvscr
       UInt v_src = vregNo(i->Pin.AvLdVSCR.src);
-      p = mkFormVX( p, 4, 0, 0, v_src, 1604 );
+      p = mkFormVX( p, 4, 0, 0, v_src, 1604, endness_host );
       goto done;
    }
 
@@ -5233,23 +5282,23 @@
 
       switch (i->Pin.Dfp64Unary.op) {
       case Pfp_MOV: // fmr, PPC32 p410
-         p = mkFormX( p, 63, fr_dst, 0, fr_src, 72, 0 );
+         p = mkFormX( p, 63, fr_dst, 0, fr_src, 72, 0, endness_host );
          break;
       case Pfp_DCTDP:   // D32 to D64
-         p = mkFormX( p, 59, fr_dst, 0, fr_src, 258, 0 );
+         p = mkFormX( p, 59, fr_dst, 0, fr_src, 258, 0, endness_host );
          break;
       case Pfp_DRSP:    // D64 to D32
-         p = mkFormX( p, 59, fr_dst, 0, fr_src, 770, 0 );
+         p = mkFormX( p, 59, fr_dst, 0, fr_src, 770, 0, endness_host );
          break;
       case Pfp_DCFFIX:   // I64 to D64 conversion
          /* ONLY WORKS ON POWER7 */
-         p = mkFormX( p, 59, fr_dst, 0, fr_src, 802, 0);
+         p = mkFormX( p, 59, fr_dst, 0, fr_src, 802, 0, endness_host );
          break;
       case Pfp_DCTFIX:   // D64 to I64 conversion
-         p = mkFormX( p, 59, fr_dst, 0, fr_src, 290, 0);
+         p = mkFormX( p, 59, fr_dst, 0, fr_src, 290, 0, endness_host );
          break;
       case Pfp_DXEX:     // Extract exponent
-         p = mkFormX( p, 59, fr_dst, 0, fr_src, 354, 0 );
+         p = mkFormX( p, 59, fr_dst, 0, fr_src, 354, 0, endness_host );
          break;                                
       default:
          goto bad;
@@ -5264,22 +5313,22 @@
       switch (i->Pin.Dfp64Binary.op) {
       case Pfp_DFPADD: /* dadd, dfp add, use default RM from reg ignore mode
                         * from the Iop instruction. */
-         p = mkFormX( p, 59, fr_dst, fr_srcL, fr_srcR, 2, 0 );
+         p = mkFormX( p, 59, fr_dst, fr_srcL, fr_srcR, 2, 0, endness_host );
          break;
       case Pfp_DFPSUB: /* dsub, dfp subtract, use default RM from reg ignore
                         * mode from the Iop instruction. */
-         p = mkFormX( p, 59, fr_dst, fr_srcL, fr_srcR, 514, 0 );
+         p = mkFormX( p, 59, fr_dst, fr_srcL, fr_srcR, 514, 0, endness_host );
          break;
       case Pfp_DFPMUL: /* dmul, dfp multipy, use default RM from reg ignore
                         * mode from the Iop instruction. */
-         p = mkFormX( p, 59, fr_dst, fr_srcL, fr_srcR, 34, 0 );
+         p = mkFormX( p, 59, fr_dst, fr_srcL, fr_srcR, 34, 0, endness_host );
          break;
       case Pfp_DFPDIV: /* ddiv, dfp divide, use default RM from reg ignore
                         * mode from the Iop instruction. */
-         p = mkFormX( p, 59, fr_dst, fr_srcL, fr_srcR, 546, 0 );
+         p = mkFormX( p, 59, fr_dst, fr_srcL, fr_srcR, 546, 0, endness_host );
          break;
       case Pfp_DIEX:  /* diex, insert exponent */
-         p = mkFormX( p, 59, fr_dst, fr_srcL, fr_srcR, 866, 0 );
+         p = mkFormX( p, 59, fr_dst, fr_srcL, fr_srcR, 866, 0, endness_host );
          break;
       default:
          goto bad;
@@ -5296,10 +5345,10 @@
 
       switch (i->Pin.DfpShift.op) {
       case Pfp_DSCLI:    /* dscli, DFP shift left by fr_srcR */
-         p = mkFormZ22( p, 59, fr_dst, fr_src, shift,  66, 0 );
+         p = mkFormZ22( p, 59, fr_dst, fr_src, shift,  66, 0, endness_host );
          break;
       case Pfp_DSCRI:    /* dscri, DFP shift right by fr_srcR */
-         p = mkFormZ22( p, 59, fr_dst, fr_src, shift,  98, 0 );
+         p = mkFormZ22( p, 59, fr_dst, fr_src, shift,  98, 0, endness_host );
          break;
       default:
          vex_printf("ERROR: emit_PPCInstr default case\n");
@@ -5318,14 +5367,14 @@
          /* Setup the upper and lower registers of the source operand
           * register pair.
           */
-         p = mkFormX( p, 63, 12, 0, fr_srcHi, 72, 0);
-         p = mkFormX( p, 63, 13, 0, fr_srcLo, 72, 0);
-         p = mkFormX( p, 63, 10, 0, 12, 354, 0 );
+         p = mkFormX( p, 63, 12, 0, fr_srcHi, 72, 0, endness_host );
+         p = mkFormX( p, 63, 13, 0, fr_srcLo, 72, 0, endness_host );
+         p = mkFormX( p, 63, 10, 0, 12, 354, 0, endness_host );
 
          /* The instruction will put the 64-bit result in
           * register 10.
           */
-         p = mkFormX(p, 63, fr_dst, 0, 10,  72, 0);
+         p = mkFormX(p, 63, fr_dst, 0, 10,  72, 0, endness_host);
          break;
       default:
          vex_printf("Error: emit_PPCInstr case Pin_DfpExtractExp, case inst Default\n");
@@ -5343,16 +5392,16 @@
       */
      switch (i->Pin.Dfp128Unary.op) {
      case Pfp_DCTQPQ: // D64 to D128, srcLo holds 64 bit operand              
-        p = mkFormX( p, 63, 12, 0, fr_srcLo, 72, 0);
+        p = mkFormX( p, 63, 12, 0, fr_srcLo, 72, 0, endness_host );
 
-        p = mkFormX( p, 63, 10, 0, 12, 258, 0 );
+        p = mkFormX( p, 63, 10, 0, 12, 258, 0, endness_host );
 
         /* The instruction will put the 128-bit result in
          * registers (10,11).  Note, the operand in the instruction only
          * reference the first of the two registers in the pair.
          */
-        p = mkFormX(p, 63, fr_dstHi, 0, 10,  72, 0);
-        p = mkFormX(p, 63, fr_dstLo, 0, 11,  72, 0);
+        p = mkFormX(p, 63, fr_dstHi, 0, 10,  72, 0, endness_host);
+        p = mkFormX(p, 63, fr_dstLo, 0, 11,  72, 0, endness_host);
         break;
      default:
         vex_printf("Error: emit_PPCInstr case Pin_Dfp128Unary, case inst Default\
@@ -5374,26 +5423,26 @@
       /* Setup the upper and lower registers of the source operand
        * register pair.
        */
-      p = mkFormX( p, 63, 10, 0, fr_dstHi, 72, 0 );
-      p = mkFormX( p, 63, 11, 0, fr_dstLo, 72, 0 );
-      p = mkFormX( p, 63, 12, 0, fr_srcRHi, 72, 0 );
-      p = mkFormX( p, 63, 13, 0, fr_srcRLo, 72, 0 );
+      p = mkFormX( p, 63, 10, 0, fr_dstHi, 72, 0, endness_host );
+      p = mkFormX( p, 63, 11, 0, fr_dstLo, 72, 0, endness_host );
+      p = mkFormX( p, 63, 12, 0, fr_srcRHi, 72, 0, endness_host );
+      p = mkFormX( p, 63, 13, 0, fr_srcRLo, 72, 0, endness_host );
 
       /* Do instruction with 128-bit source operands in registers (10,11)
        * and (12,13).
        */
       switch (i->Pin.Dfp128Binary.op) {
       case Pfp_DFPADDQ:
-         p = mkFormX( p, 63, 10, 10, 12, 2, 0 );
+         p = mkFormX( p, 63, 10, 10, 12, 2, 0, endness_host );
          break;
       case Pfp_DFPSUBQ:
-         p = mkFormX( p, 63, 10, 10, 12, 514, 0 );
+         p = mkFormX( p, 63, 10, 10, 12, 514, 0, endness_host );
          break;
       case Pfp_DFPMULQ:
-         p = mkFormX( p, 63, 10, 10, 12, 34, 0 );
+         p = mkFormX( p, 63, 10, 10, 12, 34, 0, endness_host );
          break;
       case Pfp_DFPDIVQ:
-         p = mkFormX( p, 63, 10, 10, 12, 546, 0 );
+         p = mkFormX( p, 63, 10, 10, 12, 546, 0, endness_host );
          break;
       default:
          goto bad;
@@ -5403,8 +5452,8 @@
        * registers (10,11).  Note, the operand in the instruction only
        * reference the first of the two registers in the pair.
        */
-      p = mkFormX(p, 63, fr_dstHi, 0, 10,  72, 0);
-      p = mkFormX(p, 63, fr_dstLo, 0, 11,  72, 0);
+      p = mkFormX(p, 63, fr_dstHi, 0, 10,  72, 0, endness_host);
+      p = mkFormX(p, 63, fr_dstLo, 0, 11,  72, 0, endness_host);
       goto done;
    }
 
@@ -5418,20 +5467,20 @@
       shift =  i->Pin.DfpShift128.shift->Pri.Imm;
 
       /* setup source operand in register 12, 13 pair */
-      p = mkFormX(p, 63, 12, 0, fr_src_hi, 72, 0);
-      p = mkFormX(p, 63, 13, 0, fr_src_lo, 72, 0);
+      p = mkFormX(p, 63, 12, 0, fr_src_hi, 72, 0, endness_host);
+      p = mkFormX(p, 63, 13, 0, fr_src_lo, 72, 0, endness_host);
 
       /* execute instruction putting result in register 10, 11 pair */
       switch (i->Pin.DfpShift128.op) {
       case Pfp_DSCLIQ:    /* dscliq, DFP shift left, fr_srcR is the integer
                            * shift amount.
                            */
-         p = mkFormZ22( p, 63, 10, 12, shift,  66, 0 );
+         p = mkFormZ22( p, 63, 10, 12, shift,  66, 0, endness_host );
          break;
       case Pfp_DSCRIQ:    /* dscriq, DFP shift right, fr_srcR is the integer
                            * shift amount.
                            */
-         p = mkFormZ22( p, 63, 10, 12, shift,  98, 0 );
+         p = mkFormZ22( p, 63, 10, 12, shift,  98, 0, endness_host );
          break;
       default:
          vex_printf("ERROR: emit_PPCInstr quad default case %d \n",
@@ -5443,8 +5492,8 @@
        * Note, the operand in the instruction only reference the first of 
        * the two registers in the pair.
        */
-      p = mkFormX(p, 63, fr_dst_hi, 0, 10,  72, 0);
-      p = mkFormX(p, 63, fr_dst_lo, 0, 11,  72, 0);
+      p = mkFormX(p, 63, fr_dst_hi, 0, 10,  72, 0, endness_host);
+      p = mkFormX(p, 63, fr_dst_lo, 0, 11,  72, 0, endness_host);
       goto done;
    }
 
@@ -5458,7 +5507,7 @@
       rmc = r_rmc & 0x3;
 
       // drintx
-      p = mkFormZ23(p, 59, fr_dst, r, fr_src, rmc, 99, 0);
+      p = mkFormZ23(p, 59, fr_dst, r, fr_src, rmc, 99, 0, endness_host);
       goto done;
    }
 
@@ -5476,20 +5525,20 @@
       /* Setup the upper and lower registers of the source operand 
        * register pair.
        */
-      p = mkFormX(p, 63, 12, 0, fr_srcHi, 72, 0);
-      p = mkFormX(p, 63, 13, 0, fr_srcLo, 72, 0);
+      p = mkFormX(p, 63, 12, 0, fr_srcHi, 72, 0, endness_host);
+      p = mkFormX(p, 63, 13, 0, fr_srcLo, 72, 0, endness_host);
 
       /* Do drintx instruction with 128-bit source operands in 
        * registers (12,13).  
        */
-      p = mkFormZ23(p, 63, 10, r, 12, rmc, 99, 0);
+      p = mkFormZ23(p, 63, 10, r, 12, rmc, 99, 0, endness_host);
 
       /* The instruction will put the 128-bit result in 
        * registers (10,11).  Note, the operand in the instruction only 
        * reference the first of the two registers in the pair.
        */
-      p = mkFormX(p, 63, fr_dstHi, 0, 10,  72, 0);
-      p = mkFormX(p, 63, fr_dstLo, 0, 11,  72, 0);
+      p = mkFormX(p, 63, fr_dstHi, 0, 10,  72, 0, endness_host);
+      p = mkFormX(p, 63, fr_dstLo, 0, 11,  72, 0, endness_host);
       goto done;
    }
 
@@ -5503,10 +5552,10 @@
 
       switch (i->Pin.DfpQuantize.op) {
       case Pfp_DQUA:
-         p = mkFormZ23(p, 59, fr_dst, fr_srcL, fr_srcR, rmc, 3, 0);
+         p = mkFormZ23(p, 59, fr_dst, fr_srcL, fr_srcR, rmc, 3, 0, endness_host);
          break;
       case Pfp_RRDTR:
-         p = mkFormZ23(p, 59, fr_dst, fr_srcL, fr_srcR, rmc, 35, 0);
+         p = mkFormZ23(p, 59, fr_dst, fr_srcL, fr_srcR, rmc, 35, 0, endness_host);
          break;
       default:
          break;
@@ -5526,20 +5575,20 @@
        * register pairs.  Note, left source operand passed in via the
        * dst register pair.
        */
-      p = mkFormX(p, 63, 10, 0, fr_dst_hi, 72, 0);
-      p = mkFormX(p, 63, 11, 0, fr_dst_lo, 72, 0);
-      p = mkFormX(p, 63, 12, 0, fr_src_hi, 72, 0);
-      p = mkFormX(p, 63, 13, 0, fr_src_lo, 72, 0);
+      p = mkFormX(p, 63, 10, 0, fr_dst_hi, 72, 0, endness_host);
+      p = mkFormX(p, 63, 11, 0, fr_dst_lo, 72, 0, endness_host);
+      p = mkFormX(p, 63, 12, 0, fr_src_hi, 72, 0, endness_host);
+      p = mkFormX(p, 63, 13, 0, fr_src_lo, 72, 0, endness_host);
 
       /* Do dquaq instruction with 128-bit source operands in 
        * registers (12,13).  
        */
       switch (i->Pin.DfpQuantize128.op) {
       case Pfp_DQUAQ:
-         p = mkFormZ23(p, 63, 10, 10, 12, rmc, 3, 0);
+         p = mkFormZ23(p, 63, 10, 10, 12, rmc, 3, 0, endness_host);
          break;
       case Pfp_DRRNDQ:
-         p = mkFormZ23(p, 63, 10, 10, 12, rmc, 35, 0);
+         p = mkFormZ23(p, 63, 10, 10, 12, rmc, 35, 0, endness_host);
          break;
       default:
          vpanic("Pin_DfpQuantize128: default case, couldn't find inst to issue \n");
@@ -5550,8 +5599,8 @@
        * registers (10,11).  Note, the operand in the instruction only 
        * reference the first of the two registers in the pair.
        */
-      p = mkFormX(p, 63, fr_dst_hi, 0, 10,  72, 0);
-      p = mkFormX(p, 63, fr_dst_lo, 0, 11,  72, 0);
+      p = mkFormX(p, 63, fr_dst_hi, 0, 10,  72, 0, endness_host);
+      p = mkFormX(p, 63, fr_dst_lo, 0, 11,  72, 0, endness_host);
       goto done;
    }
 
@@ -5563,24 +5612,24 @@
       /* Setup the upper and lower registers of the source operand
        * register pair.
        */
-      p = mkFormX( p, 63, 10, 0, fr_dst, 72, 0 );
-      p = mkFormX( p, 63, 12, 0, fr_srcHi, 72, 0 );
-      p = mkFormX( p, 63, 13, 0, fr_srcLo, 72, 0 );
+      p = mkFormX( p, 63, 10, 0, fr_dst, 72, 0, endness_host );
+      p = mkFormX( p, 63, 12, 0, fr_srcHi, 72, 0, endness_host );
+      p = mkFormX( p, 63, 13, 0, fr_srcLo, 72, 0, endness_host );
 
       /* Do instruction with 128-bit source operands in registers (10,11) */
       switch (i->Pin.Dfp128Binary.op) {
       case Pfp_DRDPQ:
-         p = mkFormX( p, 63, 10, 0, 12, 770, 0 );
+         p = mkFormX( p, 63, 10, 0, 12, 770, 0, endness_host );
          break;
       case Pfp_DCTFIXQ:
-         p = mkFormX( p, 63, 10, 0, 12, 290, 0 );
+         p = mkFormX( p, 63, 10, 0, 12, 290, 0, endness_host );
          break;
       default:
          goto bad;
       }
 
       /* The instruction will put the 64-bit result in registers 10. */
-      p = mkFormX(p, 63, fr_dst, 0, 10,  72, 0);
+      p = mkFormX(p, 63, fr_dst, 0, 10,  72, 0, endness_host);
       goto done;
    }
 
@@ -5591,15 +5640,15 @@
 
       switch (i->Pin.Dfp128Binary.op) {
       case Pfp_DCFFIXQ:
-         p = mkFormX( p, 63, 10, 11, fr_src, 802, 0 );
+         p = mkFormX( p, 63, 10, 11, fr_src, 802, 0, endness_host );
          break;
       default:
          goto bad;
       }
 
       /* The instruction will put the 64-bit result in registers 10, 11. */
-      p = mkFormX(p, 63, fr_dstHi, 0, 10,  72, 0);
-      p = mkFormX(p, 63, fr_dstLo, 0, 11,  72, 0);
+      p = mkFormX(p, 63, fr_dstHi, 0, 10,  72, 0, endness_host);
+      p = mkFormX(p, 63, fr_dstLo, 0, 11,  72, 0, endness_host);
       goto done;
    }
 
@@ -5613,17 +5662,17 @@
       /* The left operand is a single F64 value, the right is an F128
        * register pair.
        */
-      p = mkFormX(p, 63, 10, 0, fr_srcL, 72, 0);
-      p = mkFormX(p, 63, 12, 0, fr_srcRHi, 72, 0);
-      p = mkFormX(p, 63, 13, 0, fr_srcRLo, 72, 0);
-      p = mkFormX(p, 63, 10, 10, 12, 866, 0 );
+      p = mkFormX(p, 63, 10, 0, fr_srcL, 72, 0, endness_host);
+      p = mkFormX(p, 63, 12, 0, fr_srcRHi, 72, 0, endness_host);
+      p = mkFormX(p, 63, 13, 0, fr_srcRLo, 72, 0, endness_host);
+      p = mkFormX(p, 63, 10, 10, 12, 866, 0, endness_host );
 
       /* The instruction will put the 128-bit result into
        * registers (10,11).  Note, the operand in the instruction only
        * reference the first of the two registers in the pair.
        */
-      p = mkFormX(p, 63, fr_dstHi, 0, 10,  72, 0);
-      p = mkFormX(p, 63, fr_dstLo, 0, 11,  72, 0);
+      p = mkFormX(p, 63, fr_dstHi, 0, 10,  72, 0, endness_host);
+      p = mkFormX(p, 63, fr_dstLo, 0, 11,  72, 0, endness_host);
       goto done;
    }                                                                           
 
@@ -5634,14 +5683,14 @@
       UInt  fr_srcR = fregNo(i->Pin.Dfp64Cmp.srcR);
       vassert(crfD < 8);
       // dcmpo, dcmpu
-      p = mkFormX(p, 59, crfD<<2, fr_srcL, fr_srcR, 130, 0);
+      p = mkFormX(p, 59, crfD<<2, fr_srcL, fr_srcR, 130, 0, endness_host);
 
       // mfcr (mv CR to r_dst)
-      p = mkFormX(p, 31, r_dst, 0, 0, 19, 0);
+      p = mkFormX(p, 31, r_dst, 0, 0, 19, 0, endness_host);
 
       // rlwinm r_dst,r_dst,8,28,31
       //  => rotate field 1 to bottomw of word, masking out upper 28
-      p = mkFormM(p, 21, r_dst, r_dst, 8, 28, 31, 0);
+      p = mkFormM(p, 21, r_dst, r_dst, 8, 28, 31, 0, endness_host);
       goto done;
    }
 
@@ -5657,19 +5706,19 @@
       /* Setup the upper and lower registers of the source operand
        * register pair.
        */
-      p = mkFormX(p, 63, 10, 0, fr_srcL_hi, 72, 0);
-      p = mkFormX(p, 63, 11, 0, fr_srcL_lo, 72, 0);
-      p = mkFormX(p, 63, 12, 0, fr_srcR_hi, 72, 0);
-      p = mkFormX(p, 63, 13, 0, fr_srcR_lo, 72, 0);
+      p = mkFormX(p, 63, 10, 0, fr_srcL_hi, 72, 0, endness_host);
+      p = mkFormX(p, 63, 11, 0, fr_srcL_lo, 72, 0, endness_host);
+      p = mkFormX(p, 63, 12, 0, fr_srcR_hi, 72, 0, endness_host);
+      p = mkFormX(p, 63, 13, 0, fr_srcR_lo, 72, 0, endness_host);
 
-      p = mkFormX(p, 63, crfD<<2, 10, 12, 130, 0);
+      p = mkFormX(p, 63, crfD<<2, 10, 12, 130, 0, endness_host);
 
       // mfcr (mv CR to r_dst)
-      p = mkFormX(p, 31, r_dst, 0, 0, 19, 0);
+      p = mkFormX(p, 31, r_dst, 0, 0, 19, 0, endness_host);
 
       // rlwinm r_dst,r_dst,8,28,31
       //  => rotate field 1 to bottomw of word, masking out upper 28
-      p = mkFormM(p, 21, r_dst, r_dst, 8, 28, 31, 0);
+      p = mkFormM(p, 21, r_dst, r_dst, 8, 28, 31, 0, endness_host);
       goto done;
    }
 
@@ -5689,21 +5738,24 @@
       UChar* p0 = p;
       /* lwz r30, amCounter */
       p = do_load_or_store_word32(p, True/*isLoad*/, /*r*/30,
-                                  i->Pin.EvCheck.amCounter, mode64);
+                                  i->Pin.EvCheck.amCounter, mode64,
+                                  endness_host);
       /* addic. r30,r30,-1 */
-      p = emit32(p, 0x37DEFFFF);
+      p = emit32(p, 0x37DEFFFF, endness_host);
       /* stw r30, amCounter */
       p = do_load_or_store_word32(p, False/*!isLoad*/, /*r*/30,
-                                  i->Pin.EvCheck.amCounter, mode64);
+                                  i->Pin.EvCheck.amCounter, mode64,
+                                  endness_host);
       /* bge nofail */
-      p = emit32(p, 0x40800010);
+      p = emit32(p, 0x40800010, endness_host);
       /* lwz/ld r30, amFailAddr */
       p = do_load_or_store_machine_word(p, True/*isLoad*/, /*r*/30,
-                                        i->Pin.EvCheck.amFailAddr, mode64);
+                                        i->Pin.EvCheck.amFailAddr, mode64,
+                                        endness_host);
       /* mtctr r30 */
-      p = mkFormXFX(p, /*r*/30, 9, 467);
+      p = mkFormXFX(p, /*r*/30, 9, 467, endness_host);
       /* bctr */
-      p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 0);
+      p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 0, endness_host);
       /* nofail: */
 
       /* Crosscheck */
@@ -5733,19 +5785,19 @@
       */
       if (mode64) {
          p = mkLoadImm_EXACTLY2or5(
-                p, /*r*/30, 0x6555655565556555ULL, True/*mode64*/);
-         p = emit32(p, 0xEBBE0000);
-         p = emit32(p, 0x3BBD0001);
-         p = emit32(p, 0xFBBE0000);
+                p, /*r*/30, 0x6555655565556555ULL, True/*mode64*/, endness_host);
+         p = emit32(p, 0xEBBE0000, endness_host);
+         p = emit32(p, 0x3BBD0001, endness_host);
+         p = emit32(p, 0xFBBE0000, endness_host);
       } else {
          p = mkLoadImm_EXACTLY2or5(
-                p, /*r*/30, 0x65556555ULL, False/*!mode64*/);
-         p = emit32(p, 0x83BE0004);
-         p = emit32(p, 0x37BD0001);
-         p = emit32(p, 0x93BE0004);
-         p = emit32(p, 0x83BE0000);
-         p = emit32(p, 0x7FBD0194);
-         p = emit32(p, 0x93BE0000);
+                p, /*r*/30, 0x65556555ULL, False/*!mode64*/, endness_host);
+         p = emit32(p, 0x83BE0004, endness_host);
+         p = emit32(p, 0x37BD0001, endness_host);
+         p = emit32(p, 0x93BE0004, endness_host);
+         p = emit32(p, 0x83BE0000, endness_host);
+         p = emit32(p, 0x7FBD0194, endness_host);
+         p = emit32(p, 0x93BE0000, endness_host);
       }
       /* Tell the caller .. */
       vassert(!(*is_profInc));
@@ -5787,7 +5839,8 @@
                                  Bool  mode64 )
 {
    if (mode64) {
-      vassert(endness_host == VexEndnessBE); /* later: or LE */
+      vassert((endness_host == VexEndnessBE) ||
+              (endness_host == VexEndnessLE));
    } else {
       vassert(endness_host == VexEndnessBE);
    }
@@ -5805,9 +5858,9 @@
    vassert(0 == (3 & (HWord)p));
    vassert(isLoadImm_EXACTLY2or5(p, /*r*/30,
                                  Ptr_to_ULong(disp_cp_chain_me_EXPECTED),
-                                 mode64));
-   vassert(fetch32(p + (mode64 ? 20 : 8) + 0) == 0x7FC903A6);
-   vassert(fetch32(p + (mode64 ? 20 : 8) + 4) == 0x4E800421);
+                                 mode64, endness_host));
+   vassert(fetch32(p + (mode64 ? 20 : 8) + 0, endness_host) == 0x7FC903A6);
+   vassert(fetch32(p + (mode64 ? 20 : 8) + 4, endness_host) == 0x4E800421);
    /* And what we want to change it to is:
         imm32/64-fixed r30, place_to_jump_to
         mtctr r30
@@ -5819,9 +5872,10 @@
       The replacement has the same length as the original.
    */
    p = mkLoadImm_EXACTLY2or5(p, /*r*/30,
-                             Ptr_to_ULong(place_to_jump_to), mode64);
-   p = emit32(p, 0x7FC903A6);
-   p = emit32(p, 0x4E800420);
+                             Ptr_to_ULong(place_to_jump_to), mode64, 
+                             endness_host);
+   p = emit32(p, 0x7FC903A6, endness_host);
+   p = emit32(p, 0x4E800420, endness_host);
 
    Int len = p - (UChar*)place_to_chain;
    vassert(len == (mode64 ? 28 : 16)); /* stay sane */
@@ -5839,7 +5893,8 @@
                                    Bool  mode64 )
 {
    if (mode64) {
-      vassert(endness_host == VexEndnessBE); /* later: or LE */
+      vassert((endness_host == VexEndnessBE) ||
+              (endness_host == VexEndnessLE));
    } else {
       vassert(endness_host == VexEndnessBE);
    }
@@ -5857,9 +5912,9 @@
    vassert(0 == (3 & (HWord)p));
    vassert(isLoadImm_EXACTLY2or5(p, /*r*/30,
                                  Ptr_to_ULong(place_to_jump_to_EXPECTED),
-                                 mode64));
-   vassert(fetch32(p + (mode64 ? 20 : 8) + 0) == 0x7FC903A6);
-   vassert(fetch32(p + (mode64 ? 20 : 8) + 4) == 0x4E800420);
+                                 mode64, endness_host));
+   vassert(fetch32(p + (mode64 ? 20 : 8) + 0, endness_host) == 0x7FC903A6);
+   vassert(fetch32(p + (mode64 ? 20 : 8) + 4, endness_host) == 0x4E800420);
    /* And what we want to change it to is:
         imm32/64-fixed r30, disp_cp_chain_me
         mtctr r30
@@ -5871,9 +5926,10 @@
       The replacement has the same length as the original.
    */
    p = mkLoadImm_EXACTLY2or5(p, /*r*/30,
-                             Ptr_to_ULong(disp_cp_chain_me), mode64);
-   p = emit32(p, 0x7FC903A6);
-   p = emit32(p, 0x4E800421);
+                             Ptr_to_ULong(disp_cp_chain_me), mode64, 
+                             endness_host);
+   p = emit32(p, 0x7FC903A6, endness_host);
+   p = emit32(p, 0x4E800421, endness_host);
 
    Int len = p - (UChar*)place_to_unchain;
    vassert(len == (mode64 ? 28 : 16)); /* stay sane */
@@ -5890,7 +5946,8 @@
                                  Bool   mode64 )
 {
    if (mode64) {
-      vassert(endness_host == VexEndnessBE); /* later: or LE */
+      vassert((endness_host == VexEndnessBE) ||
+              (endness_host == VexEndnessLE));
    } else {
       vassert(endness_host == VexEndnessBE);
    }
@@ -5901,27 +5958,29 @@
    Int len = 0;
    if (mode64) {
       vassert(isLoadImm_EXACTLY2or5(p, /*r*/30,
-                                    0x6555655565556555ULL, True/*mode64*/));
-      vassert(fetch32(p + 20) == 0xEBBE0000);
-      vassert(fetch32(p + 24) == 0x3BBD0001);
-      vassert(fetch32(p + 28) == 0xFBBE0000);
+                                    0x6555655565556555ULL, True/*mode64*/,
+                                    endness_host));
+      vassert(fetch32(p + 20, endness_host) == 0xEBBE0000);
+      vassert(fetch32(p + 24, endness_host) == 0x3BBD0001);
+      vassert(fetch32(p + 28, endness_host) == 0xFBBE0000);
       p = mkLoadImm_EXACTLY2or5(p, /*r*/30,
                                 Ptr_to_ULong(location_of_counter),
-                                True/*mode64*/);
+                                True/*mode64*/, endness_host);
       len = p - (UChar*)place_to_patch;
       vassert(len == 20);
    } else {
       vassert(isLoadImm_EXACTLY2or5(p, /*r*/30,
-                                    0x65556555ULL, False/*!mode64*/));
-      vassert(fetch32(p +  8) == 0x83BE0004);
-      vassert(fetch32(p + 12) == 0x37BD0001);
-      vassert(fetch32(p + 16) == 0x93BE0004);
-      vassert(fetch32(p + 20) == 0x83BE0000);
-      vassert(fetch32(p + 24) == 0x7FBD0194);
-      vassert(fetch32(p + 28) == 0x93BE0000);
+                                    0x65556555ULL, False/*!mode64*/, 
+                                    endness_host));
+      vassert(fetch32(p +  8, endness_host) == 0x83BE0004);
+      vassert(fetch32(p + 12, endness_host) == 0x37BD0001);
+      vassert(fetch32(p + 16, endness_host) == 0x93BE0004);
+      vassert(fetch32(p + 20, endness_host) == 0x83BE0000);
+      vassert(fetch32(p + 24, endness_host) == 0x7FBD0194);
+      vassert(fetch32(p + 28, endness_host) == 0x93BE0000);
       p = mkLoadImm_EXACTLY2or5(p, /*r*/30,
                                 Ptr_to_ULong(location_of_counter),
-                                False/*!mode64*/);
+                                False/*!mode64*/, endness_host);
       len = p - (UChar*)place_to_patch;
       vassert(len == 8);
    }
diff --git a/priv/host_ppc_isel.c b/priv/host_ppc_isel.c
index d675120..95ed7b6 100644
--- a/priv/host_ppc_isel.c
+++ b/priv/host_ppc_isel.c
@@ -379,8 +379,10 @@
 */
 /* 32-bit mode: compute an I8/I16/I32 into a GPR.
    64-bit mode: compute an I8/I16/I32/I64 into a GPR. */
-static HReg          iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e );
-static HReg          iselWordExpr_R     ( ISelEnv* env, IRExpr* e );
+static HReg          iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e,
+                                          IREndness IEndianess );
+static HReg          iselWordExpr_R     ( ISelEnv* env, IRExpr* e,
+                                          IREndness IEndianess );
 
 /* 32-bit mode: Compute an I8/I16/I32 into a RH
                 (reg-or-halfword-immediate).
@@ -392,26 +394,34 @@
    return can have their sign inverted if need be. 
 */
 static PPCRH*        iselWordExpr_RH_wrk ( ISelEnv* env, 
-                                           Bool syned, IRExpr* e );
+                                           Bool syned, IRExpr* e,
+                                           IREndness IEndianess );
 static PPCRH*        iselWordExpr_RH     ( ISelEnv* env, 
-                                           Bool syned, IRExpr* e );
+                                           Bool syned, IRExpr* e,
+                                           IREndness IEndianess );
 
 /* 32-bit mode: compute an I32 into a RI (reg or 32-bit immediate).
    64-bit mode: compute an I64 into a RI (reg or 64-bit immediate). */
-static PPCRI*        iselWordExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
-static PPCRI*        iselWordExpr_RI     ( ISelEnv* env, IRExpr* e );
+static PPCRI*        iselWordExpr_RI_wrk ( ISelEnv* env, IRExpr* e,
+                                           IREndness IEndianess );
+static PPCRI*        iselWordExpr_RI     ( ISelEnv* env, IRExpr* e,
+                                           IREndness IEndianess );
 
 /* In 32 bit mode ONLY, compute an I8 into a
    reg-or-5-bit-unsigned-immediate, the latter being an immediate in
    the range 1 .. 31 inclusive.  Used for doing shift amounts. */
-static PPCRH*        iselWordExpr_RH5u_wrk ( ISelEnv* env, IRExpr* e );
-static PPCRH*        iselWordExpr_RH5u     ( ISelEnv* env, IRExpr* e );
+static PPCRH*        iselWordExpr_RH5u_wrk ( ISelEnv* env, IRExpr* e,
+                                             IREndness IEndianess );
+static PPCRH*        iselWordExpr_RH5u     ( ISelEnv* env, IRExpr* e,
+                                             IREndness IEndianess );
 
 /* In 64-bit mode ONLY, compute an I8 into a
    reg-or-6-bit-unsigned-immediate, the latter being an immediate in
    the range 1 .. 63 inclusive.  Used for doing shift amounts. */
-static PPCRH*        iselWordExpr_RH6u_wrk ( ISelEnv* env, IRExpr* e );
-static PPCRH*        iselWordExpr_RH6u     ( ISelEnv* env, IRExpr* e );
+static PPCRH*        iselWordExpr_RH6u_wrk ( ISelEnv* env, IRExpr* e,
+                                             IREndness IEndianess );
+static PPCRH*        iselWordExpr_RH6u     ( ISelEnv* env, IRExpr* e,
+                                             IREndness IEndianess );
 
 /* 32-bit mode: compute an I32 into an AMode.
    64-bit mode: compute an I64 into an AMode.
@@ -424,52 +434,75 @@
 
    Since there are no such restrictions on 32-bit insns, xferTy is
    ignored for 32-bit code generation. */
-static PPCAMode*     iselWordExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType xferTy );
-static PPCAMode*     iselWordExpr_AMode     ( ISelEnv* env, IRExpr* e, IRType xferTy );
+static PPCAMode*     iselWordExpr_AMode_wrk ( ISelEnv* env, IRExpr* e,
+                                              IRType xferTy,
+                                              IREndness IEndianess );
+static PPCAMode*     iselWordExpr_AMode     ( ISelEnv* env, IRExpr* e,
+                                              IRType xferTy,
+                                              IREndness IEndianess );
 
 static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
                                          HReg* rMedLo, HReg* rLo,
-                                         ISelEnv* env, IRExpr* e );
+                                         ISelEnv* env, IRExpr* e,
+                                         IREndness IEndianess );
 static void iselInt128Expr_to_32x4     ( HReg* rHi, HReg* rMedHi,
                                          HReg* rMedLo, HReg* rLo,
-                                         ISelEnv* env, IRExpr* e );
+                                         ISelEnv* env, IRExpr* e,
+                                         IREndness IEndianess );
 
 
 /* 32-bit mode ONLY: compute an I64 into a GPR pair. */
-static void          iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, 
-                                         ISelEnv* env, IRExpr* e );
-static void          iselInt64Expr     ( HReg* rHi, HReg* rLo, 
-                                         ISelEnv* env, IRExpr* e );
+static void          iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
+                                         ISelEnv* env, IRExpr* e,
+                                         IREndness IEndianess );
+static void          iselInt64Expr     ( HReg* rHi, HReg* rLo,
+                                         ISelEnv* env, IRExpr* e,
+                                         IREndness IEndianess );
 
 /* 64-bit mode ONLY: compute an I128 into a GPR64 pair. */
 static void          iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, 
-                                          ISelEnv* env, IRExpr* e );
+                                          ISelEnv* env, IRExpr* e,
+                                          IREndness IEndianess );
+
 static void          iselInt128Expr     ( HReg* rHi, HReg* rLo, 
-                                          ISelEnv* env, IRExpr* e );
+                                          ISelEnv* env, IRExpr* e,
+                                          IREndness IEndianess );
 
-static PPCCondCode   iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
-static PPCCondCode   iselCondCode     ( ISelEnv* env, IRExpr* e );
+static PPCCondCode   iselCondCode_wrk ( ISelEnv* env, IRExpr* e,
+                                        IREndness IEndianess );
+static PPCCondCode   iselCondCode     ( ISelEnv* env, IRExpr* e,
+                                        IREndness IEndianess );
 
-static HReg          iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
-static HReg          iselDblExpr     ( ISelEnv* env, IRExpr* e );
+static HReg          iselDblExpr_wrk ( ISelEnv* env, IRExpr* e,
+                                       IREndness IEndianess );
+static HReg          iselDblExpr     ( ISelEnv* env, IRExpr* e,
+                                       IREndness IEndianess );
 
-static HReg          iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
-static HReg          iselFltExpr     ( ISelEnv* env, IRExpr* e );
+static HReg          iselFltExpr_wrk ( ISelEnv* env, IRExpr* e,
+                                       IREndness IEndianess );
+static HReg          iselFltExpr     ( ISelEnv* env, IRExpr* e,
+                                       IREndness IEndianess );
 
-static HReg          iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
-static HReg          iselVecExpr     ( ISelEnv* env, IRExpr* e );
+static HReg          iselVecExpr_wrk ( ISelEnv* env, IRExpr* e,
+                                       IREndness IEndianess );
+static HReg          iselVecExpr     ( ISelEnv* env, IRExpr* e,
+                                       IREndness IEndianess );
 
 /* 64-bit mode ONLY. */
-static HReg          iselDfp32Expr_wrk ( ISelEnv* env, IRExpr* e );
-static HReg          iselDfp32Expr     ( ISelEnv* env, IRExpr* e );
-static HReg          iselDfp64Expr_wrk ( ISelEnv* env, IRExpr* e );
-static HReg          iselDfp64Expr     ( ISelEnv* env, IRExpr* e );
+static HReg          iselDfp32Expr_wrk ( ISelEnv* env, IRExpr* e,
+                                         IREndness IEndianess );
+static HReg          iselDfp32Expr     ( ISelEnv* env, IRExpr* e,
+                                         IREndness IEndianess );
+static HReg          iselDfp64Expr_wrk ( ISelEnv* env, IRExpr* e,
+                                         IREndness IEndianess );
+static HReg          iselDfp64Expr     ( ISelEnv* env, IRExpr* e,
+                                         IREndness IEndianess );
 
 /* 64-bit mode ONLY: compute an D128 into a GPR64 pair. */
 static void iselDfp128Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env,
-                                 IRExpr* e );
+                                 IRExpr* e, IREndness IEndianess );
 static void iselDfp128Expr     ( HReg* rHi, HReg* rLo, ISelEnv* env,
-                                 IRExpr* e );
+                                 IRExpr* e, IREndness IEndianess );
 
 /*---------------------------------------------------------*/
 /*--- ISEL: Misc helpers                                ---*/
@@ -596,7 +629,7 @@
    guest state.  */
 static
 PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
-                                IRExpr* off, Int bias )
+                                IRExpr* off, Int bias, IREndness IEndianess )
 {
    HReg rtmp, roff;
    Int  elemSz = sizeofIRType(descr->elemTy);
@@ -628,7 +661,7 @@
          addi %tmp, %tmp, base
          ... Baseblockptr + %tmp ...
    */
-   roff = iselWordExpr_R(env, off);
+   roff = iselWordExpr_R(env, off, IEndianess);
    rtmp = newVRegI(env);
    addInstr(env, PPCInstr_Alu(
                     Palu_ADD, 
@@ -685,7 +718,8 @@
                     /*OUT*/RetLoc* retloc,
                     ISelEnv* env,
                     IRExpr* guard,
-                    IRCallee* cee, IRType retTy, IRExpr** args )
+                    IRCallee* cee, IRType retTy, IRExpr** args,
+                    IREndness IEndianess)
 {
    PPCCondCode cc;
    HReg        argregs[PPC_N_REGPARMS];
@@ -858,14 +892,15 @@
                   argiregs |= (1 << (argreg+3));
                   addInstr(env,
                            mk_iMOVds_RR( argregs[argreg],
-                                         iselWordExpr_R(env, arg) ));
+                                         iselWordExpr_R(env, arg,
+                                                        IEndianess) ));
                } else { // Ity_I64 in 32-bit mode
                   HReg rHi, rLo;
                   if ((argreg%2) == 1)
                                  // ppc32 ELF abi spec for passing LONG_LONG
                      argreg++;   // XXX: odd argreg => even rN
                   vassert(argreg < PPC_N_REGPARMS-1);
-                  iselInt64Expr(&rHi,&rLo, env, arg);
+                  iselInt64Expr(&rHi,&rLo, env, arg, IEndianess);
                   argiregs |= (1 << (argreg+3));
                   addInstr(env, mk_iMOVds_RR( argregs[argreg++], rHi ));
                   argiregs |= (1 << (argreg+3));
@@ -874,7 +909,8 @@
             } else { // mode64
                argiregs |= (1 << (argreg+3));
                addInstr(env, mk_iMOVds_RR( argregs[argreg],
-                                           iselWordExpr_R(env, arg) ));
+                                           iselWordExpr_R(env, arg,
+                                                          IEndianess) ));
             }
             argreg++;
          } /* if (arg == IRExprP__BBPR) */
@@ -934,19 +970,19 @@
             vassert(ty == Ity_I32 || ty == Ity_I64);
             if (!mode64) {
                if (ty == Ity_I32) { 
-                  tmpregs[argreg] = iselWordExpr_R(env, arg);
+                  tmpregs[argreg] = iselWordExpr_R(env, arg, IEndianess);
                } else { // Ity_I64 in 32-bit mode
                   HReg rHi, rLo;
                   if ((argreg%2) == 1)
                                 // ppc32 ELF abi spec for passing LONG_LONG
                      argreg++;  // XXX: odd argreg => even rN
                   vassert(argreg < PPC_N_REGPARMS-1);
-                  iselInt64Expr(&rHi,&rLo, env, arg);
+                  iselInt64Expr(&rHi,&rLo, env, arg, IEndianess);
                   tmpregs[argreg++] = rHi;
                   tmpregs[argreg]   = rLo;
                }
             } else { // mode64
-               tmpregs[argreg] = iselWordExpr_R(env, arg);
+               tmpregs[argreg] = iselWordExpr_R(env, arg, IEndianess);
             }
          }
          argreg++;
@@ -963,7 +999,7 @@
              && guard->Iex.Const.con->Ico.U1 == True) {
             /* unconditional -- do nothing */
          } else {
-            cc = iselCondCode( env, guard );
+            cc = iselCondCode( env, guard, IEndianess );
          }
       }
 
@@ -1125,7 +1161,8 @@
    on any block with any sign of floating point activity.
 */
 static
-void _set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode, Bool dfp_rm )
+void _set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode, Bool dfp_rm,
+                              IREndness IEndianess )
 {
    HReg fr_src = newVRegF(env);
    HReg r_src;
@@ -1149,7 +1186,7 @@
       0x0 - so we can set the whole register at once (faster). */
 
    // Resolve rounding mode and convert to PPC representation
-   r_src = roundModeIRtoPPC( env, iselWordExpr_R(env, mode) );
+   r_src = roundModeIRtoPPC( env, iselWordExpr_R(env, mode, IEndianess) );
 
    // gpr -> fpr
    if (env->mode64) {
@@ -1176,14 +1213,16 @@
    addInstr(env, PPCInstr_FpLdFPSCR( fr_src, dfp_rm ));
 }
 
-static void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
+static void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode,
+                                    IREndness IEndianess )
 {
-   _set_FPU_rounding_mode(env, mode, False);
+   _set_FPU_rounding_mode(env, mode, False, IEndianess);
 }
 
-static void set_FPU_DFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
+static void set_FPU_DFP_rounding_mode ( ISelEnv* env, IRExpr* mode,
+                                        IREndness IEndianess )
 {
-   _set_FPU_rounding_mode(env, mode, True);
+   _set_FPU_rounding_mode(env, mode, True, IEndianess);
 }
 
 
@@ -1218,11 +1257,11 @@
   - uses AvSplat(imm) for imms up to simm6.
     otherwise must use store reg & load vector
 */
-static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e )
+static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e, IREndness IEndianess )
 {
    HReg   r_src;
    HReg   dst = newVRegV(env);
-   PPCRI* ri  = iselWordExpr_RI(env, e);
+   PPCRI* ri  = iselWordExpr_RI(env, e, IEndianess);
    IRType ty  = typeOfIRExpr(env->type_env,e);
    UInt   sz  = (ty == Ity_I8) ? 8 : (ty == Ity_I16) ? 16 : 32;
    vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
@@ -1269,44 +1308,43 @@
       r_src = ri->Pri.Reg;
    }
 
-   /* default case: store r_src in lowest lane of 16-aligned mem,
-      load vector, splat lowest lane to dst */
    {
-      /* CAB: Maybe faster to store r_src multiple times (sz dependent),
-              and simply load the vector? */
+      /* Store r_src multiple times (sz dependent); then load the dest vector. */
       HReg r_aligned16;
-      HReg v_src = newVRegV(env);
-      PPCAMode *am_off12;
+      PPCAMode *am_offset, *am_offset_zero;
 
       sub_from_sp( env, 32 );     // Move SP down
       /* Get a 16-aligned address within our stack space */
       r_aligned16 = get_sp_aligned16( env );
-      am_off12 = PPCAMode_IR( 12, r_aligned16 );
 
-      /* Store r_src in low word of 16-aligned mem */
-      addInstr(env, PPCInstr_Store( 4, am_off12, r_src, env->mode64 ));
+      Int i;
+      Int stride = (sz == 8) ? 1 : (sz == 16) ? 2 : 4;
+      UChar num_bytes_to_store = stride;
+      am_offset_zero = PPCAMode_IR( 0, r_aligned16 );
+      am_offset = am_offset_zero;
+      for (i = 0; i < 16; i+=stride, am_offset = PPCAMode_IR( i, r_aligned16)) {
+         addInstr(env, PPCInstr_Store( num_bytes_to_store, am_offset, r_src, env->mode64 ));
+      }
 
-      /* Load src to vector[low lane] */
-      addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, v_src, am_off12 ) );
+      /* Load all 16 bytes just stored: this splats the r_src value to dst */
+      addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 16, dst, am_offset_zero ) );
       add_to_sp( env, 32 );       // Reset SP
 
-      /* Finally, splat v_src[low_lane] to dst */
-      addInstr(env, PPCInstr_AvSplat(sz, dst, PPCVI5s_Reg(v_src)));
       return dst;
    }
 }
 
 
 /* for each lane of vSrc: lane == nan ? laneX = all 1's : all 0's */
-static HReg isNan ( ISelEnv* env, HReg vSrc )
+static HReg isNan ( ISelEnv* env, HReg vSrc, IREndness IEndianess )
 {
    HReg zeros, msk_exp, msk_mnt, expt, mnts, vIsNan;
  
    vassert(hregClass(vSrc) == HRcVec128);
 
-   zeros   = mk_AvDuplicateRI(env, mkU32(0));
-   msk_exp = mk_AvDuplicateRI(env, mkU32(0x7F800000));
-   msk_mnt = mk_AvDuplicateRI(env, mkU32(0x7FFFFF));
+   zeros   = mk_AvDuplicateRI(env, mkU32(0), IEndianess);
+   msk_exp = mk_AvDuplicateRI(env, mkU32(0x7F800000), IEndianess);
+   msk_mnt = mk_AvDuplicateRI(env, mkU32(0x7FFFFF), IEndianess);
    expt    = newVRegV(env);
    mnts    = newVRegV(env);
    vIsNan  = newVRegV(env); 
@@ -1342,9 +1380,9 @@
    if necessary.
 */
 
-static HReg iselWordExpr_R ( ISelEnv* env, IRExpr* e )
+static HReg iselWordExpr_R ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
 {
-   HReg r = iselWordExpr_R_wrk(env, e);
+   HReg r = iselWordExpr_R_wrk(env, e, IEndianess);
    /* sanity checks ... */
 #  if 0
    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
@@ -1356,7 +1394,8 @@
 }
 
 /* DO NOT CALL THIS DIRECTLY ! */
-static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
+static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e,
+                                 IREndness IEndianess )
 {
    Bool mode64 = env->mode64;
    MatchInfo mi;
@@ -1376,10 +1415,11 @@
    case Iex_Load: {
       HReg      r_dst;
       PPCAMode* am_addr;
-      if (e->Iex.Load.end != Iend_BE)
+      if (e->Iex.Load.end != IEndianess)
          goto irreducible;
       r_dst   = newVRegI(env);
-      am_addr = iselWordExpr_AMode( env, e->Iex.Load.addr, ty/*of xfer*/ );
+      am_addr = iselWordExpr_AMode( env, e->Iex.Load.addr, ty/*of xfer*/,
+                                    IEndianess );
       addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)), 
                                    r_dst, am_addr, mode64 ));
       return r_dst;
@@ -1410,17 +1450,17 @@
          values are on the second operand. */
       if (aluOp != Palu_INVALID) {
          HReg   r_dst   = newVRegI(env);
-         HReg   r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1);
+         HReg   r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
          PPCRH* ri_srcR = NULL;
          /* get right arg into an RH, in the appropriate way */
          switch (aluOp) {
          case Palu_ADD: case Palu_SUB:
             ri_srcR = iselWordExpr_RH(env, True/*signed*/, 
-                                      e->Iex.Binop.arg2);
+                                      e->Iex.Binop.arg2, IEndianess);
             break;
          case Palu_AND: case Palu_OR: case Palu_XOR:
             ri_srcR = iselWordExpr_RH(env, False/*signed*/,
-                                      e->Iex.Binop.arg2);
+                                      e->Iex.Binop.arg2, IEndianess);
             break;
          default:
             vpanic("iselWordExpr_R_wrk-aluOp-arg2");
@@ -1443,15 +1483,15 @@
       /* we assume any literal values are on the second operand. */
       if (shftOp != Pshft_INVALID) {
          HReg   r_dst   = newVRegI(env);
-         HReg   r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1);
+         HReg   r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
          PPCRH* ri_srcR = NULL;
          /* get right arg into an RH, in the appropriate way */
          switch (shftOp) {
          case Pshft_SHL: case Pshft_SHR: case Pshft_SAR:
             if (!mode64)
-               ri_srcR = iselWordExpr_RH5u(env, e->Iex.Binop.arg2);
+               ri_srcR = iselWordExpr_RH5u(env, e->Iex.Binop.arg2, IEndianess);
             else
-               ri_srcR = iselWordExpr_RH6u(env, e->Iex.Binop.arg2);
+               ri_srcR = iselWordExpr_RH6u(env, e->Iex.Binop.arg2, IEndianess);
             break;
          default:
             vpanic("iselIntExpr_R_wrk-shftOp-arg2");
@@ -1492,8 +1532,8 @@
           e->Iex.Binop.op == Iop_DivU32E) {
          Bool syned  = toBool((e->Iex.Binop.op == Iop_DivS32) || (e->Iex.Binop.op == Iop_DivS32E));
          HReg r_dst  = newVRegI(env);
-         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
-         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
          addInstr( env,
                       PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivU32E )
                                              || ( e->Iex.Binop.op == Iop_DivS32E ) ) ? True
@@ -1510,8 +1550,8 @@
           || e->Iex.Binop.op == Iop_DivU64E ) {
          Bool syned  = toBool((e->Iex.Binop.op == Iop_DivS64) ||(e->Iex.Binop.op == Iop_DivS64E));
          HReg r_dst  = newVRegI(env);
-         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
-         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
          vassert(mode64);
          addInstr( env,
                       PPCInstr_Div( ( ( e->Iex.Binop.op == Iop_DivS64E )
@@ -1532,8 +1572,8 @@
          Bool syned       = False;
          Bool sz32        = (e->Iex.Binop.op != Iop_Mul64);
          HReg r_dst       = newVRegI(env);
-         HReg r_srcL      = iselWordExpr_R(env, e->Iex.Binop.arg1);
-         HReg r_srcR      = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         HReg r_srcL      = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+         HReg r_srcR      = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
          addInstr(env, PPCInstr_MulL(syned, False/*lo32*/, sz32,
                                      r_dst, r_srcL, r_srcR));
          return r_dst;
@@ -1547,8 +1587,8 @@
          HReg tHi    = newVRegI(env);
          HReg r_dst  = newVRegI(env);
          Bool syned  = toBool(e->Iex.Binop.op == Iop_MullS32);
-         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
-         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+         HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
          addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/, 
                                      False/*lo32*/, True/*32bit mul*/,
                                      tLo, r_srcL, r_srcR));
@@ -1567,8 +1607,9 @@
           || e->Iex.Binop.op == Iop_CmpORD32U) {
          Bool   syned = toBool(e->Iex.Binop.op == Iop_CmpORD32S);
          HReg   dst   = newVRegI(env);
-         HReg   srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1);
-         PPCRH* srcR  = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2);
+         HReg   srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+         PPCRH* srcR  = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2,
+                                        IEndianess);
          addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
                                     7/*cr*/, srcL, srcR));
          addInstr(env, PPCInstr_MfCR(dst));
@@ -1581,8 +1622,9 @@
           || e->Iex.Binop.op == Iop_CmpORD64U) {
          Bool   syned = toBool(e->Iex.Binop.op == Iop_CmpORD64S);
          HReg   dst   = newVRegI(env);
-         HReg   srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1);
-         PPCRH* srcR  = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2);
+         HReg   srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+         PPCRH* srcR  = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2,
+                                        IEndianess);
          vassert(mode64);
          addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
                                     7/*cr*/, srcL, srcR));
@@ -1593,8 +1635,8 @@
       }
 
       if (e->Iex.Binop.op == Iop_Max32U) {
-         HReg        r1   = iselWordExpr_R(env, e->Iex.Binop.arg1);
-         HReg        r2   = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         HReg        r1   = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+         HReg        r2   = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
          HReg        rdst = newVRegI(env);
          PPCCondCode cc   = mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
          addInstr(env, mk_iMOVds_RR(rdst, r1));
@@ -1605,8 +1647,8 @@
       }
 
       if (e->Iex.Binop.op == Iop_32HLto64) {
-         HReg   r_Hi  = iselWordExpr_R(env, e->Iex.Binop.arg1);
-         HReg   r_Lo  = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         HReg   r_Hi  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+         HReg   r_Lo  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
          HReg   r_Tmp = newVRegI(env);
          HReg   r_dst = newVRegI(env);
          HReg   msk   = newVRegI(env);
@@ -1637,18 +1679,20 @@
          HReg r_ccIR_b6 = newVRegI(env);
 
          if (e->Iex.Binop.op == Iop_CmpF64) {
-            fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1);
-            fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2);
+            fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
+            fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
             addInstr(env, PPCInstr_FpCmp(r_ccPPC, fr_srcL, fr_srcR));
 
          } else if (e->Iex.Binop.op == Iop_CmpD64) {
-            fr_srcL = iselDfp64Expr(env, e->Iex.Binop.arg1);
-            fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2);
+            fr_srcL = iselDfp64Expr(env, e->Iex.Binop.arg1, IEndianess);
+            fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
             addInstr(env, PPCInstr_Dfp64Cmp(r_ccPPC, fr_srcL, fr_srcR));
 
          } else {    //  e->Iex.Binop.op == Iop_CmpD128
-            iselDfp128Expr(&fr_srcL, &fr_srcL_lo, env, e->Iex.Binop.arg1);
-            iselDfp128Expr(&fr_srcR, &fr_srcR_lo, env, e->Iex.Binop.arg2);
+            iselDfp128Expr(&fr_srcL, &fr_srcL_lo, env, e->Iex.Binop.arg1,
+                           IEndianess);
+            iselDfp128Expr(&fr_srcR, &fr_srcR_lo, env, e->Iex.Binop.arg2,
+                           IEndianess);
             addInstr(env, PPCInstr_Dfp128Cmp(r_ccPPC, fr_srcL, fr_srcL_lo,
                                              fr_srcR, fr_srcR_lo));
          }
@@ -1705,12 +1749,12 @@
          /* This works in both mode64 and mode32. */
          HReg      r1      = StackFramePtr(env->mode64);
          PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
-         HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2);
+         HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
          HReg      ftmp    = newVRegF(env);
          HReg      idst    = newVRegI(env);
 
          /* Set host rounding mode */
-         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
 
          sub_from_sp( env, 16 );
          addInstr(env, PPCInstr_FpCftI(False/*F->I*/, True/*int32*/,
@@ -1736,12 +1780,13 @@
          if (mode64) {
             HReg      r1      = StackFramePtr(env->mode64);
             PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
-            HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2);
+            HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2,
+                                            IEndianess);
             HReg      idst    = newVRegI(env);         
             HReg      ftmp    = newVRegF(env);
 
             /* Set host rounding mode */
-            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
 
             sub_from_sp( env, 16 );
             addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
@@ -1761,12 +1806,12 @@
       if (e->Iex.Binop.op == Iop_D64toI64S ) {
          HReg      r1      = StackFramePtr(env->mode64);
          PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
-         HReg      fr_src  = iselDfp64Expr(env, e->Iex.Binop.arg2);
+         HReg      fr_src  = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
          HReg      idst    = newVRegI(env);
          HReg      ftmp    = newVRegF(env);
 
          /* Set host rounding mode */
-         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
+         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
          addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, ftmp, fr_src));
          sub_from_sp( env, 16 );
          addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
@@ -1787,8 +1832,9 @@
          HReg ftmp    = newVRegF(env);
          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
 
-         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
-         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2);
+         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
+         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
+                        IEndianess);
          addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
 
          // put the D64 result into an integer register
@@ -1811,7 +1857,7 @@
       if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
          IRExpr* expr32 = mi.bindee[0];
          HReg r_dst = newVRegI(env);
-         HReg r_src = iselWordExpr_R(env, expr32);
+         HReg r_src = iselWordExpr_R(env, expr32, IEndianess);
          addInstr(env, PPCInstr_Alu(Palu_AND, r_dst,
                                     r_src, PPCRH_Imm(False,1)));
          return r_dst;
@@ -1822,11 +1868,12 @@
          DECLARE_PATTERN(p_LDbe16_then_16Uto32);
          DEFINE_PATTERN(p_LDbe16_then_16Uto32,
                         unop(Iop_16Uto32,
-                             IRExpr_Load(Iend_BE,Ity_I16,bind(0))) );
+                             IRExpr_Load(IEndianess,Ity_I16,bind(0))) );
          if (matchIRExpr(&mi,p_LDbe16_then_16Uto32,e)) {
             HReg r_dst = newVRegI(env);
             PPCAMode* amode
-               = iselWordExpr_AMode( env, mi.bindee[0], Ity_I16/*xfer*/ );
+               = iselWordExpr_AMode( env, mi.bindee[0], Ity_I16/*xfer*/,
+                                     IEndianess );
             addInstr(env, PPCInstr_Load(2,r_dst,amode, mode64));
             return r_dst;
          }
@@ -1839,7 +1886,7 @@
       case Iop_16Uto32:
       case Iop_16Uto64: {
          HReg   r_dst = newVRegI(env);
-         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          UShort mask  = toUShort(op_unop==Iop_16Uto64 ? 0xFFFF :
                                  op_unop==Iop_16Uto32 ? 0xFFFF : 0xFF);
          addInstr(env, PPCInstr_Alu(Palu_AND,r_dst,r_src,
@@ -1848,7 +1895,7 @@
       }
       case Iop_32Uto64: {
          HReg r_dst = newVRegI(env);
-         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          vassert(mode64);
          addInstr(env,
                   PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
@@ -1862,7 +1909,7 @@
       case Iop_8Sto32:
       case Iop_16Sto32: {
          HReg   r_dst = newVRegI(env);
-         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          UShort amt   = toUShort(op_unop==Iop_16Sto32 ? 16 : 24);
          addInstr(env,
                   PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
@@ -1875,7 +1922,7 @@
       case Iop_8Sto64:
       case Iop_16Sto64: {
          HReg   r_dst = newVRegI(env);
-         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          UShort amt   = toUShort(op_unop==Iop_8Sto64  ? 56 : 48);
          vassert(mode64);
          addInstr(env,
@@ -1888,7 +1935,7 @@
       }
       case Iop_32Sto64: {
          HReg   r_dst = newVRegI(env);
-         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
 	 vassert(mode64);
          /* According to the IBM docs, in 64 bit mode, srawi r,r,0
             sign extends the lower 32 bits into the upper 32 bits. */
@@ -1903,18 +1950,18 @@
       case Iop_Not64: {
          if (op_unop == Iop_Not64) vassert(mode64);
          HReg r_dst = newVRegI(env);
-         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          addInstr(env, PPCInstr_Unary(Pun_NOT,r_dst,r_src));
          return r_dst;
       }
       case Iop_64HIto32: {
          if (!mode64) {
             HReg rHi, rLo;
-            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
             return rHi; /* and abandon rLo .. poor wee thing :-) */
          } else {
             HReg   r_dst = newVRegI(env);
-            HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+            HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
             addInstr(env,
                      PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
                                    r_dst, r_src, PPCRH_Imm(False,32)));
@@ -1924,23 +1971,23 @@
       case Iop_64to32: {
          if (!mode64) {
             HReg rHi, rLo;
-            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
             return rLo; /* similar stupid comment to the above ... */
          } else {
             /* This is a no-op. */
-            return iselWordExpr_R(env, e->Iex.Unop.arg);
+            return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          }
       }
       case Iop_64to16: {
          if (mode64) { /* This is a no-op. */
-            return iselWordExpr_R(env, e->Iex.Unop.arg);
+            return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          }
          break; /* evidently not used in 32-bit mode */
       }
       case Iop_16HIto8:
       case Iop_32HIto16: {
          HReg   r_dst = newVRegI(env);
-         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          UShort shift = toUShort(op_unop == Iop_16HIto8 ? 8 : 16);
          addInstr(env,
                   PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
@@ -1950,14 +1997,14 @@
       case Iop_128HIto64: 
          if (mode64) {
             HReg rHi, rLo;
-            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
             return rHi; /* and abandon rLo .. poor wee thing :-) */
          }
          break;
       case Iop_128to64:
          if (mode64) {
             HReg rHi, rLo;
-            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg, IEndianess);
             return rLo; /* similar stupid comment to the above ... */
          }
          break;
@@ -1966,7 +2013,7 @@
       case Iop_1Uto8:
          if ((op_unop != Iop_1Uto64) || mode64) {
             HReg        r_dst = newVRegI(env);
-            PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg);
+            PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
             addInstr(env, PPCInstr_Set(cond,r_dst));
             return r_dst;
          }
@@ -1976,7 +2023,7 @@
       case Iop_1Sto32: {
          /* could do better than this, but for now ... */
          HReg        r_dst = newVRegI(env);
-         PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg);
+         PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
          addInstr(env, PPCInstr_Set(cond,r_dst));
          addInstr(env,
                   PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
@@ -1990,7 +2037,7 @@
          if (mode64) {
             /* could do better than this, but for now ... */
             HReg        r_dst = newVRegI(env);
-            PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg);
+            PPCCondCode cond  = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
             addInstr(env, PPCInstr_Set(cond,r_dst));
             addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
                                         r_dst, r_dst, PPCRH_Imm(False,63)));
@@ -2008,7 +2055,7 @@
             goto irreducible;
          /* Count leading zeroes. */
          r_dst = newVRegI(env);
-         r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
          return r_dst;
       }
@@ -2021,7 +2068,7 @@
          if (op_unop == Iop_Left64 && !mode64)
             goto irreducible;
          r_dst = newVRegI(env);
-         r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
          addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
          return r_dst;
@@ -2029,7 +2076,7 @@
 
       case Iop_CmpwNEZ32: {
          HReg r_dst = newVRegI(env);
-         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
          addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
          addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/, 
@@ -2039,7 +2086,7 @@
 
       case Iop_CmpwNEZ64: {
          HReg r_dst = newVRegI(env);
-         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          if (!mode64) goto irreducible;
          addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
          addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
@@ -2051,20 +2098,31 @@
       case Iop_V128to32: {
          HReg        r_aligned16;
          HReg        dst  = newVRegI(env);
-         HReg        vec  = iselVecExpr(env, e->Iex.Unop.arg);
-         PPCAMode *am_off0, *am_off12;
+         HReg        vec  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
+         PPCAMode *am_off0, *am_off_word0;
          sub_from_sp( env, 32 );     // Move SP down 32 bytes
 
          // get a quadword aligned address within our stack space
          r_aligned16 = get_sp_aligned16( env );
          am_off0  = PPCAMode_IR( 0, r_aligned16 );
-         am_off12 = PPCAMode_IR( 12,r_aligned16 );
+
+         /* Note that the store below (done via PPCInstr_AvLdSt) uses
+          * stvx, which writes the vector to memory in host byte order.
+          * On an LE host, byte zero (far right byte of the register)
+          * is stored at the lowest memory address, so integer word zero
+          * (the low word) is loaded from offset 0.  On a BE host the low
+          * word is the last word stored, so it is loaded from offset 12.
+          */
+         if (IEndianess == Iend_LE)
+            am_off_word0 = am_off0;
+         else
+            am_off_word0 = PPCAMode_IR( 12,r_aligned16 );
 
          // store vec, load low word to dst
          addInstr(env,
                   PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
          addInstr(env,
-                  PPCInstr_Load( 4, dst, am_off12, mode64 ));
+                  PPCInstr_Load( 4, dst, am_off_word0, mode64 ));
 
          add_to_sp( env, 32 );       // Reset SP
          return dst;
@@ -2075,8 +2133,8 @@
          if (mode64) {
             HReg     r_aligned16;
             HReg     dst = newVRegI(env);
-            HReg     vec = iselVecExpr(env, e->Iex.Unop.arg);
-            PPCAMode *am_off0, *am_off8;
+            HReg     vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
+            PPCAMode *am_off0, *am_off8, *am_off_arg;
             sub_from_sp( env, 32 );     // Move SP down 32 bytes
 
             // get a quadword aligned address within our stack space
@@ -2084,13 +2142,24 @@
             am_off0 = PPCAMode_IR( 0, r_aligned16 );
             am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
 
-            // store vec, load low word (+8) or high (+0) to dst
+            // store vec, load low or high doubleword to dst
             addInstr(env,
                      PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
+            if (IEndianess == Iend_LE) {
+               if (op_unop == Iop_V128HIto64)
+                  am_off_arg = am_off8;
+               else
+                  am_off_arg = am_off0;
+            } else {
+               if (op_unop == Iop_V128HIto64)
+                  am_off_arg = am_off0;
+               else
+                  am_off_arg = am_off8;
+            }
             addInstr(env,
                      PPCInstr_Load( 
                         8, dst, 
-                        op_unop == Iop_V128HIto64 ? am_off0 : am_off8, 
+                        am_off_arg,
                         mode64 ));
 
             add_to_sp( env, 32 );       // Reset SP
@@ -2102,7 +2171,7 @@
       case Iop_32to16:
       case Iop_64to8:
          /* These are no-ops. */
-         return iselWordExpr_R(env, e->Iex.Unop.arg);
+         return iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          
       /* ReinterpF64asI64(e) */
       /* Given an IEEE754 double, produce an I64 with the same bit
@@ -2110,7 +2179,7 @@
       case Iop_ReinterpF64asI64: 
          if (mode64) {
             PPCAMode *am_addr;
-            HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg);
+            HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
             HReg r_dst  = newVRegI(env);
 
             sub_from_sp( env, 16 );     // Move SP down 16 bytes
@@ -2134,7 +2203,7 @@
          /* I believe this generates correct code for both 32- and
             64-bit hosts. */
          PPCAMode *am_addr;
-         HReg fr_src = iselFltExpr(env, e->Iex.Unop.arg);
+         HReg fr_src = iselFltExpr(env, e->Iex.Unop.arg, IEndianess);
          HReg r_dst  = newVRegI(env);
 
          sub_from_sp( env, 16 );     // Move SP down 16 bytes
@@ -2154,7 +2223,7 @@
       case Iop_ReinterpD64asI64:
          if (mode64) {
             PPCAMode *am_addr;
-            HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg);
+            HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
             HReg r_dst  = newVRegI(env);
 
             sub_from_sp( env, 16 );     // Move SP down 16 bytes
@@ -2179,7 +2248,6 @@
          HReg        argregs[1];
          HReg        r_dst  = newVRegI(env);
          Int         argreg;
-         HWord*      fdescr;
 
          argiregs = 0;
          argreg = 0;
@@ -2187,13 +2255,21 @@
 
          argiregs |= (1 << (argreg+3));
          addInstr(env, mk_iMOVds_RR( argregs[argreg++],
-                                     iselWordExpr_R(env, e->Iex.Unop.arg) ) );
+                                     iselWordExpr_R(env, e->Iex.Unop.arg,
+                                                    IEndianess) ) );
 
          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
-
-         fdescr = (HWord*)h_calc_BCDtoDPB;
-         addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
-                                      argiregs, mk_RetLoc_simple(RLPri_Int)) );
+         if (IEndianess == Iend_LE) {
+            addInstr(env, PPCInstr_Call( cc, Ptr_to_ULong(h_calc_BCDtoDPB),
+                                         argiregs,
+                                         mk_RetLoc_simple(RLPri_Int)) );
+         } else {
+            HWord*      fdescr;
+            fdescr = (HWord*)h_calc_BCDtoDPB;
+            addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
+                                         argiregs,
+                                         mk_RetLoc_simple(RLPri_Int)) );
+         }
 
          addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
          return r_dst;
@@ -2208,7 +2284,6 @@
          HReg        argregs[1];
          HReg        r_dst  = newVRegI(env);
          Int         argreg;
-         HWord*      fdescr;
 
          argiregs = 0;
          argreg = 0;
@@ -2216,13 +2291,22 @@
 
          argiregs |= (1 << (argreg+3));
          addInstr(env, mk_iMOVds_RR( argregs[argreg++],
-                                     iselWordExpr_R(env, e->Iex.Unop.arg) ) );
+                                     iselWordExpr_R(env, e->Iex.Unop.arg,
+                                                    IEndianess) ) );
 
          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
 
-         fdescr = (HWord*)h_calc_DPBtoBCD;
-         addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
-                                      argiregs, mk_RetLoc_simple(RLPri_Int) ) );
+         if (IEndianess == Iend_LE) {
+            addInstr(env, PPCInstr_Call( cc, Ptr_to_ULong(h_calc_DPBtoBCD),
+                                         argiregs,
+                                         mk_RetLoc_simple(RLPri_Int) ) );
+         } else {
+            HWord*      fdescr;
+            fdescr = (HWord*)h_calc_DPBtoBCD;
+            addInstr(env, PPCInstr_Call( cc, (Addr64)(fdescr[0]),
+                                         argiregs,
+                                         mk_RetLoc_simple(RLPri_Int) ) );
+         }
 
          addInstr(env, mk_iMOVds_RR(r_dst, argregs[0]));
          return r_dst;
@@ -2236,7 +2320,7 @@
         case Iop_ExtractExpD64: {
 
             HReg fr_dst = newVRegI(env);
-            HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg);
+            HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
             HReg tmp    = newVRegF(env);
             PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
             addInstr(env, PPCInstr_Dfp64Unary(Pfp_DXEX, tmp, fr_src));
@@ -2255,7 +2339,8 @@
             HReg tmp    = newVRegF(env);
             PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
 
-            iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg);
+            iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
+                           IEndianess);
             addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
                                                   r_srcHi, r_srcLo));
 
@@ -2289,7 +2374,8 @@
    case Iex_GetI: {
       PPCAMode* src_am
          = genGuestArrayOffset( env, e->Iex.GetI.descr,
-                                     e->Iex.GetI.ix, e->Iex.GetI.bias );
+                                e->Iex.GetI.ix, e->Iex.GetI.bias,
+                                IEndianess );
       HReg r_dst = newVRegI(env);
       if (mode64 && ty == Ity_I64) {
          addInstr(env, PPCInstr_Load( toUChar(8),
@@ -2317,7 +2403,8 @@
       UInt   addToSp = 0;
       RetLoc rloc    = mk_RetLoc_INVALID();
       doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
-                    e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
+                    e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args,
+                    IEndianess );
       vassert(is_sane_RetLoc(rloc));
       vassert(rloc.pri == RLPri_Int);
       vassert(addToSp == 0);
@@ -2351,11 +2438,11 @@
       if ((ty == Ity_I8  || ty == Ity_I16 ||
            ty == Ity_I32 || ((ty == Ity_I64) && mode64)) &&
           typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
-         PPCRI* r1    = iselWordExpr_RI(env, e->Iex.ITE.iftrue);
-         HReg   r0    = iselWordExpr_R(env, e->Iex.ITE.iffalse);
+         PPCRI* r1    = iselWordExpr_RI(env, e->Iex.ITE.iftrue, IEndianess);
+         HReg   r0    = iselWordExpr_R(env, e->Iex.ITE.iffalse, IEndianess);
          HReg   r_dst = newVRegI(env);
          addInstr(env, mk_iMOVds_RR(r_dst,r0));
-         PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
+         PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
          addInstr(env, PPCInstr_CMov(cc, r_dst, r1));
          return r_dst;
       }
@@ -2429,15 +2516,17 @@
 }
 
 static 
-PPCAMode* iselWordExpr_AMode ( ISelEnv* env, IRExpr* e, IRType xferTy )
+PPCAMode* iselWordExpr_AMode ( ISelEnv* env, IRExpr* e, IRType xferTy,
+                               IREndness IEndianess )
 {
-   PPCAMode* am = iselWordExpr_AMode_wrk(env, e, xferTy);
+   PPCAMode* am = iselWordExpr_AMode_wrk(env, e, xferTy, IEndianess);
    vassert(sane_AMode(env, am));
    return am;
 }
 
 /* DO NOT CALL THIS DIRECTLY ! */
-static PPCAMode* iselWordExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType xferTy )
+static PPCAMode* iselWordExpr_AMode_wrk ( ISelEnv* env, IRExpr* e,
+                                          IRType xferTy, IREndness IEndianess )
 {
    IRType ty = typeOfIRExpr(env->type_env,e);
 
@@ -2465,14 +2554,15 @@
           && uLong_fits_in_16_bits(e->Iex.Binop.arg2
                                     ->Iex.Const.con->Ico.U64)) {
          return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64,
-                             iselWordExpr_R(env, e->Iex.Binop.arg1) );
+                             iselWordExpr_R(env, e->Iex.Binop.arg1,
+                                            IEndianess) );
       }
       
       /* Add64(expr,expr) */
       if (e->tag == Iex_Binop 
           && e->Iex.Binop.op == Iop_Add64) {
-         HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1);
-         HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+         HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
          return PPCAMode_RR( r_idx, r_base );
       }
 
@@ -2488,14 +2578,15 @@
           && uInt_fits_in_16_bits(e->Iex.Binop.arg2
                                    ->Iex.Const.con->Ico.U32)) {
          return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32,
-                             iselWordExpr_R(env, e->Iex.Binop.arg1) );
+                             iselWordExpr_R(env, e->Iex.Binop.arg1,
+                                            IEndianess) );
       }
       
       /* Add32(expr,expr) */
       if (e->tag == Iex_Binop 
           && e->Iex.Binop.op == Iop_Add32) {
-         HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1);
-         HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+         HReg r_idx  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
          return PPCAMode_RR( r_idx, r_base );
       }
 
@@ -2503,7 +2594,7 @@
 
    /* Doesn't match anything in particular.  Generate it into
       a register and use that. */
-   return PPCAMode_IR( 0, iselWordExpr_R(env,e) );
+   return PPCAMode_IR( 0, iselWordExpr_R(env,e,IEndianess) );
 }
 
 
@@ -2516,9 +2607,10 @@
    signed immediates that are return can have their sign inverted if
    need be. */
 
-static PPCRH* iselWordExpr_RH ( ISelEnv* env, Bool syned, IRExpr* e )
+static PPCRH* iselWordExpr_RH ( ISelEnv* env, Bool syned, IRExpr* e,
+                                IREndness IEndianess )
 {
-   PPCRH* ri = iselWordExpr_RH_wrk(env, syned, e);
+   PPCRH* ri = iselWordExpr_RH_wrk(env, syned, e, IEndianess);
    /* sanity checks ... */
    switch (ri->tag) {
    case Prh_Imm:
@@ -2536,7 +2628,8 @@
 }
 
 /* DO NOT CALL THIS DIRECTLY ! */
-static PPCRH* iselWordExpr_RH_wrk ( ISelEnv* env, Bool syned, IRExpr* e )
+static PPCRH* iselWordExpr_RH_wrk ( ISelEnv* env, Bool syned, IRExpr* e,
+                                    IREndness IEndianess )
 {
    ULong u;
    Long  l;
@@ -2569,7 +2662,7 @@
    }
 
    /* default case: calculate into a register and return that */
-   return PPCRH_Reg( iselWordExpr_R ( env, e ) );
+   return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
 }
 
 
@@ -2579,9 +2672,9 @@
    iselIntExpr_R, the expression can have type 32, 16 or 8 bits, or,
    in 64-bit mode, 64 bits. */
 
-static PPCRI* iselWordExpr_RI ( ISelEnv* env, IRExpr* e )
+static PPCRI* iselWordExpr_RI ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
 {
-   PPCRI* ri = iselWordExpr_RI_wrk(env, e);
+   PPCRI* ri = iselWordExpr_RI_wrk(env, e, IEndianess);
    /* sanity checks ... */
    switch (ri->tag) {
    case Pri_Imm:
@@ -2596,7 +2689,8 @@
 }
 
 /* DO NOT CALL THIS DIRECTLY ! */
-static PPCRI* iselWordExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
+static PPCRI* iselWordExpr_RI_wrk ( ISelEnv* env, IRExpr* e,
+                                    IREndness IEndianess )
 {
    Long  l;
    IRType ty = typeOfIRExpr(env->type_env,e);
@@ -2618,7 +2712,7 @@
    }
 
    /* default case: calculate into a register and return that */
-   return PPCRI_Reg( iselWordExpr_R ( env, e ) );
+   return PPCRI_Reg( iselWordExpr_R ( env, e, IEndianess ) );
 }
 
 
@@ -2628,11 +2722,12 @@
    being an immediate in the range 1 .. 31 inclusive.  Used for doing
    shift amounts.  Only used in 32-bit mode. */
 
-static PPCRH* iselWordExpr_RH5u ( ISelEnv* env, IRExpr* e )
+static PPCRH* iselWordExpr_RH5u ( ISelEnv* env, IRExpr* e,
+                                  IREndness IEndianess )
 {
    PPCRH* ri;
    vassert(!env->mode64);
-   ri = iselWordExpr_RH5u_wrk(env, e);
+   ri = iselWordExpr_RH5u_wrk(env, e, IEndianess);
    /* sanity checks ... */
    switch (ri->tag) {
    case Prh_Imm:
@@ -2649,7 +2744,8 @@
 }
 
 /* DO NOT CALL THIS DIRECTLY ! */
-static PPCRH* iselWordExpr_RH5u_wrk ( ISelEnv* env, IRExpr* e )
+static PPCRH* iselWordExpr_RH5u_wrk ( ISelEnv* env, IRExpr* e,
+                                      IREndness IEndianess )
 {
    IRType ty = typeOfIRExpr(env->type_env,e);
    vassert(ty == Ity_I8);
@@ -2663,7 +2759,7 @@
    }
 
    /* default case: calculate into a register and return that */
-   return PPCRH_Reg( iselWordExpr_R ( env, e ) );
+   return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
 }
 
 
@@ -2673,11 +2769,12 @@
    being an immediate in the range 1 .. 63 inclusive.  Used for doing
    shift amounts.  Only used in 64-bit mode. */
 
-static PPCRH* iselWordExpr_RH6u ( ISelEnv* env, IRExpr* e )
+static PPCRH* iselWordExpr_RH6u ( ISelEnv* env, IRExpr* e,
+                                  IREndness IEndianess )
 {
    PPCRH* ri; 
    vassert(env->mode64);
-   ri = iselWordExpr_RH6u_wrk(env, e);
+   ri = iselWordExpr_RH6u_wrk(env, e, IEndianess);
    /* sanity checks ... */
    switch (ri->tag) {
    case Prh_Imm:
@@ -2694,7 +2791,8 @@
 }
 
 /* DO NOT CALL THIS DIRECTLY ! */
-static PPCRH* iselWordExpr_RH6u_wrk ( ISelEnv* env, IRExpr* e )
+static PPCRH* iselWordExpr_RH6u_wrk ( ISelEnv* env, IRExpr* e,
+                                      IREndness IEndianess )
 {
    IRType ty = typeOfIRExpr(env->type_env,e);
    vassert(ty == Ity_I8);
@@ -2708,7 +2806,7 @@
    }
 
    /* default case: calculate into a register and return that */
-   return PPCRH_Reg( iselWordExpr_R ( env, e ) );
+   return PPCRH_Reg( iselWordExpr_R ( env, e, IEndianess ) );
 }
 
 
@@ -2718,14 +2816,16 @@
    condition code which would correspond when the expression would
    notionally have returned 1. */
 
-static PPCCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
+static PPCCondCode iselCondCode ( ISelEnv* env, IRExpr* e,
+                                  IREndness IEndianess )
 {
    /* Uh, there's nothing we can sanity check here, unfortunately. */
-   return iselCondCode_wrk(env,e);
+   return iselCondCode_wrk(env,e, IEndianess);
 }
 
 /* DO NOT CALL THIS DIRECTLY ! */
-static PPCCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
+static PPCCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e,
+                                      IREndness IEndianess )
 {
    vassert(e);
    vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
@@ -2743,7 +2843,7 @@
    /* Not1(...) */
    if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
       /* Generate code for the arg, and negate the test condition */
-      PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+      PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
       cond.test = invertCondTest(cond.test);
       return cond;
    }
@@ -2753,7 +2853,7 @@
    /* 32to1, 64to1 */
    if (e->tag == Iex_Unop &&
        (e->Iex.Unop.op == Iop_32to1 || e->Iex.Unop.op == Iop_64to1)) {
-      HReg src = iselWordExpr_R(env, e->Iex.Unop.arg);
+      HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
       HReg tmp = newVRegI(env);
       /* could do better, probably -- andi. */
       addInstr(env, PPCInstr_Alu(Palu_AND, tmp,
@@ -2770,7 +2870,7 @@
    /* could do better -- andi. */
    if (e->tag == Iex_Unop
        && e->Iex.Unop.op == Iop_CmpNEZ8) {
-      HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg);
+      HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
       HReg tmp = newVRegI(env);
       addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
                                  PPCRH_Imm(False,0xFF)));
@@ -2784,7 +2884,7 @@
    /* CmpNEZ32(x) */
    if (e->tag == Iex_Unop
        && e->Iex.Unop.op == Iop_CmpNEZ32) {
-      HReg r1 = iselWordExpr_R(env, e->Iex.Unop.arg);
+      HReg r1 = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
       addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
                                  7/*cr*/, r1, PPCRH_Imm(False,0)));
       return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
@@ -2802,8 +2902,8 @@
            || e->Iex.Binop.op == Iop_CmpLE32U)) {
       Bool syned = (e->Iex.Binop.op == Iop_CmpLT32S ||
                     e->Iex.Binop.op == Iop_CmpLE32S);
-      HReg   r1  = iselWordExpr_R(env, e->Iex.Binop.arg1);
-      PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2);
+      HReg   r1  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+      PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2, IEndianess);
       addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
                                  7/*cr*/, r1, ri2));
 
@@ -2826,13 +2926,13 @@
       if (!env->mode64) {
          HReg hi, lo;
          HReg tmp = newVRegI(env);
-         iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
+         iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg, IEndianess );
          addInstr(env, PPCInstr_Alu(Palu_OR, tmp, lo, PPCRH_Reg(hi)));
          addInstr(env, PPCInstr_Cmp(False/*sign*/, True/*32bit cmp*/,
                                     7/*cr*/, tmp,PPCRH_Imm(False,0)));
          return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
       } else {  // mode64
-         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          addInstr(env, PPCInstr_Cmp(False/*sign*/, False/*64bit cmp*/,
                                     7/*cr*/, r_src,PPCRH_Imm(False,0)));
          return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
@@ -2851,8 +2951,8 @@
            || e->Iex.Binop.op == Iop_CmpLE64U)) {
       Bool   syned = (e->Iex.Binop.op == Iop_CmpLT64S ||
                       e->Iex.Binop.op == Iop_CmpLE64S);
-      HReg    r1 = iselWordExpr_R(env, e->Iex.Binop.arg1);
-      PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2);
+      HReg    r1 = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+      PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2, IEndianess);
       vassert(env->mode64);
       addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
                                  7/*cr*/, r1, ri2));
@@ -2874,7 +2974,7 @@
    if (e->tag == Iex_Binop
        && e->Iex.Binop.op == Iop_CmpNE8
        && isZeroU8(e->Iex.Binop.arg2)) {
-      HReg arg = iselWordExpr_R(env, e->Iex.Binop.arg1);
+      HReg arg = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
       HReg tmp = newVRegI(env);
       addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
                                  PPCRH_Imm(False,0xFF)));
@@ -2913,10 +3013,10 @@
    caller.  */
 
 static void iselInt128Expr ( HReg* rHi, HReg* rLo,
-                             ISelEnv* env, IRExpr* e )
+                             ISelEnv* env, IRExpr* e, IREndness IEndianess )
 {
    vassert(env->mode64);
-   iselInt128Expr_wrk(rHi, rLo, env, e);
+   iselInt128Expr_wrk(rHi, rLo, env, e, IEndianess);
 #  if 0
    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
 #  endif
@@ -2928,7 +3028,7 @@
 
 /* DO NOT CALL THIS DIRECTLY ! */
 static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
-                                 ISelEnv* env, IRExpr* e )
+                                 ISelEnv* env, IRExpr* e, IREndness IEndianess )
 {
    vassert(e);
    vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
@@ -2948,8 +3048,8 @@
          HReg     tLo     = newVRegI(env);
          HReg     tHi     = newVRegI(env);
          Bool     syned   = toBool(e->Iex.Binop.op == Iop_MullS64);
-         HReg     r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1);
-         HReg     r_srcR  = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         HReg     r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+         HReg     r_srcR  = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
          addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/, 
                                      False/*lo64*/, False/*64bit mul*/,
                                      tLo, r_srcL, r_srcR));
@@ -2963,8 +3063,8 @@
 
       /* 64HLto128(e1,e2) */
       case Iop_64HLto128:
-         *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1);
-         *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+         *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
          return;
       default: 
          break;
@@ -2992,10 +3092,11 @@
 
 /* 32-bit mode ONLY: compute a 128-bit value into a register quad */
 static void iselInt128Expr_to_32x4 ( HReg* rHi, HReg* rMedHi, HReg* rMedLo,
-                                     HReg* rLo, ISelEnv* env, IRExpr* e )
+                                     HReg* rLo, ISelEnv* env, IRExpr* e,
+                                     IREndness IEndianess )
 {
    vassert(!env->mode64);
-   iselInt128Expr_to_32x4_wrk(rHi, rMedHi, rMedLo, rLo, env, e);
+   iselInt128Expr_to_32x4_wrk(rHi, rMedHi, rMedLo, rLo, env, e, IEndianess);
 #  if 0
    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
 #  endif
@@ -3011,7 +3112,8 @@
 
 static void iselInt128Expr_to_32x4_wrk ( HReg* rHi, HReg* rMedHi,
                                          HReg* rMedLo, HReg* rLo,
-                                         ISelEnv* env, IRExpr* e )
+                                         ISelEnv* env, IRExpr* e,
+                                         IREndness IEndianess )
 {
    vassert(e);
    vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
@@ -3027,8 +3129,8 @@
       IROp op_binop = e->Iex.Binop.op;
       switch (op_binop) {
       case Iop_64HLto128:
-         iselInt64Expr(rHi, rMedHi, env, e->Iex.Binop.arg1);
-         iselInt64Expr(rMedLo, rLo, env, e->Iex.Binop.arg2);
+         iselInt64Expr(rHi, rMedHi, env, e->Iex.Binop.arg1, IEndianess);
+         iselInt64Expr(rMedLo, rLo, env, e->Iex.Binop.arg2, IEndianess);
          return;
       default:
          vex_printf("iselInt128Expr_to_32x4_wrk: Binop case 0x%x not found\n",
@@ -3048,10 +3150,11 @@
    caller.  */
 
 static void iselInt64Expr ( HReg* rHi, HReg* rLo,
-                            ISelEnv* env, IRExpr* e )
+                            ISelEnv* env, IRExpr* e,
+                            IREndness IEndianess )
 {
    vassert(!env->mode64);
-   iselInt64Expr_wrk(rHi, rLo, env, e);
+   iselInt64Expr_wrk(rHi, rLo, env, e, IEndianess);
 #  if 0
    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
 #  endif
@@ -3063,16 +3166,17 @@
 
 /* DO NOT CALL THIS DIRECTLY ! */
 static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
-                                ISelEnv* env, IRExpr* e )
+                                ISelEnv* env, IRExpr* e,
+                                IREndness IEndianess )
 {
    vassert(e);
    vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
 
    /* 64-bit load */
-   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
       HReg tLo    = newVRegI(env);
       HReg tHi    = newVRegI(env);
-      HReg r_addr = iselWordExpr_R(env, e->Iex.Load.addr);
+      HReg r_addr = iselWordExpr_R(env, e->Iex.Load.addr, IEndianess);
       vassert(!env->mode64);
       addInstr(env, PPCInstr_Load( 4/*byte-load*/,
                                    tHi, PPCAMode_IR( 0, r_addr ), 
@@ -3123,13 +3227,13 @@
    /* 64-bit ITE */
    if (e->tag == Iex_ITE) { // VFD
       HReg e0Lo, e0Hi, eXLo, eXHi;
-      iselInt64Expr(&eXHi, &eXLo, env, e->Iex.ITE.iftrue);
-      iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse);
+      iselInt64Expr(&eXHi, &eXLo, env, e->Iex.ITE.iftrue, IEndianess);
+      iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.ITE.iffalse, IEndianess);
       HReg tLo = newVRegI(env);
       HReg tHi = newVRegI(env);
       addInstr(env, mk_iMOVds_RR(tHi,e0Hi));
       addInstr(env, mk_iMOVds_RR(tLo,e0Lo));
-      PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
+      PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
       addInstr(env, PPCInstr_CMov(cc,tHi,PPCRI_Reg(eXHi)));
       addInstr(env, PPCInstr_CMov(cc,tLo,PPCRI_Reg(eXLo)));
       *rHi = tHi;
@@ -3147,8 +3251,10 @@
             HReg     tLo     = newVRegI(env);
             HReg     tHi     = newVRegI(env);
             Bool     syned   = toBool(op_binop == Iop_MullS32);
-            HReg     r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1);
-            HReg     r_srcR  = iselWordExpr_R(env, e->Iex.Binop.arg2);
+            HReg     r_srcL  = iselWordExpr_R(env, e->Iex.Binop.arg1,
+                                              IEndianess);
+            HReg     r_srcR  = iselWordExpr_R(env, e->Iex.Binop.arg2,
+                                              IEndianess);
             addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/, 
                                         False/*lo32*/, True/*32bit mul*/,
                                         tLo, r_srcL, r_srcR));
@@ -3169,8 +3275,8 @@
             HReg tHi = newVRegI(env);
             PPCAluOp op = (op_binop == Iop_Or64) ? Palu_OR :
                           (op_binop == Iop_And64) ? Palu_AND : Palu_XOR;
-            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
-            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
+            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1, IEndianess);
+            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2, IEndianess);
             addInstr(env, PPCInstr_Alu(op, tHi, xHi, PPCRH_Reg(yHi)));
             addInstr(env, PPCInstr_Alu(op, tLo, xLo, PPCRH_Reg(yLo)));
             *rHi = tHi;
@@ -3183,8 +3289,8 @@
             HReg xLo, xHi, yLo, yHi;
             HReg tLo = newVRegI(env);
             HReg tHi = newVRegI(env);
-            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
-            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
+            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1, IEndianess);
+            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2, IEndianess);
             addInstr(env, PPCInstr_AddSubC( True/*add*/, True /*set carry*/,
                                             tLo, xLo, yLo));
             addInstr(env, PPCInstr_AddSubC( True/*add*/, False/*read carry*/,
@@ -3196,8 +3302,8 @@
 
          /* 32HLto64(e1,e2) */
          case Iop_32HLto64:
-            *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1);
-            *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2);
+            *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+            *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
             return;
 
          /* F64toI64[S|U] */
@@ -3207,12 +3313,13 @@
             HReg      r1      = StackFramePtr(env->mode64);
             PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
             PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
-            HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2);
+            HReg      fsrc    = iselDblExpr(env, e->Iex.Binop.arg2,
+                                            IEndianess);
             HReg      ftmp    = newVRegF(env);
 
             vassert(!env->mode64);
             /* Set host rounding mode */
-            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
 
             sub_from_sp( env, 16 );
             addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
@@ -3235,11 +3342,11 @@
             HReg      r1      = StackFramePtr(env->mode64);
             PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
             PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
-            HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2);
+            HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
             HReg tmp    = newVRegF(env);
 
             vassert(!env->mode64);
-            set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
+            set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
             addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTFIX, tmp, fr_src));
 
             sub_from_sp( env, 16 );
@@ -3261,8 +3368,9 @@
             PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
             PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
 
-            set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
-            iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2);
+            set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
+            iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
+                           IEndianess);
             addInstr(env, PPCInstr_DfpD128toD64(fpop, ftmp, r_srcHi, r_srcLo));
 
             // put the D64 result into an integer register pair
@@ -3290,7 +3398,7 @@
          HReg argHi, argLo;
          HReg tmp1  = newVRegI(env);
          HReg tmp2  = newVRegI(env);
-         iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg);
+         iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg, IEndianess);
          /* tmp1 = argHi | argLo */
          addInstr(env, PPCInstr_Alu(Palu_OR, tmp1, argHi, PPCRH_Reg(argLo)));
          /* tmp2 = (tmp1 | -tmp1) >>s 31 */
@@ -3309,7 +3417,7 @@
          HReg zero32 = newVRegI(env);
          HReg resHi  = newVRegI(env);
          HReg resLo  = newVRegI(env);
-         iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg);
+         iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg, IEndianess);
          vassert(env->mode64 == False);
          addInstr(env, PPCInstr_LI(zero32, 0, env->mode64));
          /* resHi:resLo = - argHi:argLo */
@@ -3328,7 +3436,7 @@
       /* 32Sto64(e) */
       case Iop_32Sto64: {
          HReg tHi = newVRegI(env);
-         HReg src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
                                      tHi, src, PPCRH_Imm(False,31)));
          *rHi = tHi;
@@ -3337,7 +3445,7 @@
       }
       case Iop_ExtractExpD64: {
          HReg tmp    = newVRegF(env);
-         HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg);
+         HReg fr_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
          HReg      tLo     = newVRegI(env);
          HReg      tHi     = newVRegI(env);
          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
@@ -3364,7 +3472,7 @@
          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
          PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
 
-         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg);
+         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Unop.arg, IEndianess);
          addInstr(env, PPCInstr_ExtractExpD128(Pfp_DXEXQ, tmp,
                                                   r_srcHi, r_srcLo));
 
@@ -3382,7 +3490,7 @@
       /* 32Uto64(e) */
       case Iop_32Uto64: {
          HReg tHi = newVRegI(env);
-         HReg tLo = iselWordExpr_R(env, e->Iex.Unop.arg);
+         HReg tLo = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          addInstr(env, PPCInstr_LI(tHi, 0, False/*mode32*/));
          *rHi = tHi;
          *rLo = tLo;
@@ -3398,7 +3506,7 @@
          HReg r_Lo    = INVALID_HREG;
 
          iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
-                                env, e->Iex.Unop.arg);
+                                env, e->Iex.Unop.arg, IEndianess);
          *rHi = r_MedLo;
          *rLo = r_Lo;
          return;
@@ -3413,7 +3521,7 @@
          HReg r_Lo    = INVALID_HREG;
 
          iselInt128Expr_to_32x4(&r_Hi, &r_MedHi, &r_MedLo, &r_Lo,
-                                env, e->Iex.Unop.arg);
+                                env, e->Iex.Unop.arg, IEndianess);
          *rHi = r_Hi;
          *rLo = r_MedHi;
          return;
@@ -3426,7 +3534,7 @@
          Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? 0 : 8;
          HReg tLo = newVRegI(env);
          HReg tHi = newVRegI(env);
-         HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
+         HReg vec = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
          PPCAMode *am_off0, *am_offLO, *am_offHI;
          sub_from_sp( env, 32 );     // Move SP down 32 bytes
          
@@ -3456,7 +3564,7 @@
       case Iop_1Sto64: {
          HReg tLo = newVRegI(env);
          HReg tHi = newVRegI(env);
-         PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+         PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg, IEndianess);
          addInstr(env, PPCInstr_Set(cond,tLo));
          addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
                                      tLo, tLo, PPCRH_Imm(False,31)));
@@ -3472,7 +3580,7 @@
          HReg xLo, xHi;
          HReg tmpLo = newVRegI(env);
          HReg tmpHi = newVRegI(env);
-         iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
+         iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg, IEndianess);
          addInstr(env, PPCInstr_Unary(Pun_NOT,tmpLo,xLo));
          addInstr(env, PPCInstr_Unary(Pun_NOT,tmpHi,xHi));
          *rHi = tmpHi;
@@ -3485,7 +3593,7 @@
          pattern. */
       case Iop_ReinterpF64asI64: {
          PPCAMode *am_addr0, *am_addr1;
-         HReg fr_src  = iselDblExpr(env, e->Iex.Unop.arg);
+         HReg fr_src  = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
          HReg r_dstLo = newVRegI(env);
          HReg r_dstHi = newVRegI(env);
          
@@ -3510,7 +3618,7 @@
       }
 
       case Iop_ReinterpD64asI64: {
-         HReg fr_src  = iselDfp64Expr(env, e->Iex.Unop.arg);
+         HReg fr_src  = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
          PPCAMode *am_addr0, *am_addr1;
          HReg r_dstLo = newVRegI(env);
          HReg r_dstHi = newVRegI(env);
@@ -3546,7 +3654,6 @@
          HReg        tHi = newVRegI(env);
          HReg        tmpHi;
          HReg        tmpLo;
-         ULong       target;
          Bool        mode64 = env->mode64;
 
          argregs[0] = hregPPC_GPR3(mode64);
@@ -3555,7 +3662,7 @@
          argiregs = 0;
          argreg = 0;
 
-         iselInt64Expr( &tmpHi, &tmpLo, env, e->Iex.Unop.arg );
+         iselInt64Expr( &tmpHi, &tmpLo, env, e->Iex.Unop.arg, IEndianess );
 
          argiregs |= ( 1 << (argreg+3 ) );
          addInstr( env, mk_iMOVds_RR( argregs[argreg++], tmpHi ) );
@@ -3564,11 +3671,19 @@
          addInstr( env, mk_iMOVds_RR( argregs[argreg], tmpLo ) );
 
          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
-         target = toUInt( Ptr_to_ULong(h_calc_BCDtoDPB ) );
 
-         addInstr( env, PPCInstr_Call( cc, (Addr64)target,
-                                       argiregs,
-                                       mk_RetLoc_simple(RLPri_2Int) ) );
+         if (IEndianess == Iend_LE) {
+            addInstr( env, PPCInstr_Call( cc, Ptr_to_ULong(h_calc_BCDtoDPB),
+                                          argiregs,
+                                          mk_RetLoc_simple(RLPri_2Int) ) );
+         } else {
+            ULong       target;
+            target = toUInt( Ptr_to_ULong(h_calc_BCDtoDPB ) );
+            addInstr( env, PPCInstr_Call( cc, (Addr64)target,
+                                          argiregs,
+                                          mk_RetLoc_simple(RLPri_2Int) ) );
+         }
+
          addInstr( env, mk_iMOVds_RR( tHi, argregs[argreg-1] ) );
          addInstr( env, mk_iMOVds_RR( tLo, argregs[argreg] ) );
 
@@ -3586,7 +3701,6 @@
          HReg        tHi = newVRegI(env);
          HReg        tmpHi;
          HReg        tmpLo;
-         ULong       target;
          Bool        mode64 = env->mode64;
 
          argregs[0] = hregPPC_GPR3(mode64);
@@ -3595,7 +3709,7 @@
          argiregs = 0;
          argreg = 0;
 
-         iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg);
+         iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg, IEndianess);
 
          argiregs |= (1 << (argreg+3));
          addInstr(env, mk_iMOVds_RR( argregs[argreg++], tmpHi ));
@@ -3605,10 +3719,17 @@
 
          cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
 
-         target = toUInt( Ptr_to_ULong( h_calc_DPBtoBCD ) );
+         if (IEndianess == Iend_LE) {
+            addInstr(env, PPCInstr_Call( cc, Ptr_to_ULong(h_calc_DPBtoBCD),
+                                         argiregs,
+                                         mk_RetLoc_simple(RLPri_2Int) ) );
+         } else {
+            ULong       target;
+            target = toUInt( Ptr_to_ULong( h_calc_DPBtoBCD ) );
+            addInstr(env, PPCInstr_Call( cc, (Addr64)target, argiregs,
+                                         mk_RetLoc_simple(RLPri_2Int) ) );
+         }
 
-         addInstr(env, PPCInstr_Call( cc, (Addr64)target, argiregs,
-                                      mk_RetLoc_simple(RLPri_2Int) ) );
          addInstr(env, mk_iMOVds_RR(tHi, argregs[argreg-1]));
          addInstr(env, mk_iMOVds_RR(tLo, argregs[argreg]));
 
@@ -3635,9 +3756,9 @@
 /* Nothing interesting here; really just wrappers for
    64-bit stuff. */
 
-static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
+static HReg iselFltExpr ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
 {
-   HReg r = iselFltExpr_wrk( env, e );
+   HReg r = iselFltExpr_wrk( env, e, IEndianess );
 #  if 0
    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
 #  endif
@@ -3647,7 +3768,7 @@
 }
 
 /* DO NOT CALL THIS DIRECTLY */
-static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
+static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
 {
    Bool        mode64 = env->mode64;
 
@@ -3658,11 +3779,12 @@
       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
    }
 
-   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
       PPCAMode* am_addr;
       HReg r_dst = newVRegF(env);
       vassert(e->Iex.Load.ty == Ity_F32);
-      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F32/*xfer*/);
+      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F32/*xfer*/,
+                                   IEndianess);
       addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
       return r_dst;
    }
@@ -3704,7 +3826,7 @@
          part the latter optimisation will apply and hence this code
          will not often be used.
       */
-      HReg      fsrc    = iselDblExpr(env, e->Iex.Unop.arg);
+      HReg      fsrc    = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
       HReg      fdst    = newVRegF(env);
       PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
 
@@ -3722,12 +3844,12 @@
    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64UtoF32) {
       if (mode64) {
          HReg fdst = newVRegF(env);
-         HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
          HReg r1   = StackFramePtr(env->mode64);
          PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
 
          /* Set host rounding mode */
-         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
 
          sub_from_sp( env, 16 );
 
@@ -3750,10 +3872,10 @@
          PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
          PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
 
-         iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2);
+         iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2, IEndianess);
 
          /* Set host rounding mode */
-         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
 
          sub_from_sp( env, 16 );
 
@@ -3806,9 +3928,9 @@
     positive zero         0           0             .000000---0
 */
 
-static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
+static HReg iselDblExpr ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
 {
-   HReg r = iselDblExpr_wrk( env, e );
+   HReg r = iselDblExpr_wrk( env, e, IEndianess );
 #  if 0
    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
 #  endif
@@ -3818,7 +3940,7 @@
 }
 
 /* DO NOT CALL THIS DIRECTLY */
-static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
+static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
 {
    Bool mode64 = env->mode64;
    IRType ty = typeOfIRExpr(env->type_env,e);
@@ -3860,11 +3982,12 @@
    }
 
    /* --------- LOAD --------- */
-   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
       HReg r_dst = newVRegF(env);
       PPCAMode* am_addr;
       vassert(e->Iex.Load.ty == Ity_F64);
-      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F64/*xfer*/);
+      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F64/*xfer*/,
+                                   IEndianess);
       addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
       return r_dst;
    }
@@ -3890,10 +4013,13 @@
       }
       if (fpop != Pfp_INVALID) {
          HReg r_dst  = newVRegF(env);
-         HReg r_srcML  = iselDblExpr(env, e->Iex.Qop.details->arg2);
-         HReg r_srcMR  = iselDblExpr(env, e->Iex.Qop.details->arg3);
-         HReg r_srcAcc = iselDblExpr(env, e->Iex.Qop.details->arg4);
-         set_FPU_rounding_mode( env, e->Iex.Qop.details->arg1 );
+         HReg r_srcML  = iselDblExpr(env, e->Iex.Qop.details->arg2,
+                                     IEndianess);
+         HReg r_srcMR  = iselDblExpr(env, e->Iex.Qop.details->arg3,
+                                     IEndianess);
+         HReg r_srcAcc = iselDblExpr(env, e->Iex.Qop.details->arg4,
+                                     IEndianess);
+         set_FPU_rounding_mode( env, e->Iex.Qop.details->arg1, IEndianess );
          addInstr(env, PPCInstr_FpMulAcc(fpop, r_dst, 
                                                r_srcML, r_srcMR, r_srcAcc));
          return r_dst;
@@ -3916,9 +4042,9 @@
       }
       if (fpop != Pfp_INVALID) {
          HReg r_dst  = newVRegF(env);
-         HReg r_srcL = iselDblExpr(env, triop->arg2);
-         HReg r_srcR = iselDblExpr(env, triop->arg3);
-         set_FPU_rounding_mode( env, triop->arg1 );
+         HReg r_srcL = iselDblExpr(env, triop->arg2, IEndianess);
+         HReg r_srcR = iselDblExpr(env, triop->arg3, IEndianess);
+         set_FPU_rounding_mode( env, triop->arg1, IEndianess );
          addInstr(env, PPCInstr_FpBinary(fpop, r_dst, r_srcL, r_srcR));
          return r_dst;
       }
@@ -3932,8 +4058,8 @@
       }
       if (fpop == Pfp_SQRT) {
          HReg fr_dst = newVRegF(env);
-         HReg fr_src = iselDblExpr(env, e->Iex.Binop.arg2);
-         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+         HReg fr_src = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
+         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
          addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
          return fr_dst;
       }
@@ -3943,8 +4069,8 @@
 
       if (e->Iex.Binop.op == Iop_RoundF64toF32) {
          HReg r_dst = newVRegF(env);
-         HReg r_src = iselDblExpr(env, e->Iex.Binop.arg2);
-         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+         HReg r_src = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
+         set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
          addInstr(env, PPCInstr_FpRSP(r_dst, r_src));
          //set_FPU_rounding_default( env );
          return r_dst;
@@ -3953,12 +4079,12 @@
       if (e->Iex.Binop.op == Iop_I64StoF64 || e->Iex.Binop.op == Iop_I64UtoF64) {
          if (mode64) {
             HReg fdst = newVRegF(env);
-            HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2);
+            HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
             HReg r1   = StackFramePtr(env->mode64);
             PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
 
             /* Set host rounding mode */
-            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
 
             sub_from_sp( env, 16 );
 
@@ -3982,10 +4108,11 @@
             PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
             PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
 
-            iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2);
+            iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2,
+                          IEndianess);
 
             /* Set host rounding mode */
-            set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+            set_FPU_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
 
             sub_from_sp( env, 16 );
 
@@ -4021,7 +4148,7 @@
       }
       if (fpop != Pfp_INVALID) {
          HReg fr_dst = newVRegF(env);
-         HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg);
+         HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg, IEndianess);
          addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
          return fr_dst;
       }
@@ -4034,10 +4161,11 @@
                bit pattern. */
             if (!mode64) {
                HReg r_srcHi, r_srcLo;
-               iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg);
+               iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
+                               IEndianess);
                return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
             } else {
-               HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+               HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
                return mk_LoadR64toFPR( env, r_src );
             }
          }
@@ -4047,7 +4175,7 @@
                      e->Iex.Unop.arg->Iex.Unop.op == Iop_ReinterpI32asF32 ) {
                e = e->Iex.Unop.arg;
 
-               HReg src = iselWordExpr_R(env, e->Iex.Unop.arg);
+               HReg src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
                HReg fr_dst = newVRegF(env);
                PPCAMode *am_addr;
 
@@ -4067,7 +4195,7 @@
 
 
             /* this is a no-op */
-            HReg res = iselFltExpr(env, e->Iex.Unop.arg);
+            HReg res = iselFltExpr(env, e->Iex.Unop.arg, IEndianess);
             return res;
          }
          default: 
@@ -4079,11 +4207,11 @@
    if (e->tag == Iex_ITE) { // VFD
       if (ty == Ity_F64
           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
-         HReg fr1    = iselDblExpr(env, e->Iex.ITE.iftrue);
-         HReg fr0    = iselDblExpr(env, e->Iex.ITE.iffalse);
+         HReg fr1    = iselDblExpr(env, e->Iex.ITE.iftrue, IEndianess);
+         HReg fr0    = iselDblExpr(env, e->Iex.ITE.iffalse, IEndianess);
          HReg fr_dst = newVRegF(env);
          addInstr(env, PPCInstr_FpUnary( Pfp_MOV, fr_dst, fr0 ));
-         PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
+         PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
          addInstr(env, PPCInstr_FpCMov( cc, fr_dst, fr1 ));
          return fr_dst;
       }
@@ -4094,16 +4222,16 @@
    vpanic("iselDblExpr_wrk(ppc)");
 }
 
-static HReg iselDfp32Expr(ISelEnv* env, IRExpr* e)
+static HReg iselDfp32Expr(ISelEnv* env, IRExpr* e, IREndness IEndianess)
 {
-   HReg r = iselDfp32Expr_wrk( env, e );
+   HReg r = iselDfp32Expr_wrk( env, e, IEndianess );
    vassert(hregClass(r) == HRcFlt64);
    vassert( hregIsVirtual(r) );
    return r;
 }
 
 /* DO NOT CALL THIS DIRECTLY */
-static HReg iselDfp32Expr_wrk(ISelEnv* env, IRExpr* e)
+static HReg iselDfp32Expr_wrk(ISelEnv* env, IRExpr* e, IREndness IEndianess)
 {
    Bool mode64 = env->mode64;
    IRType ty = typeOfIRExpr( env->type_env, e );
@@ -4121,11 +4249,12 @@
    }
 
    /* --------- LOAD --------- */
-   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
       PPCAMode* am_addr;
       HReg r_dst = newVRegF(env);
       vassert(e->Iex.Load.ty == Ity_D32);
-      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D32/*xfer*/);
+      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D32/*xfer*/,
+                                   IEndianess);
       addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
       return r_dst;
    }
@@ -4134,8 +4263,8 @@
    if (e->tag == Iex_Binop) {
       if (e->Iex.Binop.op == Iop_D64toD32) {
          HReg fr_dst = newVRegF(env);
-         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2);
-         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
+         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
+         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
          addInstr(env, PPCInstr_Dfp64Unary(Pfp_DRSP, fr_dst, fr_src));
          return fr_dst;
       }
@@ -4145,16 +4274,16 @@
    vpanic( "iselDfp32Expr_wrk(ppc)" );
 }
 
-static HReg iselDfp64Expr(ISelEnv* env, IRExpr* e)
+static HReg iselDfp64Expr(ISelEnv* env, IRExpr* e, IREndness IEndianess)
 {
-   HReg r = iselDfp64Expr_wrk( env, e );
+   HReg r = iselDfp64Expr_wrk( env, e, IEndianess );
    vassert(hregClass(r) == HRcFlt64);
    vassert( hregIsVirtual(r) );
    return r;
 }
 
 /* DO NOT CALL THIS DIRECTLY */
-static HReg iselDfp64Expr_wrk(ISelEnv* env, IRExpr* e)
+static HReg iselDfp64Expr_wrk(ISelEnv* env, IRExpr* e, IREndness IEndianess)
 {
    Bool mode64 = env->mode64;
    IRType ty = typeOfIRExpr( env->type_env, e );
@@ -4176,11 +4305,12 @@
       return r_dst;
    }
 
-   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
       PPCAMode* am_addr;
       HReg r_dst = newVRegF(env);
       vassert(e->Iex.Load.ty == Ity_D64);
-      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D64/*xfer*/);
+      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_D64/*xfer*/,
+                                   IEndianess);
       addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
       return r_dst;
    }
@@ -4199,27 +4329,30 @@
                bit pattern. */
          if (!mode64) {
             HReg r_srcHi, r_srcLo;
-            iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg);
+            iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg,
+                           IEndianess);
             return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
          } else {
-            HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+            HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
             return mk_LoadR64toFPR( env, r_src );
          }
       }
       case Iop_D32toD64: {
-         HReg fr_src = iselDfp32Expr(env, e->Iex.Unop.arg);
+         HReg fr_src = iselDfp32Expr(env, e->Iex.Unop.arg, IEndianess);
          addInstr(env, PPCInstr_Dfp64Unary(Pfp_DCTDP, fr_dst, fr_src));
          return fr_dst;
       }
       case Iop_D128HItoD64:
-         iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg );
+         iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg,
+                         IEndianess );
          return r_dstHi;
       case Iop_D128LOtoD64:
-         iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg );
+         iselDfp128Expr( &r_dstHi, &r_dstLo, env, e->Iex.Unop.arg,
+                         IEndianess );
          return r_dstLo;
       case Iop_InsertExpD64: {
-         HReg fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1);
-         HReg fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2);
+         HReg fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1, IEndianess);
+         HReg fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2, IEndianess);
 
          addInstr(env, PPCInstr_Dfp64Binary(Pfp_DIEX, fr_dst, fr_srcL,
 					    fr_srcR));
@@ -4246,27 +4379,28 @@
          HReg r_srcHi = newVRegF(env);
          HReg r_srcLo = newVRegF(env);
 
-         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
-         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2);
+         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
+         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
+                        IEndianess);
          addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
          return fr_dst;
 
       } else if (fpop == Pfp_DRINTN) {
          HReg fr_src = newVRegF(env);
-         PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1);
+         PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
 
          /* NOTE, this IOP takes a DFP value and rounds to the
           * neares floating point integer value, i.e. fractional part
           * is zero.  The result is a decimal floating point number.
           * the INT in the name is a bit misleading.
           */
-         fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2);
+         fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
          addInstr(env, PPCInstr_DfpRound(fr_dst, fr_src, r_rmc));
          return fr_dst;
 
       } else if (fpop == Pfp_DRSP) {
-         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2);
-         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
+         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
+         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
          addInstr(env, PPCInstr_Dfp64Unary(fpop, fr_dst, fr_src));
          return fr_dst;
 
@@ -4274,19 +4408,20 @@
          HReg fr_src = newVRegF(env);
          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
 
-         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
+         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
          sub_from_sp( env, 16 );
 
          // put the I64 value into a floating point register
          if (mode64) {
-            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg2);
+           HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
 
            addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
          } else {
             HReg tmpHi, tmpLo;
             PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
 
-            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg2);
+            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg2,
+                          IEndianess);
             addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
             addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
          }
@@ -4304,8 +4439,8 @@
       default: break;
       }
       if (fpop != Pfp_INVALID) {
-         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg1);
-         PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2);
+         HReg fr_src = iselDfp64Expr(env, e->Iex.Binop.arg1, IEndianess);
+         PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
 
          /* shift value must be an immediate value */
          vassert(shift->tag == Pri_Imm);
@@ -4322,13 +4457,13 @@
       }
       if (fpop != Pfp_INVALID) {
          HReg fr_srcL = newVRegF(env);
-         HReg fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2);
+         HReg fr_srcR = iselDfp64Expr(env, e->Iex.Binop.arg2, IEndianess);
          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
          sub_from_sp( env, 16 );
 
          if (env->mode64) {
             // put the I64 value into a floating point reg
-            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1);
+            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
 
             addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
          } else {
@@ -4337,7 +4472,8 @@
             HReg tmpLo;
             PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
 
-            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg1);
+            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg1,
+                          IEndianess);
             addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*!mode64*/));
             addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*!mode64*/));
          }
@@ -4371,10 +4507,10 @@
       }
       if (fpop != Pfp_INVALID) {
          HReg r_dst = newVRegF( env );
-         HReg r_srcL = iselDfp64Expr( env, triop->arg2 );
-         HReg r_srcR = iselDfp64Expr( env, triop->arg3 );
+         HReg r_srcL = iselDfp64Expr( env, triop->arg2, IEndianess );
+         HReg r_srcR = iselDfp64Expr( env, triop->arg3, IEndianess );
 
-         set_FPU_DFP_rounding_mode( env, triop->arg1 );
+         set_FPU_DFP_rounding_mode( env, triop->arg1, IEndianess );
          addInstr( env, PPCInstr_Dfp64Binary( fpop, r_dst, r_srcL, r_srcR ) );
          return r_dst;
       }
@@ -4386,9 +4522,9 @@
       }
       if (fpop == Pfp_DQUA) {
          HReg r_dst = newVRegF(env);
-         HReg r_srcL = iselDfp64Expr(env, triop->arg2);
-         HReg r_srcR = iselDfp64Expr(env, triop->arg3);
-         PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1);
+         HReg r_srcL = iselDfp64Expr(env, triop->arg2, IEndianess);
+         HReg r_srcR = iselDfp64Expr(env, triop->arg3, IEndianess);
+         PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1, IEndianess);
          addInstr(env, PPCInstr_DfpQuantize(fpop, r_dst, r_srcL, r_srcR,
                                             rmc));
          return r_dst;
@@ -4396,10 +4532,10 @@
       } else if (fpop == Pfp_RRDTR) {
          HReg r_dst = newVRegF(env);
          HReg r_srcL = newVRegF(env);
-         HReg r_srcR = iselDfp64Expr(env, triop->arg3);
-         PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1);
+         HReg r_srcR = iselDfp64Expr(env, triop->arg3, IEndianess);
+         PPCRI* rmc  = iselWordExpr_RI(env, triop->arg1, IEndianess);
          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
-         HReg i8_val = iselWordExpr_R(env, triop->arg2);
+         HReg i8_val = iselWordExpr_R(env, triop->arg2, IEndianess);
 
          /* Move I8 to float register to issue instruction */
          sub_from_sp( env, 16 );
@@ -4421,15 +4557,17 @@
    vpanic( "iselDfp64Expr_wrk(ppc)" );
 }
 
-static void iselDfp128Expr(HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e)
+static void iselDfp128Expr(HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e,
+                           IREndness IEndianess)
 {
-   iselDfp128Expr_wrk( rHi, rLo, env, e );
+   iselDfp128Expr_wrk( rHi, rLo, env, e, IEndianess );
    vassert( hregIsVirtual(*rHi) );
    vassert( hregIsVirtual(*rLo) );
 }
 
 /* DO NOT CALL THIS DIRECTLY */
-static void iselDfp128Expr_wrk(HReg* rHi, HReg *rLo, ISelEnv* env, IRExpr* e)
+static void iselDfp128Expr_wrk(HReg* rHi, HReg *rLo, ISelEnv* env, IRExpr* e,
+                               IREndness IEndianess)
 {
    vassert( e );
    vassert( typeOfIRExpr(env->type_env,e) == Ity_D128 );
@@ -4450,13 +4588,14 @@
 
          // put the I64 value into a floating point reg
          if (env->mode64) {
-            HReg tmp   = iselWordExpr_R(env, e->Iex.Unop.arg);
+            HReg tmp   = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
             addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
          } else {
             HReg tmpHi, tmpLo;
             PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
 
-            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg);
+            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg,
+                          IEndianess);
             addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
             addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
          }
@@ -4467,7 +4606,7 @@
       }
 
       if (e->Iex.Unop.op == Iop_D64toD128) {
-         HReg r_src = iselDfp64Expr(env, e->Iex.Unop.arg);
+         HReg r_src = iselDfp64Expr(env, e->Iex.Unop.arg, IEndianess);
 
          /* Source is 64bit, result is 128 bit.  High 64bit source arg,
           * is ignored by the instruction.  Set high arg to r_src just
@@ -4488,8 +4627,8 @@
 
       switch (e->Iex.Binop.op) {
       case Iop_D64HLtoD128:
-         r_srcHi = iselDfp64Expr( env, e->Iex.Binop.arg1 );
-         r_srcLo = iselDfp64Expr( env, e->Iex.Binop.arg2 );
+         r_srcHi = iselDfp64Expr( env, e->Iex.Binop.arg1, IEndianess );
+         r_srcLo = iselDfp64Expr( env, e->Iex.Binop.arg2, IEndianess );
          *rHi = r_srcHi;
          *rLo = r_srcLo;
          return;
@@ -4498,8 +4637,9 @@
          PPCFpOp fpop = Pfp_DRDPQ;
          HReg fr_dst  = newVRegF(env);
 
-         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1 );
-         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2);
+         set_FPU_DFP_rounding_mode( env, e->Iex.Binop.arg1, IEndianess );
+         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
+                        IEndianess);
          addInstr(env, PPCInstr_DfpD128toD64(fpop, fr_dst, r_srcHi, r_srcLo));
 
          /* Need to meet the interface spec but the result is
@@ -4513,10 +4653,11 @@
       case Iop_ShrD128: {
          HReg fr_dst_hi = newVRegF(env);  
          HReg fr_dst_lo = newVRegF(env);
-         PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2);
+         PPCRI* shift = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
          PPCFpOp fpop = Pfp_DSCLIQ;  /* fix later if necessary */
 
-         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg1);
+         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg1,
+                        IEndianess);
 
          if (e->Iex.Binop.op == Iop_ShrD128)
             fpop = Pfp_DSCRIQ;
@@ -4531,10 +4672,11 @@
       case Iop_RoundD128toInt: {
          HReg r_dstHi = newVRegF(env);
          HReg r_dstLo = newVRegF(env);
-         PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1);
+         PPCRI* r_rmc = iselWordExpr_RI(env, e->Iex.Binop.arg1, IEndianess);
 
          // will set R and RMC when issuing instruction
-         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2);
+         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
+                        IEndianess);
 
          addInstr(env, PPCInstr_DfpRound128(r_dstHi, r_dstLo,
                                             r_srcHi, r_srcLo, r_rmc));
@@ -4550,17 +4692,19 @@
          r_srcHi = newVRegF(env);
          r_srcLo = newVRegF(env);
 
-         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2);
+         iselDfp128Expr(&r_srcHi, &r_srcLo, env, e->Iex.Binop.arg2,
+                        IEndianess);
 
          /* Move I64 to float register to issue instruction */
          if (env->mode64) {
-            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1);
+            HReg tmp = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
             addInstr(env, PPCInstr_Store(8, zero_r1, tmp, True/*mode64*/));
          } else {
             HReg tmpHi, tmpLo;
             PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
 
-            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Unop.arg);
+            iselInt64Expr(&tmpHi, &tmpLo, env, e->Iex.Binop.arg1,
+                          IEndianess);
             addInstr(env, PPCInstr_Store(4, zero_r1, tmpHi, False/*mode32*/));
             addInstr(env, PPCInstr_Store(4, four_r1, tmpLo, False/*mode32*/));
          }
@@ -4608,9 +4752,9 @@
          HReg r_srcRLo = newVRegV( env );
 
          /* dst will be used to pass in the left operand and get the result. */
-         iselDfp128Expr( &r_dstHi, &r_dstLo, env, triop->arg2 );
-         iselDfp128Expr( &r_srcRHi, &r_srcRLo, env, triop->arg3 );
-         set_FPU_DFP_rounding_mode( env, triop->arg1 );
+         iselDfp128Expr( &r_dstHi, &r_dstLo, env, triop->arg2, IEndianess );
+         iselDfp128Expr( &r_srcRHi, &r_srcRLo, env, triop->arg3, IEndianess );
+         set_FPU_DFP_rounding_mode( env, triop->arg1, IEndianess );
          addInstr( env,
                    PPCInstr_Dfp128Binary( fpop, r_dstHi, r_dstLo,
                                           r_srcRHi, r_srcRLo ) );
@@ -4626,11 +4770,11 @@
       if (fpop == Pfp_DQUAQ) {
          HReg r_srcHi = newVRegF(env);
          HReg r_srcLo = newVRegF(env);
-         PPCRI* rmc = iselWordExpr_RI(env, triop->arg1);
+         PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
 
          /* dst will be used to pass in the left operand and get the result */
-         iselDfp128Expr(&r_dstHi, &r_dstLo, env, triop->arg2);
-         iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3);
+         iselDfp128Expr(&r_dstHi, &r_dstLo, env, triop->arg2, IEndianess);
+         iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3, IEndianess);
 
          // will set RMC when issuing instruction
          addInstr(env, PPCInstr_DfpQuantize128(fpop, r_dstHi, r_dstLo,
@@ -4642,13 +4786,13 @@
       } else if (fpop == Pfp_DRRNDQ) {
          HReg r_srcHi = newVRegF(env);
          HReg r_srcLo = newVRegF(env);
-         PPCRI* rmc = iselWordExpr_RI(env, triop->arg1);
+         PPCRI* rmc = iselWordExpr_RI(env, triop->arg1, IEndianess);
          PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
          PPCAMode* four_r1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
-         HReg i8_val = iselWordExpr_R(env, triop->arg2);
+         HReg i8_val = iselWordExpr_R(env, triop->arg2, IEndianess);
          HReg r_zero = newVRegI( env );
 
-         iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3);
+         iselDfp128Expr(&r_srcHi, &r_srcLo, env, triop->arg3, IEndianess);
 
          /* dst will be used to pass in the left operand and get the result */
          /* Move I8 to float register to issue instruction.  Note, the
@@ -4690,9 +4834,9 @@
 /*--- ISEL: SIMD (Vector) expressions, 128 bit.         ---*/
 /*---------------------------------------------------------*/
 
-static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
+static HReg iselVecExpr ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
 {
-   HReg r = iselVecExpr_wrk( env, e );
+   HReg r = iselVecExpr_wrk( env, e, IEndianess );
 #  if 0
    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
 #  endif
@@ -4702,7 +4846,7 @@
 }
 
 /* DO NOT CALL THIS DIRECTLY */
-static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
+static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
 {
    Bool mode64 = env->mode64;
    PPCAvOp op = Pav_INVALID;
@@ -4726,11 +4870,12 @@
       return dst;
    }
 
-   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+   if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
       PPCAMode* am_addr;
       HReg v_dst = newVRegV(env);
       vassert(e->Iex.Load.ty == Ity_V128);
-      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_V128/*xfer*/);
+      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_V128/*xfer*/,
+                                   IEndianess);
       addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, v_dst, am_addr));
       return v_dst;
    }
@@ -4739,14 +4884,14 @@
       switch (e->Iex.Unop.op) {
 
       case Iop_NotV128: {
-         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+         HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
          HReg dst = newVRegV(env);
          addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, arg));
          return dst;
       }
 
       case Iop_CmpNEZ8x16: {
-         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
+         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
          HReg zero = newVRegV(env);
          HReg dst  = newVRegV(env);
          addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
@@ -4756,7 +4901,7 @@
       }
 
       case Iop_CmpNEZ16x8: {
-         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
+         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
          HReg zero = newVRegV(env);
          HReg dst  = newVRegV(env);
          addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
@@ -4766,7 +4911,7 @@
       }
 
       case Iop_CmpNEZ32x4: {
-         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
+         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
          HReg zero = newVRegV(env);
          HReg dst  = newVRegV(env);
          addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
@@ -4776,7 +4921,7 @@
       }
 
       case Iop_CmpNEZ64x2: {
-         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
+         HReg arg  = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
          HReg zero = newVRegV(env);
          HReg dst  = newVRegV(env);
          addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
@@ -4797,7 +4942,7 @@
       case Iop_RoundF32x4_RZ: fpop = Pavfp_ROUNDZ;  goto do_32Fx4_unary;
       do_32Fx4_unary:
       {
-         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+         HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
          HReg dst = newVRegV(env);
          addInstr(env, PPCInstr_AvUn32Fx4(fpop, dst, arg));
          return dst;
@@ -4805,7 +4950,7 @@
 
       case Iop_32UtoV128: {
          HReg r_aligned16, r_zeros;
-         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
          HReg   dst = newVRegV(env);
          PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
          sub_from_sp( env, 32 );     // Move SP down
@@ -4820,15 +4965,24 @@
          /* Store zeros */
          r_zeros = newVRegI(env);
          addInstr(env, PPCInstr_LI(r_zeros, 0x0, mode64));
-         addInstr(env, PPCInstr_Store( 4, am_off0, r_zeros, mode64 ));
+         if (IEndianess == Iend_LE)
+            addInstr(env, PPCInstr_Store( 4, am_off0, r_src, mode64 ));
+         else
+            addInstr(env, PPCInstr_Store( 4, am_off0, r_zeros, mode64 ));
          addInstr(env, PPCInstr_Store( 4, am_off4, r_zeros, mode64 ));
          addInstr(env, PPCInstr_Store( 4, am_off8, r_zeros, mode64 ));
 
          /* Store r_src in low word of quadword-aligned mem */
-         addInstr(env, PPCInstr_Store( 4, am_off12, r_src, mode64 ));
+         if (IEndianess == Iend_LE)
+            addInstr(env, PPCInstr_Store( 4, am_off12, r_zeros, mode64 ));
+         else
+            addInstr(env, PPCInstr_Store( 4, am_off12, r_src, mode64 ));
 
          /* Load word into low word of quadword vector reg */
-         addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off12 ));
+         if (IEndianess == Iend_LE)
+            addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off0 ));
+         else
+            addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off12 ));
 
          add_to_sp( env, 32 );       // Reset SP
          return dst;
@@ -4837,11 +4991,11 @@
       case Iop_Dup8x16:
       case Iop_Dup16x8:
       case Iop_Dup32x4:
-         return mk_AvDuplicateRI(env, e->Iex.Unop.arg);
+         return mk_AvDuplicateRI(env, e->Iex.Unop.arg, IEndianess);
 
       case Iop_CipherSV128: op = Pav_CIPHERSUBV128; goto do_AvCipherV128Un;
       do_AvCipherV128Un: {
-         HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+         HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
          HReg dst = newVRegV(env);
          addInstr(env, PPCInstr_AvCipherV128Unary(op, dst, arg));
          return dst;
@@ -4854,7 +5008,7 @@
       case Iop_PwBitMtxXpose64x2: op = Pav_BITMTXXPOSE;  goto do_zerocnt;
       do_zerocnt:
       {
-        HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+        HReg arg = iselVecExpr(env, e->Iex.Unop.arg, IEndianess);
         HReg dst = newVRegV(env);
         addInstr(env, PPCInstr_AvUnary(op, dst, arg));
         return dst;
@@ -4884,11 +5038,11 @@
             am_off12 = PPCAMode_IR( 12, r_aligned16 );
             
             /* Do the less significant 64 bits */
-            iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
+            iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2, IEndianess);
             addInstr(env, PPCInstr_Store( 4, am_off12, r0, mode64 ));
             addInstr(env, PPCInstr_Store( 4, am_off8,  r1, mode64 ));
             /* Do the more significant 64 bits */
-            iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
+            iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1, IEndianess);
             addInstr(env, PPCInstr_Store( 4, am_off4, r2, mode64 ));
             addInstr(env, PPCInstr_Store( 4, am_off0, r3, mode64 ));
             
@@ -4898,8 +5052,8 @@
             add_to_sp( env, 32 );          // Reset SP
             return dst;
          } else {
-            HReg     rHi = iselWordExpr_R(env, e->Iex.Binop.arg1);
-            HReg     rLo = iselWordExpr_R(env, e->Iex.Binop.arg2);
+            HReg     rHi = iselWordExpr_R(env, e->Iex.Binop.arg1, IEndianess);
+            HReg     rLo = iselWordExpr_R(env, e->Iex.Binop.arg2, IEndianess);
             HReg     dst = newVRegV(env);
             HReg     r_aligned16;
             PPCAMode *am_off0, *am_off8;
@@ -4912,9 +5066,13 @@
             am_off8  = PPCAMode_IR( 8,  r_aligned16 );
             
             /* Store 2*I64 to stack */
-            addInstr(env, PPCInstr_Store( 8, am_off0, rHi, mode64 ));
-            addInstr(env, PPCInstr_Store( 8, am_off8, rLo, mode64 ));
-
+            if (IEndianess == Iend_LE) {
+               addInstr(env, PPCInstr_Store( 8, am_off0, rLo, mode64 ));
+               addInstr(env, PPCInstr_Store( 8, am_off8, rHi, mode64 ));
+            } else {
+               addInstr(env, PPCInstr_Store( 8, am_off0, rHi, mode64 ));
+               addInstr(env, PPCInstr_Store( 8, am_off8, rLo, mode64 ));
+            }
             /* Fetch result back from stack. */
             addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
             
@@ -4930,16 +5088,16 @@
       case Iop_CmpGE32Fx4: fpop = Pavfp_CMPGEF; goto do_32Fx4;
       do_32Fx4:
       {
-         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
-         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
+         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
          HReg dst = newVRegV(env);
          addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
          return dst;
       }
 
       case Iop_CmpLE32Fx4: {
-         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
-         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
+         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
          HReg dst = newVRegV(env);
          
          /* stay consistent with native ppc compares:
@@ -4947,8 +5105,8 @@
             so: le == NOT(gt OR isNan)
           */
          HReg isNanLR = newVRegV(env);
-         HReg isNanL = isNan(env, argL);
-         HReg isNanR = isNan(env, argR);
+         HReg isNanL = isNan(env, argL, IEndianess);
+         HReg isNanR = isNan(env, argR, IEndianess);
          addInstr(env, PPCInstr_AvBinary(Pav_OR, isNanLR,
                                          isNanL, isNanR));
 
@@ -4963,8 +5121,8 @@
       case Iop_OrV128:     op = Pav_OR;       goto do_AvBin;
       case Iop_XorV128:    op = Pav_XOR;      goto do_AvBin;
       do_AvBin: {
-         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
-         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
+         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
          HReg dst  = newVRegV(env);
          addInstr(env, PPCInstr_AvBinary(op, dst, arg1, arg2));
          return dst;
@@ -4995,8 +5153,8 @@
       case Iop_CmpGT8Sx16: op = Pav_CMPGTS; goto do_AvBin8x16;
       case Iop_PolynomialMulAdd8x16: op = Pav_POLYMULADD; goto do_AvBin8x16;
       do_AvBin8x16: {
-         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
-         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
+         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
          HReg dst  = newVRegV(env);
          addInstr(env, PPCInstr_AvBin8x16(op, dst, arg1, arg2));
          return dst;
@@ -5030,8 +5188,8 @@
       case Iop_CmpGT16Sx8: op = Pav_CMPGTS; goto do_AvBin16x8;
       case Iop_PolynomialMulAdd16x8: op = Pav_POLYMULADD; goto do_AvBin16x8;
       do_AvBin16x8: {
-         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
-         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
+         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
          HReg dst  = newVRegV(env);
          addInstr(env, PPCInstr_AvBin16x8(op, dst, arg1, arg2));
          return dst;
@@ -5068,8 +5226,8 @@
       case Iop_CatEvenLanes32x4: op = Pav_CATEVEN; goto do_AvBin32x4;
       case Iop_PolynomialMulAdd32x4: op = Pav_POLYMULADD; goto do_AvBin32x4;
       do_AvBin32x4: {
-         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
-         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
+         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
          HReg dst  = newVRegV(env);
          addInstr(env, PPCInstr_AvBin32x4(op, dst, arg1, arg2));
          return dst;
@@ -5095,8 +5253,8 @@
       case Iop_CmpGT64Sx2: op = Pav_CMPGTS; goto do_AvBin64x2;
       case Iop_PolynomialMulAdd64x2: op = Pav_POLYMULADD; goto do_AvBin64x2;
       do_AvBin64x2: {
-         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
-         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
+         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
          HReg dst  = newVRegV(env);
          addInstr(env, PPCInstr_AvBin64x2(op, dst, arg1, arg2));
          return dst;
@@ -5105,9 +5263,9 @@
       case Iop_ShlN8x16: op = Pav_SHL; goto do_AvShift8x16;
       case Iop_SarN8x16: op = Pav_SAR; goto do_AvShift8x16;
       do_AvShift8x16: {
-         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
          HReg dst    = newVRegV(env);
-         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
          addInstr(env, PPCInstr_AvBin8x16(op, dst, r_src, v_shft));
          return dst;
       }
@@ -5116,9 +5274,9 @@
       case Iop_ShrN16x8: op = Pav_SHR; goto do_AvShift16x8;
       case Iop_SarN16x8: op = Pav_SAR; goto do_AvShift16x8;
       do_AvShift16x8: {
-         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
          HReg dst    = newVRegV(env);
-         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
          addInstr(env, PPCInstr_AvBin16x8(op, dst, r_src, v_shft));
          return dst;
       }
@@ -5127,9 +5285,9 @@
       case Iop_ShrN32x4: op = Pav_SHR; goto do_AvShift32x4;
       case Iop_SarN32x4: op = Pav_SAR; goto do_AvShift32x4;
       do_AvShift32x4: {
-         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
          HReg dst    = newVRegV(env);
-         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
          addInstr(env, PPCInstr_AvBin32x4(op, dst, r_src, v_shft));
          return dst;
       }
@@ -5138,9 +5296,9 @@
       case Iop_ShrN64x2: op = Pav_SHR; goto do_AvShift64x2;
       case Iop_SarN64x2: op = Pav_SAR; goto do_AvShift64x2;
       do_AvShift64x2: {
-         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
          HReg dst    = newVRegV(env);
-         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
          addInstr(env, PPCInstr_AvBin64x2(op, dst, r_src, v_shft));
          return dst;
       }
@@ -5149,8 +5307,8 @@
       case Iop_ShlV128: op = Pav_SHL; goto do_AvShiftV128;
       do_AvShiftV128: {
          HReg dst    = newVRegV(env);
-         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1);
-         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
+         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2, IEndianess);
          /* Note: shift value gets masked by 127 */
          addInstr(env, PPCInstr_AvBinary(op, dst, r_src, v_shft));
          return dst;
@@ -5158,8 +5316,8 @@
 
       case Iop_Perm8x16: {
          HReg dst   = newVRegV(env);
-         HReg v_src = iselVecExpr(env, e->Iex.Binop.arg1);
-         HReg v_ctl = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg v_src = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
+         HReg v_ctl = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
          addInstr(env, PPCInstr_AvPerm(dst, v_src, v_src, v_ctl));
          return dst;
       }
@@ -5169,8 +5327,8 @@
       case Iop_NCipherV128: op = Pav_NCIPHERV128;  goto do_AvCipherV128;
       case Iop_NCipherLV128:op = Pav_NCIPHERLV128; goto do_AvCipherV128;
       do_AvCipherV128: {
-         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
-         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
+         HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2, IEndianess);
          HReg dst  = newVRegV(env);
          addInstr(env, PPCInstr_AvCipherV128Binary(op, dst, arg1, arg2));
          return dst;
@@ -5179,9 +5337,9 @@
       case Iop_SHA256:op = Pav_SHA256; goto do_AvHashV128;
       case Iop_SHA512:op = Pav_SHA512; goto do_AvHashV128;
       do_AvHashV128: {
-         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1, IEndianess);
          HReg dst  = newVRegV(env);
-         PPCRI* s_field = iselWordExpr_RI(env, e->Iex.Binop.arg2);
+         PPCRI* s_field = iselWordExpr_RI(env, e->Iex.Binop.arg2, IEndianess);
          addInstr(env, PPCInstr_AvHashV128Binary(op, dst, arg1, s_field));
          return dst;
       }
@@ -5196,10 +5354,10 @@
       case Iop_BCDAdd:op = Pav_BCDAdd; goto do_AvBCDV128;
       case Iop_BCDSub:op = Pav_BCDSub; goto do_AvBCDV128;
       do_AvBCDV128: {
-         HReg arg1 = iselVecExpr(env, triop->arg1);
-         HReg arg2 = iselVecExpr(env, triop->arg2);
+         HReg arg1 = iselVecExpr(env, triop->arg1, IEndianess);
+         HReg arg2 = iselVecExpr(env, triop->arg2, IEndianess);
          HReg dst  = newVRegV(env);
-         PPCRI* ps = iselWordExpr_RI(env, triop->arg3);
+         PPCRI* ps = iselWordExpr_RI(env, triop->arg3, IEndianess);
          addInstr(env, PPCInstr_AvBCDV128Trinary(op, dst, arg1, arg2, ps));
          return dst;
       }
@@ -5209,8 +5367,8 @@
       case Iop_Mul32Fx4: fpop = Pavfp_MULF; goto do_32Fx4_with_rm;
       do_32Fx4_with_rm:
       {
-         HReg argL = iselVecExpr(env, triop->arg2);
-         HReg argR = iselVecExpr(env, triop->arg3);
+         HReg argL = iselVecExpr(env, triop->arg2, IEndianess);
+         HReg argR = iselVecExpr(env, triop->arg3, IEndianess);
          HReg dst  = newVRegV(env);
          /* FIXME: this is bogus, in the sense that Altivec ignores
             FPSCR.RM, at least for some FP operations.  So setting the
@@ -5218,7 +5376,7 @@
             where the RM is known, at JIT time, to be Irrm_NEAREST,
             since -- at least for Altivec FP add/sub/mul -- the
             emitted insn is hardwired to round to nearest. */
-         set_FPU_rounding_mode(env, triop->arg1);
+         set_FPU_rounding_mode(env, triop->arg1, IEndianess);
          addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
          return dst;
       }
@@ -5251,7 +5409,7 @@
 /*--- ISEL: Statements                                  ---*/
 /*---------------------------------------------------------*/
 
-static void iselStmt ( ISelEnv* env, IRStmt* stmt )
+static void iselStmt ( ISelEnv* env, IRStmt* stmt, IREndness IEndianess )
 {
    Bool mode64 = env->mode64;
    if (vex_traceflags & VEX_TRACE_VCODE) {
@@ -5268,7 +5426,7 @@
       IRType    tyd   = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
       IREndness end   = stmt->Ist.Store.end;
 
-      if (end != Iend_BE)
+      if (end != IEndianess)
          goto stmt_fail;
       if (!mode64 && (tya != Ity_I32))
          goto stmt_fail;
@@ -5278,48 +5436,54 @@
       if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32 ||
           (mode64 && (tyd == Ity_I64))) {
          PPCAMode* am_addr
-            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
-         HReg r_src = iselWordExpr_R(env, stmt->Ist.Store.data);
+            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
+                                 IEndianess);
+         HReg r_src = iselWordExpr_R(env, stmt->Ist.Store.data, IEndianess);
          addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(tyd)), 
                                        am_addr, r_src, mode64 ));
          return;
       }
       if (tyd == Ity_F64) {
          PPCAMode* am_addr
-            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
-         HReg fr_src = iselDblExpr(env, stmt->Ist.Store.data);
+            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
+                                 IEndianess);
+         HReg fr_src = iselDblExpr(env, stmt->Ist.Store.data, IEndianess);
          addInstr(env,
                   PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
          return;
       }
       if (tyd == Ity_F32) {
          PPCAMode* am_addr
-            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
-         HReg fr_src = iselFltExpr(env, stmt->Ist.Store.data);
+            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
+                                 IEndianess);
+         HReg fr_src = iselFltExpr(env, stmt->Ist.Store.data, IEndianess);
          addInstr(env,
                   PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
          return;
       }
       if (tyd == Ity_D64) {
          PPCAMode* am_addr
-            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
-         HReg fr_src = iselDfp64Expr(env, stmt->Ist.Store.data);
+            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
+                                 IEndianess);
+         HReg fr_src = iselDfp64Expr(env, stmt->Ist.Store.data, IEndianess);
          addInstr(env,
                   PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
          return;
       }
       if (tyd == Ity_D32) {
          PPCAMode* am_addr
-            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
-         HReg fr_src = iselDfp32Expr(env, stmt->Ist.Store.data);
+            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
+                                 IEndianess);
+         HReg fr_src = iselDfp32Expr(env, stmt->Ist.Store.data, IEndianess);
          addInstr(env,
                   PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
          return;
       }
       if (tyd == Ity_V128) {
          PPCAMode* am_addr
-            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
-         HReg v_src = iselVecExpr(env, stmt->Ist.Store.data);
+            = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/,
+                                 IEndianess);
+         HReg v_src = iselVecExpr(env, stmt->Ist.Store.data, IEndianess);
          addInstr(env,
                   PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
          return;
@@ -5329,8 +5493,9 @@
             short to arse around trying and possibly failing to adjust
             the offset in a 'reg+offset' style amode. */
          HReg rHi32, rLo32;
-         HReg r_addr = iselWordExpr_R(env, stmt->Ist.Store.addr);
-         iselInt64Expr( &rHi32, &rLo32, env, stmt->Ist.Store.data );
+         HReg r_addr = iselWordExpr_R(env, stmt->Ist.Store.addr, IEndianess);
+         iselInt64Expr( &rHi32, &rLo32, env, stmt->Ist.Store.data,
+                        IEndianess );
          addInstr(env, PPCInstr_Store( 4/*byte-store*/,
                                        PPCAMode_IR( 0, r_addr ), 
                                        rHi32,
@@ -5349,7 +5514,7 @@
       IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
       if (ty == Ity_I8  || ty == Ity_I16 ||
           ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
-         HReg r_src = iselWordExpr_R(env, stmt->Ist.Put.data);
+         HReg r_src = iselWordExpr_R(env, stmt->Ist.Put.data, IEndianess);
          PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
                                           GuestStatePtr(mode64) );
          addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(ty)), 
@@ -5361,7 +5526,7 @@
          PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
                                            GuestStatePtr(mode64) );
          PPCAMode* am_addr4 = advance4(env, am_addr);
-         iselInt64Expr(&rHi,&rLo, env, stmt->Ist.Put.data);
+         iselInt64Expr(&rHi,&rLo, env, stmt->Ist.Put.data, IEndianess);
          addInstr(env, PPCInstr_Store( 4, am_addr,  rHi, mode64 ));
          addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
          return;
@@ -5369,7 +5534,7 @@
      if (ty == Ity_V128) {
          /* Guest state vectors are 16byte aligned,
             so don't need to worry here */
-         HReg v_src = iselVecExpr(env, stmt->Ist.Put.data);
+         HReg v_src = iselVecExpr(env, stmt->Ist.Put.data, IEndianess);
          PPCAMode* am_addr  = PPCAMode_IR( stmt->Ist.Put.offset,
                                            GuestStatePtr(mode64) );
          addInstr(env,
@@ -5377,7 +5542,7 @@
          return;
       }
       if (ty == Ity_F64) {
-         HReg fr_src = iselDblExpr(env, stmt->Ist.Put.data);
+         HReg fr_src = iselDblExpr(env, stmt->Ist.Put.data, IEndianess);
          PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
                                           GuestStatePtr(mode64) );
          addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
@@ -5386,7 +5551,7 @@
       }
       if (ty == Ity_D32) {
          /* The 32-bit value is stored in a 64-bit register */
-         HReg fr_src = iselDfp32Expr( env, stmt->Ist.Put.data );
+         HReg fr_src = iselDfp32Expr( env, stmt->Ist.Put.data, IEndianess );
          PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
                                           GuestStatePtr(mode64) );
          addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8,
@@ -5394,7 +5559,7 @@
          return;
       }
       if (ty == Ity_D64) {
-         HReg fr_src = iselDfp64Expr( env, stmt->Ist.Put.data );
+         HReg fr_src = iselDfp64Expr( env, stmt->Ist.Put.data, IEndianess );
          PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
                                           GuestStatePtr(mode64) );
          addInstr( env, PPCInstr_FpLdSt( False/*store*/, 8, fr_src, am_addr ) );
@@ -5410,16 +5575,17 @@
       PPCAMode* dst_am
          = genGuestArrayOffset(
               env, puti->descr, 
-                   puti->ix, puti->bias );
+              puti->ix, puti->bias,
+              IEndianess );
       IRType ty = typeOfIRExpr(env->type_env, puti->data);
       if (mode64 && ty == Ity_I64) {
-         HReg r_src = iselWordExpr_R(env, puti->data);
+         HReg r_src = iselWordExpr_R(env, puti->data, IEndianess);
          addInstr(env, PPCInstr_Store( toUChar(8),
                                        dst_am, r_src, mode64 ));
          return;
       }
       if ((!mode64) && ty == Ity_I32) {
-         HReg r_src = iselWordExpr_R(env, puti->data);
+         HReg r_src = iselWordExpr_R(env, puti->data, IEndianess);
          addInstr(env, PPCInstr_Store( toUChar(4),
                                        dst_am, r_src, mode64 ));
          return;
@@ -5434,14 +5600,15 @@
       if (ty == Ity_I8  || ty == Ity_I16 ||
           ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
          HReg r_dst = lookupIRTemp(env, tmp);
-         HReg r_src = iselWordExpr_R(env, stmt->Ist.WrTmp.data);
+         HReg r_src = iselWordExpr_R(env, stmt->Ist.WrTmp.data, IEndianess);
          addInstr(env, mk_iMOVds_RR( r_dst, r_src ));
          return;
       }
       if (!mode64 && ty == Ity_I64) {
          HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
 
-         iselInt64Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data);
+         iselInt64Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data,
+                       IEndianess);
          lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
          addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
          addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
@@ -5449,7 +5616,8 @@
       }
       if (mode64 && ty == Ity_I128) {
          HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
-         iselInt128Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data);
+         iselInt128Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data,
+                        IEndianess);
          lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
          addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
          addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
@@ -5461,7 +5629,7 @@
 
          iselInt128Expr_to_32x4(&r_srcHi, &r_srcMedHi,
                                 &r_srcMedLo, &r_srcLo,
-                                env, stmt->Ist.WrTmp.data);
+                                env, stmt->Ist.WrTmp.data, IEndianess);
 
          lookupIRTempQuad( &r_dstHi, &r_dstMedHi, &r_dstMedLo,
                            &r_dstLo, env, tmp);
@@ -5473,38 +5641,39 @@
          return;
       }
       if (ty == Ity_I1) {
-         PPCCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
+         PPCCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data,
+                                         IEndianess);
          HReg r_dst = lookupIRTemp(env, tmp);
          addInstr(env, PPCInstr_Set(cond, r_dst));
          return;
       }
       if (ty == Ity_F64) {
          HReg fr_dst = lookupIRTemp(env, tmp);
-         HReg fr_src = iselDblExpr(env, stmt->Ist.WrTmp.data);
+         HReg fr_src = iselDblExpr(env, stmt->Ist.WrTmp.data, IEndianess);
          addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
          return;
       }
       if (ty == Ity_F32) {
          HReg fr_dst = lookupIRTemp(env, tmp);
-         HReg fr_src = iselFltExpr(env, stmt->Ist.WrTmp.data);
+         HReg fr_src = iselFltExpr(env, stmt->Ist.WrTmp.data, IEndianess);
          addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
          return;
       }
       if (ty == Ity_D32) {
          HReg fr_dst = lookupIRTemp(env, tmp);
-         HReg fr_src = iselDfp32Expr(env, stmt->Ist.WrTmp.data);
+         HReg fr_src = iselDfp32Expr(env, stmt->Ist.WrTmp.data, IEndianess);
          addInstr(env, PPCInstr_Dfp64Unary(Pfp_MOV, fr_dst, fr_src));
          return;
       }
       if (ty == Ity_V128) {
          HReg v_dst = lookupIRTemp(env, tmp);
-         HReg v_src = iselVecExpr(env, stmt->Ist.WrTmp.data);
+         HReg v_src = iselVecExpr(env, stmt->Ist.WrTmp.data, IEndianess);
          addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
          return;
       }
       if (ty == Ity_D64) {
          HReg fr_dst = lookupIRTemp( env, tmp );
-         HReg fr_src = iselDfp64Expr( env, stmt->Ist.WrTmp.data );
+         HReg fr_src = iselDfp64Expr( env, stmt->Ist.WrTmp.data, IEndianess );
          addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dst, fr_src ) );
          return;
       }
@@ -5512,7 +5681,8 @@
          HReg fr_srcHi, fr_srcLo, fr_dstHi, fr_dstLo;
 	 //         lookupDfp128IRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
          lookupIRTempPair( &fr_dstHi, &fr_dstLo, env, tmp );
-         iselDfp128Expr( &fr_srcHi, &fr_srcLo, env, stmt->Ist.WrTmp.data );
+         iselDfp128Expr( &fr_srcHi, &fr_srcLo, env, stmt->Ist.WrTmp.data,
+                         IEndianess );
          addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstHi, fr_srcHi ) );
          addInstr( env, PPCInstr_Dfp64Unary( Pfp_MOV, fr_dstLo, fr_srcLo ) );
          return;
@@ -5526,7 +5696,7 @@
       IRType tyRes  = typeOfIRTemp(env->type_env, res);
       IRType tyAddr = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.addr);
 
-      if (stmt->Ist.LLSC.end != Iend_BE)
+      if (stmt->Ist.LLSC.end != IEndianess)
          goto stmt_fail;
       if (!mode64 && (tyAddr != Ity_I32))
          goto stmt_fail;
@@ -5535,7 +5705,7 @@
 
       if (stmt->Ist.LLSC.storedata == NULL) {
          /* LL */
-         HReg r_addr = iselWordExpr_R( env, stmt->Ist.LLSC.addr );
+         HReg r_addr = iselWordExpr_R( env, stmt->Ist.LLSC.addr, IEndianess );
          HReg r_dst  = lookupIRTemp(env, res);
          if (tyRes == Ity_I32) {
             addInstr(env, PPCInstr_LoadL( 4, r_dst, r_addr, mode64 ));
@@ -5549,8 +5719,9 @@
       } else {
          /* SC */
          HReg   r_res  = lookupIRTemp(env, res); /* :: Ity_I1 */
-         HReg   r_a    = iselWordExpr_R(env, stmt->Ist.LLSC.addr);
-         HReg   r_src  = iselWordExpr_R(env, stmt->Ist.LLSC.storedata);
+         HReg   r_a    = iselWordExpr_R(env, stmt->Ist.LLSC.addr, IEndianess);
+         HReg   r_src  = iselWordExpr_R(env, stmt->Ist.LLSC.storedata,
+                                        IEndianess);
          HReg   r_tmp  = newVRegI(env);
          IRType tyData = typeOfIRExpr(env->type_env,
                                       stmt->Ist.LLSC.storedata);
@@ -5619,7 +5790,8 @@
          value and the call is skipped. */
       UInt   addToSp = 0;
       RetLoc rloc    = mk_RetLoc_INVALID();
-      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
+      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args,
+                    IEndianess );
       vassert(is_sane_RetLoc(rloc));
 
       /* Now figure out what to do with the returned value, if any. */
@@ -5714,7 +5886,7 @@
       if (mode64 && dst->tag != Ico_U64)
          vpanic("iselStmt(ppc64): Ist_Exit: dst is not a 64-bit value");
 
-      PPCCondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
+      PPCCondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard, IEndianess);
       PPCAMode*   amCIA = PPCAMode_IR(stmt->Ist.Exit.offsIP,
                                       hregPPC_GPR31(mode64));
 
@@ -5739,7 +5911,8 @@
             /* .. very occasionally .. */
             /* We can't use chaining, so ask for an assisted transfer,
                as that's the only alternative that is allowable. */
-            HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+            HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst),
+                                    IEndianess);
             addInstr(env, PPCInstr_XAssisted(r, amCIA, cc, Ijk_Boring));
          }
          return;
@@ -5758,7 +5931,8 @@
          case Ijk_Sys_syscall:
          case Ijk_InvalICache:
          {
-            HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+            HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst),
+                                    IEndianess);
             addInstr(env, PPCInstr_XAssisted(r, amCIA, cc,
                                              stmt->Ist.Exit.jk));
             return;
@@ -5784,7 +5958,8 @@
 /*---------------------------------------------------------*/
 
 static void iselNext ( ISelEnv* env,
-                       IRExpr* next, IRJumpKind jk, Int offsIP )
+                       IRExpr* next, IRJumpKind jk, Int offsIP,
+                       IREndness IEndianess)
 {
    if (vex_traceflags & VEX_TRACE_VCODE) {
       vex_printf( "\n-- PUT(%d) = ", offsIP);
@@ -5820,7 +5995,7 @@
             /* .. very occasionally .. */
             /* We can't use chaining, so ask for an assisted transfer,
                as that's the only alternative that is allowable. */
-            HReg r = iselWordExpr_R(env, next);
+            HReg r = iselWordExpr_R(env, next, IEndianess);
             addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
                                              Ijk_Boring));
          }
@@ -5831,7 +6006,7 @@
    /* Case: call/return (==boring) transfer to any address */
    switch (jk) {
       case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
-         HReg       r     = iselWordExpr_R(env, next);
+         HReg       r     = iselWordExpr_R(env, next, IEndianess);
          PPCAMode*  amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
          if (env->chainingAllowed) {
             addInstr(env, PPCInstr_XIndir(r, amCIA, always));
@@ -5858,7 +6033,7 @@
       case Ijk_Sys_syscall:
       case Ijk_InvalICache:
       {
-         HReg      r     = iselWordExpr_R(env, next);
+         HReg      r     = iselWordExpr_R(env, next, IEndianess);
          PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
          addInstr(env, PPCInstr_XAssisted(r, amCIA, always, jk));
          return;
@@ -5889,7 +6064,8 @@
                           Int offs_Host_EvC_FailAddr,
                           Bool chainingAllowed,
                           Bool addProfInc,
-                          Addr64 max_ga )
+                          Addr64 max_ga)
+
 {
    Int       i, j;
    HReg      hregLo, hregMedLo, hregMedHi, hregHi;
@@ -5898,7 +6074,7 @@
    Bool      mode64 = False;
    UInt      mask32, mask64;
    PPCAMode *amCounter, *amFailAddr;
-
+   IREndness IEndianess;
 
    vassert(arch_host == VexArchPPC32 || arch_host == VexArchPPC64);
    mode64 = arch_host == VexArchPPC64;
@@ -5921,7 +6097,13 @@
    }
 
    /* Check that the host's endianness is as expected. */
-   vassert(archinfo_host->endness == VexEndnessBE);
+   vassert((archinfo_host->endness == VexEndnessBE) ||
+	   (archinfo_host->endness == VexEndnessLE));
+
+   if (archinfo_host->endness == VexEndnessBE)
+     IEndianess = Iend_BE;
+   else
+     IEndianess = Iend_LE;
 
    /* Make up an initial environment to use. */
    env = LibVEX_Alloc(sizeof(ISelEnv));
@@ -6021,9 +6203,9 @@
 
    /* Ok, finally we can iterate over the statements. */
    for (i = 0; i < bb->stmts_used; i++)
-      iselStmt(env, bb->stmts[i]);
+      iselStmt(env, bb->stmts[i], IEndianess);
 
-   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
+   iselNext(env, bb->next, bb->jumpkind, bb->offsIP, IEndianess);
 
    /* record the number of vregs we used. */
    env->code->n_vregs = env->vreg_ctr;
diff --git a/priv/main_main.c b/priv/main_main.c
index cf9789b..fa5f3a8 100644
--- a/priv/main_main.c
+++ b/priv/main_main.c
@@ -1,6 +1,6 @@
 
 /*---------------------------------------------------------------*/
-/*--- begin                                       main_main.c ---*/
+/*--- Begin                                       main_main.c ---*/
 /*---------------------------------------------------------------*/
 
 /*
@@ -378,8 +378,8 @@
                        emit_PPCInstr;
          host_word_type    = Ity_I64;
          vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_host.hwcaps));
-         vassert(vta->archinfo_host.endness == VexEndnessBE
-                 /* later: || vta->archinfo_host.endness == VexEndnessLE */);
+         vassert(vta->archinfo_host.endness == VexEndnessBE ||
+                 vta->archinfo_host.endness == VexEndnessLE );
          break;
 
       case VexArchS390X:
@@ -571,8 +571,8 @@
          offB_HOST_EvC_COUNTER  = offsetof(VexGuestPPC64State,host_EvC_COUNTER);
          offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC64State,host_EvC_FAILADDR);
          vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_guest.hwcaps));
-         vassert(vta->archinfo_guest.endness == VexEndnessBE
-                 /* later: || vta->archinfo_guest.endness == VexEndnessBE */);
+         vassert(vta->archinfo_guest.endness == VexEndnessBE ||
+                 vta->archinfo_guest.endness == VexEndnessLE );
          vassert(0 == sizeof(VexGuestPPC64State) % 16);
          vassert(sizeof( ((VexGuestPPC64State*)0)->guest_CMSTART    ) == 8);
          vassert(sizeof( ((VexGuestPPC64State*)0)->guest_CMLEN      ) == 8);