Add support for the lbarx, lharx, stbcx and sthcx instructions.

The instructions are part of ISA 2.06 but were not implemented in all
versions of the hardware.  All four instructions are supported in
ISA 2.07, so this patch places them under the ISA 2.07 category of
supported instructions.

The bugzilla for this issue is 346324.


git-svn-id: svn://svn.valgrind.org/vex/trunk@3137 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/guest_ppc_toIR.c b/priv/guest_ppc_toIR.c
index 3ca2f55..c76a2ef 100644
--- a/priv/guest_ppc_toIR.c
+++ b/priv/guest_ppc_toIR.c
@@ -1729,7 +1729,7 @@
    restart of the current insn. */
 static void gen_SIGBUS_if_misaligned ( IRTemp addr, UChar align )
 {
-   vassert(align == 4 || align == 8 || align == 16);
+   vassert(align == 2 || align == 4 || align == 8 || align == 16);
    if (mode64) {
       vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I64);
       stmt(
@@ -6292,6 +6292,41 @@
          break;
       }
 
+      case 0x034: { // lbarx (Load Byte and Reserve Indexed)
+         IRTemp res;
+         /* According to the PowerPC ISA version 2.05, b0 (called EH
+            in the documentation) is merely a hint bit to the
+            hardware, I think as to whether or not contention is
+            likely.  So we can just ignore it. */
+         DIP("lbarx r%u,r%u,r%u,EH=%u\n", rD_addr, rA_addr, rB_addr, (UInt)b0);
+
+         // and actually do the load
+         res = newTemp(Ity_I8);
+         stmt( stmt_load(res, mkexpr(EA), NULL/*this is a load*/) );
+
+         putIReg( rD_addr, mkWidenFrom8(ty, mkexpr(res), False) );
+         break;
+     }
+
+      case 0x074: { // lharx (Load Halfword and Reserve Indexed)
+         IRTemp res;
+         /* According to the PowerPC ISA version 2.05, b0 (called EH
+            in the documentation) is merely a hint bit to the
+            hardware, I think as to whether or not contention is
+            likely.  So we can just ignore it. */
+         DIP("lharx r%u,r%u,r%u,EH=%u\n", rD_addr, rA_addr, rB_addr, (UInt)b0);
+
+         // trap if misaligned
+         gen_SIGBUS_if_misaligned( EA, 2 );
+
+         // and actually do the load
+         res = newTemp(Ity_I16);
+         stmt( stmt_load(res, mkexpr(EA), NULL/*this is a load*/) );
+
+         putIReg( rD_addr, mkWidenFrom16(ty, mkexpr(res), False) );
+         break;
+      }
+
       case 0x096: { 
          // stwcx. (Store Word Conditional Indexed, PPC32 p532)
          // Note this has to handle stwcx. in both 32- and 64-bit modes,
@@ -6326,6 +6361,71 @@
          break;
       }
 
+      case 0x2B6: {
+         // stbcx. (Store Byte Conditional Indexed)
+         // Note this has to handle stbcx. in both 32- and 64-bit modes,
+         // so isn't quite as straightforward as it might otherwise be.
+         IRTemp rS = newTemp(Ity_I8);
+         IRTemp resSC;
+         if (b0 != 1) {
+            vex_printf("dis_memsync(ppc)(stbcx.,b0)\n");
+            return False;
+         }
+         DIP("stbcx. r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+
+         // Get the data to be stored, and narrow to 8 bits if necessary
+         assign( rS, mkNarrowTo8(ty, getIReg(rS_addr)) );
+
+         // Do the store, and get success/failure bit into resSC
+         resSC = newTemp(Ity_I1);
+         stmt( stmt_load( resSC, mkexpr(EA), mkexpr(rS)) );
+
+         // Set CR0[LT GT EQ SO] = 0b000 || XER[SO]  on failure
+         // Set CR0[LT GT EQ SO] = 0b001 || XER[SO]  on success
+         putCR321(0, binop(Iop_Shl8, unop(Iop_1Uto8, mkexpr(resSC)), mkU8(1)));
+         putCR0(0, getXER_SO());
+
+         /* Note:
+            If resaddr != lbarx_resaddr, CR0[EQ] is undefined, and
+            whether rS is stored is dependent on that value. */
+         /* So I guess we can just ignore this case? */
+         break;
+      }
+
+      case 0x2D6: {
+         // sthcx. (Store Halfword Conditional Indexed)
+         // Note this has to handle sthcx. in both 32- and 64-bit modes,
+         // so isn't quite as straightforward as it might otherwise be.
+         IRTemp rS = newTemp(Ity_I16);
+         IRTemp resSC;
+         if (b0 != 1) {
+            vex_printf("dis_memsync(ppc)(stwcx.,b0)\n");
+            return False;
+         }
+         DIP("sthcx. r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+
+         // trap if misaligned
+         gen_SIGBUS_if_misaligned( EA, 2 );
+
+         // Get the data to be stored, and narrow to 16 bits if necessary
+         assign( rS, mkNarrowTo16(ty, getIReg(rS_addr)) );
+
+         // Do the store, and get success/failure bit into resSC
+         resSC = newTemp(Ity_I1);
+         stmt( stmt_load( resSC, mkexpr(EA), mkexpr(rS)) );
+
+         // Set CR0[LT GT EQ SO] = 0b000 || XER[SO]  on failure
+         // Set CR0[LT GT EQ SO] = 0b001 || XER[SO]  on success
+         putCR321(0, binop(Iop_Shl8, unop(Iop_1Uto8, mkexpr(resSC)), mkU8(1)));
+         putCR0(0, getXER_SO());
+
+         /* Note:
+            If resaddr != lharx_resaddr, CR0[EQ] is undefined, and
+            whether rS is stored is dependent on that value. */
+         /* So I guess we can just ignore this case? */
+         break;
+      }
+
       case 0x256: // sync (Synchronize, PPC32 p543), 
                   // also lwsync (L==1), ptesync (L==2)
          /* http://sources.redhat.com/ml/binutils/2000-12/msg00311.html
@@ -19668,6 +19768,12 @@
       }
 
       /* Memory Synchronization Instructions */
+      case 0x034: case 0x074:             // lbarx, lharx
+      case 0x2B6: case 0x2D6:             // stbcx, sthcx
+         if (!allow_isa_2_07) goto decode_noP8;
+         if (dis_memsync( theInstr )) goto decode_success;
+         goto decode_failure;
+
       case 0x356: case 0x014: case 0x096: // eieio, lwarx, stwcx.
       case 0x256:                         // sync
          if (dis_memsync( theInstr )) goto decode_success;
diff --git a/priv/host_ppc_defs.c b/priv/host_ppc_defs.c
index 740403e..e9de08b 100644
--- a/priv/host_ppc_defs.c
+++ b/priv/host_ppc_defs.c
@@ -861,7 +861,7 @@
    i->Pin.LoadL.sz   = sz;
    i->Pin.LoadL.src  = src;
    i->Pin.LoadL.dst  = dst;
-   vassert(sz == 4 || sz == 8);
+   vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
    if (sz == 8) vassert(mode64);
    return i;
 }
@@ -882,7 +882,7 @@
    i->Pin.StoreC.sz  = sz;
    i->Pin.StoreC.src = src;
    i->Pin.StoreC.dst = dst;
-   vassert(sz == 4 || sz == 8);
+   vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
    if (sz == 8) vassert(mode64);
    return i;
 }
@@ -1644,12 +1644,15 @@
       ppPPCAMode(i->Pin.Load.src);
       return;
    }
-   case Pin_LoadL:
-      vex_printf("l%carx ", i->Pin.LoadL.sz==4 ? 'w' : 'd');
+   case Pin_LoadL: {
+      UChar sz = i->Pin.LoadL.sz;
+      HChar c_sz = sz==1 ? 'b' : sz==2 ? 'h' : sz==4 ? 'w' : 'd';
+      vex_printf("l%carx ", c_sz);
       ppHRegPPC(i->Pin.LoadL.dst);
       vex_printf(",%%r0,");
       ppHRegPPC(i->Pin.LoadL.src);
       return;
+   }
    case Pin_Store: {
       UChar sz = i->Pin.Store.sz;
       Bool idxd = toBool(i->Pin.Store.dst->tag == Pam_RR);
@@ -1660,12 +1663,15 @@
       ppPPCAMode(i->Pin.Store.dst);
       return;
    }
-   case Pin_StoreC:
-      vex_printf("st%ccx. ", i->Pin.StoreC.sz==4 ? 'w' : 'd');
+   case Pin_StoreC: {
+      UChar sz = i->Pin.StoreC.sz;
+      HChar c_sz = sz==1 ? 'b' : sz==2 ? 'h' : sz==4 ? 'w' : 'd';
+      vex_printf("st%ccx. ", c_sz);
       ppHRegPPC(i->Pin.StoreC.src);
       vex_printf(",%%r0,");
       ppHRegPPC(i->Pin.StoreC.dst);
       return;
+   }
    case Pin_Set: {
       PPCCondCode cc = i->Pin.Set.cond;
       vex_printf("set (%s),", showPPCCondCode(cc));
@@ -4399,6 +4405,16 @@
    }
 
    case Pin_LoadL: {
+      if (i->Pin.LoadL.sz == 1) {
+         p = mkFormX(p, 31, iregEnc(i->Pin.LoadL.dst, mode64),
+                     0, iregEnc(i->Pin.LoadL.src, mode64), 52, 0, endness_host);
+         goto done;
+      }
+      if (i->Pin.LoadL.sz == 2) {
+         p = mkFormX(p, 31, iregEnc(i->Pin.LoadL.dst, mode64),
+                     0, iregEnc(i->Pin.LoadL.src, mode64), 116, 0, endness_host);
+         goto done;
+      }
       if (i->Pin.LoadL.sz == 4) {
          p = mkFormX(p, 31, iregEnc(i->Pin.LoadL.dst, mode64),
                      0, iregEnc(i->Pin.LoadL.src, mode64), 20, 0, endness_host);
@@ -4495,6 +4511,17 @@
    }
 
    case Pin_StoreC: {
+      if (i->Pin.StoreC.sz == 1) {
+         p = mkFormX(p, 31, iregEnc(i->Pin.StoreC.src, mode64),
+                     0, iregEnc(i->Pin.StoreC.dst, mode64), 694, 1, endness_host);
+         goto done;
+      }
+      if (i->Pin.StoreC.sz == 2) {
+         p = mkFormX(p, 31, iregEnc(i->Pin.StoreC.src, mode64),
+                     0, iregEnc(i->Pin.StoreC.dst, mode64), 726, 1, endness_host);
+         goto done;
+      }
+
       if (i->Pin.StoreC.sz == 4) {
          p = mkFormX(p, 31, iregEnc(i->Pin.StoreC.src, mode64),
                      0, iregEnc(i->Pin.StoreC.dst, mode64), 150, 1, endness_host);
diff --git a/priv/host_ppc_isel.c b/priv/host_ppc_isel.c
index a2b0398..40fe895 100644
--- a/priv/host_ppc_isel.c
+++ b/priv/host_ppc_isel.c
@@ -5754,6 +5754,14 @@
          /* LL */
          HReg r_addr = iselWordExpr_R( env, stmt->Ist.LLSC.addr, IEndianess );
          HReg r_dst  = lookupIRTemp(env, res);
+         if (tyRes == Ity_I8) {
+            addInstr(env, PPCInstr_LoadL( 1, r_dst, r_addr, mode64 ));
+            return;
+         }
+         if (tyRes == Ity_I16) {
+            addInstr(env, PPCInstr_LoadL( 2, r_dst, r_addr, mode64 ));
+            return;
+         }
          if (tyRes == Ity_I32) {
             addInstr(env, PPCInstr_LoadL( 4, r_dst, r_addr, mode64 ));
             return;
@@ -5773,8 +5781,20 @@
          IRType tyData = typeOfIRExpr(env->type_env,
                                       stmt->Ist.LLSC.storedata);
          vassert(tyRes == Ity_I1);
-         if (tyData == Ity_I32 || (tyData == Ity_I64 && mode64)) {
-            addInstr(env, PPCInstr_StoreC( tyData==Ity_I32 ? 4 : 8,
+         if (tyData == Ity_I8 || tyData == Ity_I16 || tyData == Ity_I32 ||
+            (tyData == Ity_I64 && mode64)) {
+            int size = 0;
+
+            if (tyData == Ity_I64)
+               size = 8;
+            else if (tyData == Ity_I32)
+               size = 4;
+            else if (tyData == Ity_I16)
+               size = 2;
+            else if (tyData == Ity_I8)
+               size = 1;
+
+            addInstr(env, PPCInstr_StoreC( size,
                                            r_a, r_src, mode64 ));
             addInstr(env, PPCInstr_MfCR( r_tmp ));
             addInstr(env, PPCInstr_Shft(