Improve code generation on s390x for assignment of constant
values to guest registers. Motivated by the observation that
piecing together a 64-bit value requires 4 insns on z900 and 2 insns
on newer models. Specifically:
(1) Assigning 0 can be done by using XC
(2) Assigning a value that differs by a small amount from the
    value previously assigned can be done using AGSI
    (Happens a lot for guest IA updates).
(3) If the new value differs from the previous one only
    in the lower word it is sufficient to assign the lower word.
(4) If the new value equals the old value the assignment is redundant
    and can be eliminated. This happens surprisingly often.
This buys us somewhere between 5% and 11.8% of insns (as measured
on the perf bucket).


git-svn-id: svn://svn.valgrind.org/vex/trunk@2258 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/auxprogs/genoffsets.c b/auxprogs/genoffsets.c
index c371a9d..e0dd3e9 100644
--- a/auxprogs/genoffsets.c
+++ b/auxprogs/genoffsets.c
@@ -169,6 +169,10 @@
    GENOFFSET(S390X,s390x,SYSNO);
    GENOFFSET(S390X,s390x,IP_AT_SYSCALL);
    GENOFFSET(S390X,s390x,fpc);
+   GENOFFSET(S390X,s390x,CC_OP);
+   GENOFFSET(S390X,s390x,CC_DEP1);
+   GENOFFSET(S390X,s390x,CC_DEP2);
+   GENOFFSET(S390X,s390x,CC_NDEP);
 }
 
 /*--------------------------------------------------------------------*/
diff --git a/priv/guest_s390_defs.h b/priv/guest_s390_defs.h
index bf4b762..754ce3d 100644
--- a/priv/guest_s390_defs.h
+++ b/priv/guest_s390_defs.h
@@ -37,6 +37,7 @@
 #include "libvex_ir.h"                // IRSB  (needed by bb_to_IR.h)
 #include "libvex.h"                   // VexArch  (needed by bb_to_IR.h)
 #include "guest_generic_bb_to_IR.h"   // DisResult
+#include "libvex_guest_s390x.h"       // VexGuestS390XState
 
 
 /* Convert one s390 insn to IR.  See the type DisOneInstrFn in
diff --git a/priv/guest_s390_toIR.c b/priv/guest_s390_toIR.c
index 7319781..47e3ab5 100644
--- a/priv/guest_s390_toIR.c
+++ b/priv/guest_s390_toIR.c
@@ -13593,7 +13593,7 @@
 
 DisResult
 disInstr_S390(IRSB        *irsb_IN,
-              Bool         put_IP,
+              Bool         put_IP __attribute__((unused)),
               Bool       (*resteerOkFn)(void *, Addr64),
               Bool         resteerCisOk,
               void        *callback_opaque,
@@ -13616,10 +13616,9 @@
    resteer_fn = resteerOkFn;
    resteer_data = callback_opaque;
 
-   /* We may be asked to update the guest IA before going further. */
-   if (put_IP)
-      addStmtToIRSB(irsb, IRStmt_Put(S390X_GUEST_OFFSET(guest_IA),
-                                     mkaddr_expr(guest_IA_curr_instr)));
+   /* Always update the guest IA. See comment in s390_isel_stmt for Ist_Put. */
+   addStmtToIRSB(irsb, IRStmt_Put(S390X_GUEST_OFFSET(guest_IA),
+                                  mkaddr_expr(guest_IA_curr_instr)));
 
    return disInstr_S390_WRK(guest_code + delta);
 }
diff --git a/priv/host_s390_defs.c b/priv/host_s390_defs.c
index f086e55..2efc105 100644
--- a/priv/host_s390_defs.c
+++ b/priv/host_s390_defs.c
@@ -714,6 +714,8 @@
       break;
 
    case S390_INSN_MFENCE:
+   case S390_INSN_GZERO:
+   case S390_INSN_GADD:
       break;
 
    default:
@@ -917,6 +919,8 @@
       break;
 
    case S390_INSN_MFENCE:
+   case S390_INSN_GZERO:
+   case S390_INSN_GADD:
       break;
 
    default:
@@ -1115,6 +1119,35 @@
 }
 
 
+static UChar *
+emit_SIY(UChar *p, ULong op, UChar i2, UChar b1, UShort dl1, UChar dh1)
+{
+   ULong the_insn = op;
+
+   the_insn |= ((ULong)i2) << 32;
+   the_insn |= ((ULong)b1) << 28;
+   the_insn |= ((ULong)dl1) << 16;
+   the_insn |= ((ULong)dh1) << 8;
+
+   return emit_6bytes(p, the_insn);
+}
+
+
+static UChar *
+emit_SSa(UChar *p, ULong op, UChar l, UChar b1, UShort d1, UChar b2, UShort d2)
+{
+   ULong the_insn = op;
+
+   the_insn |= ((ULong)l)  << 32;
+   the_insn |= ((ULong)b1) << 28;
+   the_insn |= ((ULong)d1) << 16;
+   the_insn |= ((ULong)b2) << 12;
+   the_insn |= ((ULong)d2) << 0;
+
+   return emit_6bytes(p, the_insn);
+}
+
+
 /*------------------------------------------------------------*/
 /*--- Functions to emit particular instructions            ---*/
 /*------------------------------------------------------------*/
@@ -1240,6 +1273,18 @@
 
 
 static UChar *
+s390_emit_AGSI(UChar *p, UChar i2, UChar b1, UShort dl1, UChar dh1)
+{
+   vassert(s390_host_has_gie);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, INT, SDXB), "agsi", (Int)(Char)i2, dh1, dl1, 0, b1);
+
+   return emit_SIY(p, 0xeb000000007aULL, i2, b1, dl1, dh1);
+}
+
+
+static UChar *
 s390_emit_NR(UChar *p, UChar r1, UChar r2)
 {
    if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
@@ -1688,6 +1733,16 @@
 
 
 static UChar *
+s390_emit_XC(UChar *p, UInt l, UChar b1, UShort d1, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, UDLB, UDXB), "xc", d1, l, b1, d2, 0, b2);
+
+   return emit_SSa(p, 0xd70000000000ULL, l, b1, d1, b2, d2);
+}
+
+
+static UChar *
 s390_emit_FLOGR(UChar *p, UChar r1, UChar r2)
 {
    vassert(s390_host_has_eimm);
@@ -4406,6 +4461,34 @@
 }
 
 
+s390_insn *
+s390_insn_gzero(UChar size, UInt offset)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_GZERO;
+   insn->size = size;
+   insn->variant.gzero.offset = offset;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_gadd(UChar size, UInt offset, UChar delta, ULong value)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_GADD;
+   insn->size = size;
+   insn->variant.gadd.offset = offset;
+   insn->variant.gadd.delta = delta;
+   insn->variant.gadd.value = value;
+
+   return insn;
+}
+
+
 /*---------------------------------------------------------------*/
 /*--- Debug print                                             ---*/
 /*---------------------------------------------------------------*/
@@ -4477,6 +4560,10 @@
                           s390_amode_as_string(va_arg(args, s390_amode *)));
          continue;
 
+      case 'G':     /* %G = guest state @ offset */
+         p += vex_sprintf(p, "guest[%d]", va_arg(args, UInt));
+         continue;
+
       case 'C':     /* %C = condition code */
          p += vex_sprintf(p, "%s", s390_cc_as_string(va_arg(args, s390_cc_t)));
          continue;
@@ -4821,6 +4908,17 @@
       s390_sprintf(buf, "%M", "v-mfence");
       return buf;   /* avoid printing "size = ..." which is meaningless */
 
+   case S390_INSN_GZERO:
+      s390_sprintf(buf, "%M %G", "v-gzero", insn->variant.gzero.offset);
+      break;
+
+   case S390_INSN_GADD:
+      s390_sprintf(buf, "%M %G += %I  (= %I)", "v-gadd",
+                   insn->variant.gadd.offset,
+                   (Long)(Char)insn->variant.gadd.delta,
+                   insn->variant.gadd.value);
+      break;
+
    default: goto fail;
    }
 
@@ -7042,6 +7140,24 @@
 }
 
 
+static UChar *
+s390_insn_gzero_emit(UChar *buf, const s390_insn *insn)
+{
+   return s390_emit_XC(buf, insn->size - 1,
+                       S390_REGNO_GUEST_STATE_POINTER, insn->variant.gzero.offset,
+                       S390_REGNO_GUEST_STATE_POINTER, insn->variant.gzero.offset);
+}
+
+
+static UChar *
+s390_insn_gadd_emit(UChar *buf, const s390_insn *insn)
+{
+   return s390_emit_AGSI(buf, insn->variant.gadd.delta,
+                         S390_REGNO_GUEST_STATE_POINTER,
+                         DISP20(insn->variant.gadd.offset));
+}
+
+
 Int
 emit_S390Instr(UChar *buf, Int nbuf, s390_insn *insn, Bool mode64,
                void *dispatch_unassisted, void *dispatch_assisted)
@@ -7159,6 +7275,14 @@
       end = s390_insn_mfence_emit(buf, insn);
       break;
 
+   case S390_INSN_GZERO:
+      end = s390_insn_gzero_emit(buf, insn);
+      break;
+
+   case S390_INSN_GADD:
+      end = s390_insn_gadd_emit(buf, insn);
+      break;
+
    default:
       vpanic("s390_insn_emit");
    }
diff --git a/priv/host_s390_defs.h b/priv/host_s390_defs.h
index cdce301..b5dff68 100644
--- a/priv/host_s390_defs.h
+++ b/priv/host_s390_defs.h
@@ -142,7 +142,9 @@
    S390_INSN_BFP128_COMPARE,
    S390_INSN_BFP128_CONVERT_TO,
    S390_INSN_BFP128_CONVERT_FROM,
-   S390_INSN_MFENCE
+   S390_INSN_MFENCE,
+   S390_INSN_GZERO,   /* Assign zero to a guest register */
+   S390_INSN_GADD     /* Add a value to a guest register */
 } s390_insn_tag;
 
 
@@ -397,6 +399,14 @@
          HReg             op2_hi; /* right operand; high part */
          HReg             op2_lo; /* right operand; low part */
       } bfp128_compare;
+      struct {
+         UInt             offset;
+      } gzero;
+      struct {
+         UInt             offset;
+         UChar            delta;
+         ULong            value;  /* for debugging only */
+      } gadd;
    } variant;
 } s390_insn;
 
@@ -447,6 +457,8 @@
                                          HReg dst, HReg op_hi, HReg op_lo,
                                          s390_round_t);
 s390_insn *s390_insn_mfence(void);
+s390_insn *s390_insn_gzero(UChar size, UInt offset);
+s390_insn *s390_insn_gadd(UChar size, UInt offset, UChar delta, ULong value);
 UInt       s390_insn_emit(UChar *buf, Int nbuf, const s390_insn *insn,
                           void *dispatch);
 
diff --git a/priv/host_s390_isel.c b/priv/host_s390_isel.c
index 2bc23e8..f86301f 100644
--- a/priv/host_s390_isel.c
+++ b/priv/host_s390_isel.c
@@ -34,10 +34,11 @@
 #include "libvex_ir.h"
 #include "libvex.h"
 #include "libvex_s390x_common.h"
+#include "libvex_guest_offsets.h"
 
-#include "ir_match.h"
 #include "main_util.h"
 #include "main_globals.h"
+#include "guest_s390_defs.h"   /* guest_s390x_state_requires_precise_mem_exns */
 #include "host_generic_regs.h"
 #include "host_s390_defs.h"
 
@@ -68,8 +69,27 @@
 
     - The host subarchitecture we are selecting insns for.
       This is set at the start and does not change.
+
+    - A flag to indicate whether the guest IA has been assigned to.
+
+    - Values of certain guest registers which are often assigned constants.
 */
 
+/* Symbolic names for guest registers whose value we're tracking */
+enum {
+   GUEST_IA,
+   GUEST_CC_OP,
+   GUEST_CC_DEP1,
+   GUEST_CC_DEP2,
+   GUEST_CC_NDEP,
+   GUEST_SYSNO,
+   GUEST_UNKNOWN    /* must be the last entry */
+};
+
+/* Number of registers we're tracking. */
+#define NUM_TRACKED_REGS GUEST_UNKNOWN
+
+
 typedef struct {
    IRTypeEnv   *type_env;
 
@@ -79,10 +99,12 @@
 
    HInstrArray *code;
 
+   ULong        old_value[NUM_TRACKED_REGS];
    UInt         vreg_ctr;
 
    UInt         hwcaps;
-
+   Bool         first_IA_assignment;
+   Bool         old_value_valid[NUM_TRACKED_REGS];
 } ISelEnv;
 
 
@@ -96,6 +118,33 @@
 static void          s390_isel_float128_expr(HReg *, HReg *, ISelEnv *, IRExpr *);
 
 
+static Int
+get_guest_reg(Int offset)
+{
+   switch (offset) {
+   case OFFSET_s390x_IA:        return GUEST_IA;
+   case OFFSET_s390x_CC_OP:     return GUEST_CC_OP;
+   case OFFSET_s390x_CC_DEP1:   return GUEST_CC_DEP1;
+   case OFFSET_s390x_CC_DEP2:   return GUEST_CC_DEP2;
+   case OFFSET_s390x_CC_NDEP:   return GUEST_CC_NDEP;
+   case OFFSET_s390x_SYSNO:     return GUEST_SYSNO;
+
+      /* Also make sure there is never a partial write to one of
+         these registers. That would complicate matters. */
+   case OFFSET_s390x_IA+1      ... OFFSET_s390x_IA+7:
+   case OFFSET_s390x_CC_OP+1   ... OFFSET_s390x_CC_OP+7:
+   case OFFSET_s390x_CC_DEP1+1 ... OFFSET_s390x_CC_DEP1+7:
+   case OFFSET_s390x_CC_DEP2+1 ... OFFSET_s390x_CC_DEP2+7:
+   case OFFSET_s390x_CC_NDEP+1 ... OFFSET_s390x_CC_NDEP+7:
+      vassert("partial update of this guest state register is not allowed");
+      break;
+
+   default: break;
+   }
+
+   return GUEST_UNKNOWN;
+}
+
 /* Add an instruction */
 static void
 addInstr(ISelEnv *env, s390_insn *insn)
@@ -203,6 +252,16 @@
 }
 
 
+static __inline__ Bool
+ulong_fits_signed_8bit(ULong val)
+{
+   Long v = val & 0xFFu;
+
+   v = (v << 56) >> 56;  /* sign extend */
+
+   return val == (ULong)v;
+}
+
 /* EXPR is an expression that is used as an address. Return an s390_amode
    for it. */
 static s390_amode *
@@ -2139,7 +2198,98 @@
       IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
       HReg src;
       s390_amode *am;
+      ULong new_value, old_value, difference;
 
+      /* Detect updates to certain guest registers. We track the contents
+         of those registers as long as they contain constants. If the new
+         constant is either zero or in the 8-bit neighbourhood of the
+         current value we can use a memory-to-memory insn to do the update. */
+
+      Int offset = stmt->Ist.Put.offset;
+
+      /* Check necessary conditions:
+         (1) must be one of the registers we care about
+         (2) assigned value must be a constant */
+      Int guest_reg = get_guest_reg(offset);
+
+      if (guest_reg == GUEST_UNKNOWN) goto not_special;
+
+      if (guest_reg == GUEST_IA) {
+         /* If this is the first assignment to the IA reg, don't special case
+            it. We need to do a full 8-byte assignment here. The reason is 
+            that in case of a redirected translation the guest IA does not 
+            contain the redirected-to address. Instead it contains the 
+            redirected-from address and those can be far apart. So in order to
+            do incremnetal updates if the IA in the future we need to get the
+            initial address of the super block correct. */
+         if (env->first_IA_assignment) {
+            env->first_IA_assignment = False;
+            goto not_special;
+         }
+      }
+
+      if (stmt->Ist.Put.data->tag != Iex_Const) {
+         /* Invalidate guest register contents */
+         env->old_value_valid[guest_reg] = False;
+         goto not_special;
+      }
+
+      /* OK. Necessary conditions are satisfied. */
+
+      /* Get the old value and update it */
+      vassert(tyd == Ity_I64);
+
+      old_value = env->old_value[guest_reg];
+      new_value = stmt->Ist.Put.data->Iex.Const.con->Ico.U64;
+      env->old_value[guest_reg] = new_value;
+
+      Bool old_value_is_valid = env->old_value_valid[guest_reg];
+      env->old_value_valid[guest_reg] = True;
+
+      /* If the register already contains the new value, there is nothing
+         to do here. Unless the guest register requires precise memory
+         exceptions. */
+      if (old_value_is_valid && new_value == old_value) {
+         if (! guest_s390x_state_requires_precise_mem_exns(offset, offset + 8)) {
+            return;
+         }
+      }
+
+      /* guest register = 0 */
+      if (new_value == 0) {
+         addInstr(env, s390_insn_gzero(sizeofIRType(tyd), offset));
+         return;
+      }
+
+      if (old_value_is_valid == False) goto not_special;
+
+      /* If the new value is in the neighbourhood of the old value
+         we can use a memory-to-memory insn */
+      difference = new_value - old_value;
+
+      if (s390_host_has_gie && ulong_fits_signed_8bit(difference)) {
+         addInstr(env, s390_insn_gadd(sizeofIRType(tyd), offset,
+                                      (difference & 0xFF), new_value));
+         return;
+      }
+
+      /* If the high word is the same it is sufficient to load the low word.
+         Use R0 as a scratch reg. */
+      if ((old_value >> 32) == (new_value >> 32)) {
+         HReg r0  = make_gpr(env, 0);
+         HReg gsp = make_gpr(env, S390_REGNO_GUEST_STATE_POINTER);
+         s390_amode *gam;
+
+         gam = s390_amode_b12(offset + 4, gsp);
+         addInstr(env, s390_insn_load_immediate(4, r0,
+                                                new_value & 0xFFFFFFFF));
+         addInstr(env, s390_insn_store(4, gam, r0));
+         return;
+      }
+
+      /* No special case applies... fall through */
+
+   not_special:
       am = s390_amode_for_guest_state(stmt->Ist.Put.offset);
 
       switch (tyd) {
@@ -2230,6 +2380,17 @@
       IRType   retty;
       IRDirty* d = stmt->Ist.Dirty.details;
       Bool     passBBP;
+      Int i;
+
+      /* Invalidate tracked values of those guest state registers that are
+         modified by this helper. */
+      for (i = 0; i < d->nFxState; ++i) {
+         if ((d->fxState[i].fx == Ifx_Write || d->fxState[i].fx == Ifx_Modify)) {
+            Int guest_reg = get_guest_reg(d->fxState[i].offset);
+            if (guest_reg != GUEST_UNKNOWN)
+               env->old_value_valid[guest_reg] = False;
+         }
+      }
 
       if (d->nFxState == 0)
          vassert(!d->needsBBP);
@@ -2372,6 +2533,13 @@
    /* Copy BB's type env. */
    env->type_env = bb->tyenv;
 
+   /* Set up data structures for tracking guest register values. */
+   env->first_IA_assignment = True;
+   for (i = 0; i < NUM_TRACKED_REGS; ++i) {
+      env->old_value[i] = 0;  /* just something to have a defined value */
+      env->old_value_valid[i] = False;
+   }
+
    /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
       change as we go along. For some reason types_used has Int type -- but
       it should be unsigned. Internally we use an unsigned type; so we