8256453: C2: Reduce State footprint

Reviewed-by: neliasso, kvn
diff --git a/src/hotspot/share/adlc/dfa.cpp b/src/hotspot/share/adlc/dfa.cpp
index da516b8..b1556ab 100644
--- a/src/hotspot/share/adlc/dfa.cpp
+++ b/src/hotspot/share/adlc/dfa.cpp
@@ -29,14 +29,6 @@
 static bool debug_output   = false;
 static bool debug_output1  = false;    // top level chain rules
 
-//---------------------------Access to internals of class State----------------
-static const char *sLeft   = "_kids[0]";
-static const char *sRight  = "_kids[1]";
-
-//---------------------------DFA productions-----------------------------------
-static const char *dfa_production           = "DFA_PRODUCTION";
-static const char *dfa_production_set_valid = "DFA_PRODUCTION__SET_VALID";
-
 //---------------------------Production State----------------------------------
 static const char *knownInvalid = "knownInvalid";    // The result does NOT have a rule defined
 static const char *knownValid   = "knownValid";      // The result must be produced by a rule
@@ -111,7 +103,7 @@
 //---------------------------Helper Functions----------------------------------
 // cost_check template:
 // 1)      if (STATE__NOT_YET_VALID(EBXREGI) || _cost[EBXREGI] > c) {
-// 2)        DFA_PRODUCTION__SET_VALID(EBXREGI, cmovI_memu_rule, c)
+// 2)        DFA_PRODUCTION(EBXREGI, cmovI_memu_rule, c)
 // 3)      }
 //
 static void cost_check(FILE *fp, const char *spaces,
@@ -162,13 +154,13 @@
   }
 
   // line 2)
-  // no need to set State vector if our state is knownValid
-  const char *production = (validity_check == knownValid) ? dfa_production : dfa_production_set_valid;
-  fprintf(fp, "%s  %s(%s, %s_rule, %s)", spaces, production, arrayIdx, rule, cost->as_string() );
-  if( validity_check == knownValid ) {
-    if( cost_is_below_lower_bound ) { fprintf(fp, "\t  // overwrites higher cost rule"); }
-   }
-   fprintf(fp, "\n");
+  fprintf(fp, "%s  DFA_PRODUCTION(%s, %s_rule, %s)", spaces, arrayIdx, rule, cost->as_string() );
+  if (validity_check == knownValid) {
+    if (cost_is_below_lower_bound) {
+      fprintf(fp, "\t  // overwrites higher cost rule");
+    }
+  }
+  fprintf(fp, "\n");
 
   // line 3)
   if( cost_check || state_check ) {
@@ -276,7 +268,7 @@
 
   // Check if this rule should be used to generate the chains as well.
   const char *rule = /* set rule to "Invalid" for internal operands */
-    strcmp(mList._opcode,mList._resultStr) ? mList._opcode : "Invalid";
+    strcmp(mList._opcode, mList._resultStr) ? mList._opcode : "Invalid";
 
   // If this rule produces an operand which has associated chain rules,
   // update the operands with the chain rule + this rule cost & this rule.
@@ -396,16 +388,8 @@
   _attributes.output(fp);
   fprintf(fp, "\n");
   fprintf(fp, "//------------------------- Macros -----------------------------------------\n");
-  // #define DFA_PRODUCTION(result, rule, cost)\
-  //   _cost[ (result) ] = cost; _rule[ (result) ] = rule;
-  fprintf(fp, "#define %s(result, rule, cost)\\\n", dfa_production);
-  fprintf(fp, "  _cost[ (result) ] = cost; _rule[ (result) ] = rule;\n");
-  fprintf(fp, "\n");
-
-  // #define DFA_PRODUCTION__SET_VALID(result, rule, cost)\
-  //     DFA_PRODUCTION( (result), (rule), (cost) ); STATE__SET_VALID( (result) );
-  fprintf(fp, "#define %s(result, rule, cost)\\\n", dfa_production_set_valid);
-  fprintf(fp, "  %s( (result), (rule), (cost) ); STATE__SET_VALID( (result) );\n", dfa_production);
+  fprintf(fp, "#define DFA_PRODUCTION(result, rule, cost)\\\n");
+  fprintf(fp, "  assert(rule < (1 << 15), \"too many rules\"); _cost[ (result) ] = cost; _rule[ (result) ] = (rule << 1) | 0x1;\n");
   fprintf(fp, "\n");
 
   fprintf(fp, "//------------------------- DFA --------------------------------------------\n");
diff --git a/src/hotspot/share/adlc/output_h.cpp b/src/hotspot/share/adlc/output_h.cpp
index d7ae412..8c5ea2b 100644
--- a/src/hotspot/share/adlc/output_h.cpp
+++ b/src/hotspot/share/adlc/output_h.cpp
@@ -2012,26 +2012,17 @@
 }
 
 void ArchDesc::defineStateClass(FILE *fp) {
-  static const char *state__valid    = "_valid[((uint)index) >> 5] &  (0x1 << (((uint)index) & 0x0001F))";
-  static const char *state__set_valid= "_valid[((uint)index) >> 5] |= (0x1 << (((uint)index) & 0x0001F))";
+  static const char *state__valid    = "_rule[index] & 0x1";
 
   fprintf(fp,"\n");
   fprintf(fp,"// MACROS to inline and constant fold State::valid(index)...\n");
   fprintf(fp,"// when given a constant 'index' in dfa_<arch>.cpp\n");
-  fprintf(fp,"//   uint word   = index >> 5;       // Shift out bit position\n");
-  fprintf(fp,"//   uint bitpos = index & 0x0001F;  // Mask off word bits\n");
-  fprintf(fp,"#define STATE__VALID(index) ");
-  fprintf(fp,"    (%s)\n", state__valid);
-  fprintf(fp,"\n");
   fprintf(fp,"#define STATE__NOT_YET_VALID(index) ");
   fprintf(fp,"  ( (%s) == 0 )\n", state__valid);
   fprintf(fp,"\n");
   fprintf(fp,"#define STATE__VALID_CHILD(state,index) ");
   fprintf(fp,"  ( state && (state->%s) )\n", state__valid);
   fprintf(fp,"\n");
-  fprintf(fp,"#define STATE__SET_VALID(index) ");
-  fprintf(fp,"  (%s)\n", state__set_valid);
-  fprintf(fp,"\n");
   fprintf(fp,
           "//---------------------------State-------------------------------------------\n");
   fprintf(fp,"// State contains an integral cost vector, indexed by machine operand opcodes,\n");
@@ -2040,16 +2031,18 @@
   fprintf(fp,"// tree generated by the Label routines in ideal nodes (currently limited to\n");
   fprintf(fp,"// two for convenience, but this could change).\n");
   fprintf(fp,"class State : public ResourceObj {\n");
+  fprintf(fp,"private:\n");
+  fprintf(fp,"  unsigned int _cost[_LAST_MACH_OPER];  // Costs, indexed by operand opcodes\n");
+  fprintf(fp,"  uint16_t     _rule[_LAST_MACH_OPER];  // Rule and validity, indexed by operand opcodes\n");
+  fprintf(fp,"                                        // Lowest bit encodes validity\n");
+
   fprintf(fp,"public:\n");
-  fprintf(fp,"  int    _id;         // State identifier\n");
-  fprintf(fp,"  Node  *_leaf;       // Ideal (non-machine-node) leaf of match tree\n");
-  fprintf(fp,"  State *_kids[2];       // Children of state node in label tree\n");
-  fprintf(fp,"  unsigned int _cost[_LAST_MACH_OPER];  // Cost vector, indexed by operand opcodes\n");
-  fprintf(fp,"  unsigned int _rule[_LAST_MACH_OPER];  // Rule vector, indexed by operand opcodes\n");
-  fprintf(fp,"  unsigned int _valid[(_LAST_MACH_OPER/32)+1]; // Bit Map of valid Cost/Rule entries\n");
+  fprintf(fp,"  int    _id;                           // State identifier\n");
+  fprintf(fp,"  Node  *_leaf;                         // Ideal (non-machine-node) leaf of match tree\n");
+  fprintf(fp,"  State *_kids[2];                      // Children of state node in label tree\n");
   fprintf(fp,"\n");
-  fprintf(fp,"  State(void);                      // Constructor\n");
-  fprintf(fp,"  DEBUG_ONLY( ~State(void); )       // Destructor\n");
+  fprintf(fp,"  State(void);\n");
+  fprintf(fp,"  DEBUG_ONLY( ~State(void); )\n");
   fprintf(fp,"\n");
   fprintf(fp,"  // Methods created by ADLC and invoked by Reduce\n");
   fprintf(fp,"  MachOper *MachOperGenerator(int opcode);\n");
@@ -2058,14 +2051,14 @@
   fprintf(fp,"  // Assign a state to a node, definition of method produced by ADLC\n");
   fprintf(fp,"  bool DFA( int opcode, const Node *ideal );\n");
   fprintf(fp,"\n");
-  fprintf(fp,"  // Access function for _valid bit vector\n");
   fprintf(fp,"  bool valid(uint index) {\n");
-  fprintf(fp,"    return( STATE__VALID(index) != 0 );\n");
+  fprintf(fp,"    return %s;\n", state__valid);
   fprintf(fp,"  }\n");
-  fprintf(fp,"\n");
-  fprintf(fp,"  // Set function for _valid bit vector\n");
-  fprintf(fp,"  void set_valid(uint index) {\n");
-  fprintf(fp,"    STATE__SET_VALID(index);\n");
+  fprintf(fp,"  unsigned int rule(uint index) {\n");
+  fprintf(fp,"    return _rule[index] >> 1;\n");
+  fprintf(fp,"  }\n");
+  fprintf(fp,"  unsigned int cost(uint index) {\n");
+  fprintf(fp,"    return _cost[index];\n");
   fprintf(fp,"  }\n");
   fprintf(fp,"\n");
   fprintf(fp,"#ifndef PRODUCT\n");
diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp
index a06252c..0c09c27 100644
--- a/src/hotspot/share/opto/matcher.cpp
+++ b/src/hotspot/share/opto/matcher.cpp
@@ -1453,11 +1453,13 @@
   uint mincost = max_juint;
   uint cost = max_juint;
   uint i;
-  for( i = 0; i < NUM_OPERANDS; i++ ) {
-    if( s->valid(i) &&                // valid entry and
-        s->_cost[i] < cost &&         // low cost and
-        s->_rule[i] >= NUM_OPERANDS ) // not an operand
-      cost = s->_cost[mincost=i];
+  for (i = 0; i < NUM_OPERANDS; i++) {
+    if (s->valid(i) &&               // valid entry and
+        s->cost(i) < cost &&         // low cost and
+        s->rule(i) >= NUM_OPERANDS) {// not an operand
+      mincost = i;
+      cost = s->cost(i);
+    }
   }
   if (mincost == max_juint) {
 #ifndef PRODUCT
@@ -1468,7 +1470,7 @@
     return NULL;
   }
   // Reduce input tree based upon the state labels to machine Nodes
-  MachNode *m = ReduceInst( s, s->_rule[mincost], mem );
+  MachNode *m = ReduceInst(s, s->rule(mincost), mem);
 #ifdef ASSERT
   _old2new_map.map(n->_idx, m);
   _new2old_map.map(m->_idx, (Node*)n);
@@ -1836,29 +1838,28 @@
   }
 }
 
-void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach ) {
+void Matcher::ReduceInst_Chain_Rule(State* s, int rule, Node* &mem, MachNode* mach) {
   // 'op' is what I am expecting to receive
   int op = _leftOp[rule];
   // Operand type to catch childs result
   // This is what my child will give me.
-  int opnd_class_instance = s->_rule[op];
+  unsigned int opnd_class_instance = s->rule(op);
   // Choose between operand class or not.
   // This is what I will receive.
   int catch_op = (FIRST_OPERAND_CLASS <= op && op < NUM_OPERANDS) ? opnd_class_instance : op;
   // New rule for child.  Chase operand classes to get the actual rule.
-  int newrule = s->_rule[catch_op];
+  unsigned int newrule = s->rule(catch_op);
 
-  if( newrule < NUM_OPERANDS ) {
+  if (newrule < NUM_OPERANDS) {
     // Chain from operand or operand class, may be output of shared node
-    assert( 0 <= opnd_class_instance && opnd_class_instance < NUM_OPERANDS,
-            "Bad AD file: Instruction chain rule must chain from operand");
+    assert(opnd_class_instance < NUM_OPERANDS, "Bad AD file: Instruction chain rule must chain from operand");
     // Insert operand into array of operands for this instruction
     mach->_opnds[1] = s->MachOperGenerator(opnd_class_instance);
 
-    ReduceOper( s, newrule, mem, mach );
+    ReduceOper(s, newrule, mem, mach);
   } else {
     // Chain from the result of an instruction
-    assert( newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand");
+    assert(newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand");
     mach->_opnds[1] = s->MachOperGenerator(_reduceOp[catch_op]);
     Node *mem1 = (Node*)1;
     debug_only(Node *save_mem_node = _mem_node;)
@@ -1896,24 +1897,24 @@
     }
     // Operand type to catch childs result
     // This is what my child will give me.
-    int opnd_class_instance = newstate->_rule[op];
+    int opnd_class_instance = newstate->rule(op);
     // Choose between operand class or not.
     // This is what I will receive.
     int catch_op = (op >= FIRST_OPERAND_CLASS && op < NUM_OPERANDS) ? opnd_class_instance : op;
     // New rule for child.  Chase operand classes to get the actual rule.
-    int newrule = newstate->_rule[catch_op];
+    int newrule = newstate->rule(catch_op);
 
-    if( newrule < NUM_OPERANDS ) { // Operand/operandClass or internalOp/instruction?
+    if (newrule < NUM_OPERANDS) { // Operand/operandClass or internalOp/instruction?
       // Operand/operandClass
       // Insert operand into array of operands for this instruction
       mach->_opnds[num_opnds++] = newstate->MachOperGenerator(opnd_class_instance);
-      ReduceOper( newstate, newrule, mem, mach );
+      ReduceOper(newstate, newrule, mem, mach);
 
     } else {                    // Child is internal operand or new instruction
-      if( newrule < _LAST_MACH_OPER ) { // internal operand or instruction?
+      if (newrule < _LAST_MACH_OPER) { // internal operand or instruction?
         // internal operand --> call ReduceInst_Interior
         // Interior of complex instruction.  Do nothing but recurse.
-        num_opnds = ReduceInst_Interior( newstate, newrule, mem, mach, num_opnds );
+        num_opnds = ReduceInst_Interior(newstate, newrule, mem, mach, num_opnds);
       } else {
         // instruction --> call build operand(  ) to catch result
         //             --> ReduceInst( newrule )
@@ -1965,16 +1966,17 @@
     }
   }
 
-  for( uint i=0; kid != NULL && i<2; kid = s->_kids[1], i++ ) {   // binary tree
+  for (uint i = 0; kid != NULL && i < 2; kid = s->_kids[1], i++) {   // binary tree
     int newrule;
-    if( i == 0)
-      newrule = kid->_rule[_leftOp[rule]];
-    else
-      newrule = kid->_rule[_rightOp[rule]];
+    if( i == 0) {
+      newrule = kid->rule(_leftOp[rule]);
+    } else {
+      newrule = kid->rule(_rightOp[rule]);
+    }
 
-    if( newrule < _LAST_MACH_OPER ) { // Operand or instruction?
+    if (newrule < _LAST_MACH_OPER) { // Operand or instruction?
       // Internal operand; recurse but do nothing else
-      ReduceOper( kid, newrule, mem, mach );
+      ReduceOper(kid, newrule, mem, mach);
 
     } else {                    // Child is a new instruction
       // Reduce the instruction, and add a direct pointer from this
@@ -2808,15 +2810,12 @@
 
 //=============================================================================
 //---------------------------State---------------------------------------------
-State::State(void) {
+State::State(void) : _rule() {
 #ifdef ASSERT
   _id = 0;
   _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
   _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
-  //memset(_cost, -1, sizeof(_cost));
-  //memset(_rule, -1, sizeof(_rule));
 #endif
-  memset(_valid, 0, sizeof(_valid));
 }
 
 #ifdef ASSERT
@@ -2837,25 +2836,30 @@
 }
 
 void State::dump(int depth) {
-  for( int j = 0; j < depth; j++ )
+  for (int j = 0; j < depth; j++) {
     tty->print("   ");
+  }
   tty->print("--N: ");
   _leaf->dump();
   uint i;
-  for( i = 0; i < _LAST_MACH_OPER; i++ )
+  for (i = 0; i < _LAST_MACH_OPER; i++) {
     // Check for valid entry
-    if( valid(i) ) {
-      for( int j = 0; j < depth; j++ )
+    if (valid(i)) {
+      for (int j = 0; j < depth; j++) {
         tty->print("   ");
-        assert(_cost[i] != max_juint, "cost must be a valid value");
-        assert(_rule[i] < _last_Mach_Node, "rule[i] must be valid rule");
-        tty->print_cr("%s  %d  %s",
-                      ruleName[i], _cost[i], ruleName[_rule[i]] );
       }
+      assert(cost(i) != max_juint, "cost must be a valid value");
+      assert(rule(i) < _last_Mach_Node, "rule[i] must be valid rule");
+      tty->print_cr("%s  %d  %s",
+                    ruleName[i], cost(i), ruleName[rule(i)] );
+    }
+  }
   tty->cr();
 
-  for( i=0; i<2; i++ )
-    if( _kids[i] )
-      _kids[i]->dump(depth+1);
+  for (i = 0; i < 2; i++) {
+    if (_kids[i]) {
+      _kids[i]->dump(depth + 1);
+    }
+  }
 }
 #endif