[morx] Try using buffer-glyph-set adaptive to number of chains
diff --git a/src/hb-aat-layout-common.hh b/src/hb-aat-layout-common.hh
index 8feb654..17624cb 100644
--- a/src/hb-aat-layout-common.hh
+++ b/src/hb-aat-layout-common.hh
@@ -66,6 +66,7 @@
   const ankr *ankr_table;
   const OT::GDEF *gdef_table;
   const hb_sorted_vector_t<hb_aat_map_t::range_flags_t> *range_flags = nullptr;
+  bool using_buffer_glyph_set = false;
   hb_bit_set_t buffer_glyph_set;
   const hb_bit_set_t *left_set = nullptr;
   const hb_bit_set_t *right_set = nullptr;
@@ -87,23 +88,28 @@
 
   void set_lookup_index (unsigned int i) { lookup_index = i; }
 
-#define BUFFER_GLYPH_SET_THRESHOLD 4
-  void setup_buffer_glyph_set ()
+#define HB_MALLOC_COST 48
+#define HB_BIT_SET_HAS_COST 8
+  void setup_buffer_glyph_set (unsigned subchain_count)
   {
-    if (buffer->len < BUFFER_GLYPH_SET_THRESHOLD) return;
+    // Using buffer_glyph_set has at least two mallocs. Avoid it for small workloads.
+    unsigned malloced_cost = HB_MALLOC_COST * 2 + subchain_count * HB_BIT_SET_HAS_COST;
+    unsigned unmalloced_cost = subchain_count * buffer->len;
+    using_buffer_glyph_set = malloced_cost < unmalloced_cost;
 
-    buffer_glyph_set = buffer->bit_set ();
+    if (using_buffer_glyph_set)
+      buffer_glyph_set = buffer->bit_set ();
   }
   bool buffer_intersects_machine () const
   {
-    if (buffer->len < BUFFER_GLYPH_SET_THRESHOLD)
-    {
-      for (unsigned i = 0; i < buffer->len; i++)
-	if (machine_glyph_set->has (buffer->info[i].codepoint))
-	  return true;
-      return false;
-    }
-    return buffer_glyph_set.intersects (*machine_glyph_set);
+    if (using_buffer_glyph_set)
+      return buffer_glyph_set.intersects (*machine_glyph_set);
+
+    // Faster for shorter buffers.
+    for (unsigned i = 0; i < buffer->len; i++)
+      if (machine_glyph_set->has (buffer->info[i].codepoint))
+	return true;
+    return false;
   }
 };
 
diff --git a/src/hb-aat-layout-morx-table.hh b/src/hb-aat-layout-morx-table.hh
index b232a40..a975b79 100644
--- a/src/hb-aat-layout-morx-table.hh
+++ b/src/hb-aat-layout-morx-table.hh
@@ -1374,11 +1374,13 @@
       }
 
       this->chain_count = table->get_chain_count ();
+      this->subchain_count = table->get_subchain_count ();
 
       this->accels = (hb_atomic_ptr_t<hb_aat_layout_chain_accelerator_t> *) hb_calloc (this->chain_count, sizeof (*accels));
       if (unlikely (!this->accels))
       {
 	this->chain_count = 0;
+	this->subchain_count = 0;
 	this->table.destroy ();
 	this->table = hb_blob_get_empty ();
       }
@@ -1422,6 +1424,7 @@
 
     hb_blob_ptr_t<T> table;
     unsigned int chain_count;
+    unsigned int subchain_count;
     hb_atomic_ptr_t<hb_aat_layout_chain_accelerator_t> *accels;
   };
 
@@ -1444,7 +1447,19 @@
 
   unsigned get_chain_count () const
   {
-	  return chainCount;
+    return chainCount;
+  }
+  unsigned get_subchain_count () const
+  {
+    const Chain<Types> *chain = &firstChain;
+    unsigned int count = chainCount;
+    unsigned int subchain_count = 0;
+    for (unsigned int i = 0; i < count; i++)
+    {
+      subchain_count += chain->get_subtable_count ();
+      chain = &StructAfter<Chain<Types>> (*chain);
+    }
+    return subchain_count;
   }
 
   void apply (hb_aat_apply_context_t *c,
@@ -1455,7 +1470,7 @@
 
     c->buffer->unsafe_to_concat ();
 
-    c->setup_buffer_glyph_set ();
+    c->setup_buffer_glyph_set (accel.subchain_count);
 
     c->set_lookup_index (0);
     const Chain<Types> *chain = &firstChain;