[set-digest] Inline combiner
Instead of defining one digest template and then combining three instances
of it at different shifts, inline the code into a single struct. The
compiler can optimize it better.
3% speedup on the Amiri benchmark.
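
The inlined type keeps one 64-bit mask per shift in a fixed-size array;
membership is tested by probing one bit per mask and AND-ing the answers,
like a small Bloom filter. A minimal standalone sketch of the idea follows
(names and types here are illustrative only, not the HarfBuzz API):

    #include <cstdint>

    // Hypothetical sketch of the inlined digest: one 64-bit mask per
    // shift, all probed together.
    struct digest_sketch_t
    {
      static constexpr unsigned shifts[3] = {4, 0, 9};

      uint64_t masks[3] = {};

      void add (uint32_t g)
      {
        // Set one bit in each mask, indexed by a different slice of g.
        for (unsigned i = 0; i < 3; i++)
          masks[i] |= uint64_t (1) << ((g >> shifts[i]) & 63);
      }

      bool may_have (uint32_t g) const
      {
        // "No" is exact; "yes" may be a false positive, as in a Bloom filter.
        for (unsigned i = 0; i < 3; i++)
          if (!(masks[i] & (uint64_t (1) << ((g >> shifts[i]) & 63))))
            return false;
        return true;
      }
    };

Because the number of masks and the shifts are compile-time constants in
one struct, the compiler can unroll the loops and keep the masks in
registers, which is where the speedup presumably comes from.
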
diff --git a/src/hb-set-digest.hh b/src/hb-set-digest.hh
index b718b94..76c239e 100644
--- a/src/hb-set-digest.hh
+++ b/src/hb-set-digest.hh
@@ -64,45 +64,56 @@
* check is done using four bitwise operations only.
*/
-template <typename mask_t, unsigned int shift>
-struct hb_set_digest_bits_pattern_t
+struct hb_set_digest_t
{
+ using mask_t = uint64_t;
+ static constexpr unsigned shifts[] = {4, 0, 9};
+
static constexpr unsigned mask_bytes = sizeof (mask_t);
static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
- static constexpr unsigned num_bits = 0
- + (mask_bytes >= 1 ? 3 : 0)
- + (mask_bytes >= 2 ? 1 : 0)
- + (mask_bytes >= 4 ? 1 : 0)
- + (mask_bytes >= 8 ? 1 : 0)
- + (mask_bytes >= 16? 1 : 0)
- + 0;
+ static constexpr unsigned n = sizeof (shifts) / sizeof (shifts[0]);
+ static constexpr hb_codepoint_t mb1 = mask_bits - 1;
+ static constexpr mask_t one = 1;
+ static constexpr mask_t all = (mask_t) -1;
- static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
- static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");
+ void init ()
+ { for (unsigned i = 0; i < n; i++) masks[i] = 0; }
- void init () { mask = 0; }
+ static hb_set_digest_t full ()
+ {
+ hb_set_digest_t d;
+ for (unsigned i = 0; i < n; i++) d.masks[i] = all;
+ return d;
+ }
- static hb_set_digest_bits_pattern_t full () { hb_set_digest_bits_pattern_t d; d.mask = (mask_t) -1; return d; }
-
- void union_ (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; }
-
- void add (hb_codepoint_t g) { mask |= mask_for (g); }
+ void union_ (const hb_set_digest_t &o)
+ { for (unsigned i = 0; i < n; i++) masks[i] |= o.masks[i]; }
bool add_range (hb_codepoint_t a, hb_codepoint_t b)
{
- if (mask == (mask_t) -1) return false;
- if ((b >> shift) - (a >> shift) >= mask_bits - 1)
+ bool ret;
+
+ ret = false;
+ for (unsigned i = 0; i < n; i++)
+ if (masks[i] != all)
+ ret = true;
+ if (!ret) return false;
+
+ ret = false;
+ for (unsigned i = 0; i < n; i++)
{
- mask = (mask_t) -1;
- return false;
+ mask_t shift = shifts[i];
+ if ((b >> shift) - (a >> shift) >= mb1)
+ masks[i] = all;
+ else
+ {
+ mask_t ma = one << ((a >> shift) & mb1);
+ mask_t mb = one << ((b >> shift) & mb1);
+ masks[i] |= mb + (mb - ma) - (mb < ma);
+ ret = true;
+ }
}
- else
- {
- mask_t ma = mask_for (a);
- mask_t mb = mask_for (b);
- mask |= mb + (mb - ma) - (mb < ma);
- return true;
- }
+ return ret;
}
template <typename T>
@@ -125,103 +136,36 @@
template <typename T>
bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
- bool may_have (const hb_set_digest_bits_pattern_t &o) const
- { return mask & o.mask; }
-
- bool may_have (hb_codepoint_t g) const
- { return mask & mask_for (g); }
-
bool operator [] (hb_codepoint_t g) const
{ return may_have (g); }
- private:
-
- static mask_t mask_for (hb_codepoint_t g)
- { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
- mask_t mask = 0;
-};
-
-template <typename head_t, typename tail_t>
-struct hb_set_digest_combiner_t
-{
- void init ()
- {
- head.init ();
- tail.init ();
- }
-
- static hb_set_digest_combiner_t full () { hb_set_digest_combiner_t d; d.head = head_t::full(); d.tail = tail_t::full (); return d; }
-
- void union_ (const hb_set_digest_combiner_t &o)
- {
- head.union_ (o.head);
- tail.union_(o.tail);
- }
void add (hb_codepoint_t g)
{
- head.add (g);
- tail.add (g);
- }
-
- bool add_range (hb_codepoint_t a, hb_codepoint_t b)
- {
- return (int) head.add_range (a, b) | (int) tail.add_range (a, b);
- }
- template <typename T>
- void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
- {
- head.add_array (array, count, stride);
- tail.add_array (array, count, stride);
- }
- template <typename T>
- void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
- template <typename T>
- bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
- {
- return head.add_sorted_array (array, count, stride) &&
- tail.add_sorted_array (array, count, stride);
- }
- template <typename T>
- bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
-
- bool may_have (const hb_set_digest_combiner_t &o) const
- {
- return head.may_have (o.head) && tail.may_have (o.tail);
+ for (unsigned i = 0; i < n; i++)
+ masks[i] |= one << ((g >> shifts[i]) & mb1);
}
bool may_have (hb_codepoint_t g) const
{
- return head.may_have (g) && tail.may_have (g);
+ for (unsigned i = 0; i < n; i++)
+ if (!(masks[i] & (one << ((g >> shifts[i]) & mb1))))
+ return false;
+ return true;
}
- bool operator [] (hb_codepoint_t g) const
- { return may_have (g); }
+ bool may_have (const hb_set_digest_t &o) const
+ {
+ for (unsigned i = 0; i < n; i++)
+ if (!(masks[i] & o.masks[i]))
+ return false;
+ return true;
+ }
private:
- head_t head;
- tail_t tail;
+
+ mask_t masks[n] = {};
};
-/*
- * hb_set_digest_t
- *
- * This is a combination of digests that performs "best".
- * There is not much science to this: it's a result of intuition
- * and testing.
- */
-using hb_set_digest_t =
- hb_set_digest_combiner_t
- <
- hb_set_digest_bits_pattern_t<unsigned long, 4>,
- hb_set_digest_combiner_t
- <
- hb_set_digest_bits_pattern_t<unsigned long, 0>,
- hb_set_digest_bits_pattern_t<unsigned long, 9>
- >
- >
-;
-
-
#endif /* HB_SET_DIGEST_HH */