[subset] Add table duplication overflow resolution.
diff --git a/src/hb-debug.hh b/src/hb-debug.hh
index ec3a1ff..a92614d 100644
--- a/src/hb-debug.hh
+++ b/src/hb-debug.hh
@@ -438,6 +438,10 @@
#define TRACE_SUBSET(this) hb_no_trace_t<bool> trace
#endif
+#ifndef HB_DEBUG_SUBSET_REPACK
+#define HB_DEBUG_SUBSET_REPACK (HB_DEBUG+0)
+#endif
+
#ifndef HB_DEBUG_DISPATCH
#define HB_DEBUG_DISPATCH ( \
HB_DEBUG_APPLY + \
diff --git a/src/hb-repacker.hh b/src/hb-repacker.hh
index e489494..83a4db7 100644
--- a/src/hb-repacker.hh
+++ b/src/hb-repacker.hh
@@ -39,6 +39,36 @@
// TODO(garretrieger): add an error tracking system similar to what serialize_context_t
// does.
+ struct overflow_record_t
+ {
+ unsigned parent;
+ const hb_serialize_context_t::object_t::link_t* link;
+ };
+
+ struct clone_buffer_t
+ {
+ clone_buffer_t () : head (nullptr), tail (nullptr) {}
+
+ void copy (const hb_serialize_context_t::object_t& object)
+ {
+ fini ();
+ unsigned size = object.tail - object.head;
+ head = (char*) malloc (size);
+ memcpy (head, object.head, size);
+ tail = head + size;
+ }
+
+ char* head;
+ char* tail;
+
+ void fini ()
+ {
+ if (!head) return;
+ free (head);
+ head = nullptr;
+ }
+ };
+
/*
* A topological sorting of an object graph. Ordered
* in reverse serialization order (first object in the
@@ -72,6 +102,7 @@
~graph_t ()
{
objects_.fini_deep ();
+ clone_buffers_.fini_deep ();
}
/*
@@ -214,10 +245,44 @@
}
/*
+ * Creates a copy of child and re-assigns the link from
+ * parent to the clone. The copy is a shallow copy, objects
+ * linked from child are not duplicated.
+ */
+ void duplicate (unsigned parent_idx, unsigned child_idx)
+ {
+ const auto& child = objects_[child_idx];
+ clone_buffer_t* buffer = clone_buffers_.push ();
+ buffer->copy (child);
+
+ auto* clone = objects_.push ();
+ clone->head = buffer->head;
+ clone->tail = buffer->tail;
+ for (const auto& l : child.links)
+ clone->links.push (l);
+
+ auto& parent = objects_[parent_idx];
+ unsigned clone_idx = objects_.length - 2;
+ for (unsigned i = 0; i < parent.links.length; i++)
+ {
+ auto& l = parent.links[i];
+ if (l.objidx == child_idx) l.objidx = clone_idx;
+ }
+
+ // The last object is the root of the graph, so swap back the root to the end.
+ // The root's obj idx does change, however since it's root nothing else refers to it.
+ // all other obj idx's will be unaffected.
+ hb_serialize_context_t::object_t root = objects_[objects_.length - 2];
+ objects_[objects_.length - 2] = *clone;
+ objects_[objects_.length - 1] = root;
+ }
+
+ /*
* Will any offsets overflow on graph when it's serialized?
*/
- bool will_overflow ()
+ bool will_overflow (hb_vector_t<overflow_record_t>* overflows)
{
+ if (overflows) overflows->resize (0);
hb_vector_t<unsigned> start_positions;
start_positions.resize (objects_.length);
hb_vector_t<unsigned> end_positions;
@@ -232,7 +297,7 @@
}
- for (unsigned parent_idx = 0; parent_idx < objects_.length; parent_idx++)
+ for (int parent_idx = objects_.length - 1; parent_idx >= 0; parent_idx--)
{
for (const auto& link : objects_[parent_idx].links)
{
@@ -241,11 +306,52 @@
start_positions,
end_positions);
- if (!is_valid_offset (offset, link)) return true;
+ if (is_valid_offset (offset, link))
+ continue;
+
+ if (!overflows) return true;
+
+ overflow_record_t r;
+ r.parent = parent_idx;
+ r.link = &link;
+ overflows->push (r);
}
}
- return false;
+ if (!overflows) return false;
+ return overflows->length;
+ }
+
+ /*
+ * Creates a map from objid to # of incoming edges.
+ */
+ void incoming_edge_count (hb_vector_t<unsigned>* out) const
+ {
+ out->resize (0);
+ out->resize (objects_.length);
+ for (const auto& o : objects_)
+ {
+ for (const auto& l : o.links)
+ {
+ (*out)[l.objidx] += 1;
+ }
+ }
+ }
+
+ void print_overflows (const hb_vector_t<overflow_record_t>& overflows) const
+ {
+ if (!DEBUG_ENABLED(SUBSET_REPACK)) return;
+
+ hb_vector_t<unsigned> edge_count;
+ incoming_edge_count (&edge_count);
+ for (const auto& o : overflows)
+ {
+ DEBUG_MSG (SUBSET_REPACK, nullptr, "overflow from %d => %d (%d incoming , %d outgoing)",
+ o.parent,
+ o.link->objidx,
+ edge_count[o.link->objidx],
+ objects_[o.link->objidx].links.length);
+ }
}
private:
@@ -371,22 +477,6 @@
}
}
- /*
- * Creates a map from objid to # of incoming edges.
- */
- void incoming_edge_count (hb_vector_t<unsigned>* out)
- {
- out->resize (0);
- out->resize (objects_.length);
- for (const auto& o : objects_)
- {
- for (const auto& l : o.links)
- {
- (*out)[l.objidx] += 1;
- }
- }
- }
-
template <typename O> void
serialize_link_of_type (const hb_serialize_context_t::object_t::link_t& link,
char* head,
@@ -426,6 +516,7 @@
public:
hb_vector_t<hb_serialize_context_t::object_t> objects_;
+ hb_vector_t<clone_buffer_t> clone_buffers_;
};
@@ -438,13 +529,49 @@
hb_serialize_context_t* c) {
graph_t sorted_graph (packed);
sorted_graph.sort_kahn ();
- if (sorted_graph.will_overflow ()) {
- sorted_graph.sort_shortest_distance ();
- // TODO(garretrieger): try additional offset resolution strategies
- // - Dijkstra sort of weighted graph.
- // - Promotion to extension lookups.
- // - Table duplication.
- // - Table splitting.
+ if (!sorted_graph.will_overflow (nullptr)) return;
+
+ sorted_graph.sort_shortest_distance ();
+
+ unsigned round = 0;
+ hb_vector_t<graph_t::overflow_record_t> overflows;
+ // TODO(garretrieger): select a good limit for max rounds.
+ while (sorted_graph.will_overflow (&overflows) && round++ < 10) {
+ DEBUG_MSG (SUBSET_REPACK, nullptr, "Over flow resolution round %d", round);
+ sorted_graph.print_overflows (overflows);
+
+ // TODO(garretrieger): cache ege count in the graph object . Will need to be invalidated
+ // by graph modifications.
+ hb_vector_t<unsigned> edge_count;
+ sorted_graph.incoming_edge_count (&edge_count);
+
+ // Try resolving the furthest overflow first.
+ bool resolution_attempted = false;
+ for (int i = overflows.length - 1; i >= 0; i--)
+ {
+ const graph_t::overflow_record_t& r = overflows[i];
+ if (edge_count[r.link->objidx] > 1)
+ {
+ DEBUG_MSG (SUBSET_REPACK, nullptr, "Duplicating %d => %d",
+ r.parent, r.link->objidx);
+ // The child object is shared, we may be able to eliminate the overflow
+ // by duplicating it.
+ sorted_graph.duplicate (r.parent, r.link->objidx);
+ sorted_graph.sort_shortest_distance ();
+ resolution_attempted = true;
+ break;
+ }
+
+ // TODO(garretrieger): add additional offset resolution strategies
+ // - Promotion to extension lookups.
+ // - Table splitting.
+ }
+
+ if (!resolution_attempted)
+ {
+ DEBUG_MSG (SUBSET_REPACK, nullptr, "No resolution available :(");
+ break;
+ }
}
sorted_graph.serialize (c);
diff --git a/src/test-repacker.cc b/src/test-repacker.cc
index a070cdd..e18cabd 100644
--- a/src/test-repacker.cc
+++ b/src/test-repacker.cc
@@ -75,12 +75,12 @@
static void
populate_serializer_with_overflow (hb_serialize_context_t* c)
{
- std::string large_string(40000, 'a');
+ std::string large_string(50000, 'a');
c->start_serialize<char> ();
- unsigned obj_1 = add_object (large_string.c_str(), 40000, c);
- unsigned obj_2 = add_object (large_string.c_str(), 40000, c);
- unsigned obj_3 = add_object (large_string.c_str(), 40000, c);
+ unsigned obj_1 = add_object (large_string.c_str(), 10000, c);
+ unsigned obj_2 = add_object (large_string.c_str(), 20000, c);
+ unsigned obj_3 = add_object (large_string.c_str(), 50000, c);
start_object ("abc", 3, c);
add_offset (obj_3, c);
@@ -92,6 +92,26 @@
}
static void
+populate_serializer_with_dedup_overflow (hb_serialize_context_t* c)
+{
+ std::string large_string(70000, 'a');
+ c->start_serialize<char> ();
+
+ unsigned obj_1 = add_object ("def", 3, c);
+
+ start_object (large_string.c_str(), 60000, c);
+ add_offset (obj_1, c);
+ unsigned obj_2 = c->pop_pack (false);
+
+ start_object (large_string.c_str(), 10000, c);
+ add_offset (obj_2, c);
+ add_offset (obj_1, c);
+ c->pop_pack (false);
+
+ c->end_serialize();
+}
+
+static void
populate_serializer_complex_1 (hb_serialize_context_t* c)
{
c->start_serialize<char> ();
@@ -116,7 +136,7 @@
{
c->start_serialize<char> ();
- unsigned obj_5 = add_object ("mn", 3, c);
+ unsigned obj_5 = add_object ("mn", 2, c);
unsigned obj_4 = add_object ("jkl", 3, c);
@@ -137,6 +157,36 @@
c->end_serialize();
}
+static void
+populate_serializer_complex_3 (hb_serialize_context_t* c)
+{
+ c->start_serialize<char> ();
+
+ unsigned obj_6 = add_object ("opqrst", 6, c);
+
+ unsigned obj_5 = add_object ("mn", 2, c);
+
+ start_object ("jkl", 3, c);
+ add_offset (obj_6, c);
+ unsigned obj_4 = c->pop_pack (false);
+
+ start_object ("ghi", 3, c);
+ add_offset (obj_4, c);
+ unsigned obj_3 = c->pop_pack (false);
+
+ start_object ("def", 3, c);
+ add_offset (obj_3, c);
+ unsigned obj_2 = c->pop_pack (false);
+
+ start_object ("abc", 3, c);
+ add_offset (obj_2, c);
+ add_offset (obj_4, c);
+ add_offset (obj_5, c);
+ c->pop_pack ();
+
+ c->end_serialize();
+}
+
static void test_sort_kahn_1 ()
{
size_t buffer_size = 100;
@@ -227,6 +277,79 @@
assert(graph.objects_[0].links.length == 0);
}
+static void test_duplicate_leaf ()
+{
+ size_t buffer_size = 100;
+ void* buffer = malloc (buffer_size);
+ hb_serialize_context_t c (buffer, buffer_size);
+ populate_serializer_complex_2 (&c);
+
+ graph_t graph (c.object_graph ());
+ graph.duplicate (4, 1);
+
+ assert(strncmp (graph.objects_[5].head, "abc", 3) == 0);
+ assert(graph.objects_[5].links.length == 3);
+ assert(graph.objects_[5].links[0].objidx == 3);
+ assert(graph.objects_[5].links[1].objidx == 4);
+ assert(graph.objects_[5].links[2].objidx == 0);
+
+ assert(strncmp (graph.objects_[4].head, "jkl", 3) == 0);
+ assert(graph.objects_[4].links.length == 0);
+
+ assert(strncmp (graph.objects_[3].head, "def", 3) == 0);
+ assert(graph.objects_[3].links.length == 1);
+ assert(graph.objects_[3].links[0].objidx == 2);
+
+ assert(strncmp (graph.objects_[2].head, "ghi", 3) == 0);
+ assert(graph.objects_[2].links.length == 1);
+ assert(graph.objects_[2].links[0].objidx == 1);
+
+ assert(strncmp (graph.objects_[1].head, "jkl", 3) == 0);
+ assert(graph.objects_[1].links.length == 0);
+
+ assert(strncmp (graph.objects_[0].head, "mn", 2) == 0);
+ assert(graph.objects_[0].links.length == 0);
+}
+
+static void test_duplicate_interior ()
+{
+ size_t buffer_size = 100;
+ void* buffer = malloc (buffer_size);
+ hb_serialize_context_t c (buffer, buffer_size);
+ populate_serializer_complex_3 (&c);
+
+ graph_t graph (c.object_graph ());
+ graph.duplicate (3, 2);
+
+ assert(strncmp (graph.objects_[6].head, "abc", 3) == 0);
+ assert(graph.objects_[6].links.length == 3);
+ assert(graph.objects_[6].links[0].objidx == 4);
+ assert(graph.objects_[6].links[1].objidx == 2);
+ assert(graph.objects_[6].links[2].objidx == 1);
+
+ assert(strncmp (graph.objects_[5].head, "jkl", 3) == 0);
+ assert(graph.objects_[5].links.length == 1);
+ assert(graph.objects_[5].links[0].objidx == 0);
+
+ assert(strncmp (graph.objects_[4].head, "def", 3) == 0);
+ assert(graph.objects_[4].links.length == 1);
+ assert(graph.objects_[4].links[0].objidx == 3);
+
+ assert(strncmp (graph.objects_[3].head, "ghi", 3) == 0);
+ assert(graph.objects_[3].links.length == 1);
+ assert(graph.objects_[3].links[0].objidx == 5);
+
+ assert(strncmp (graph.objects_[2].head, "jkl", 3) == 0);
+ assert(graph.objects_[2].links.length == 1);
+ assert(graph.objects_[2].links[0].objidx == 0);
+
+ assert(strncmp (graph.objects_[1].head, "mn", 2) == 0);
+ assert(graph.objects_[1].links.length == 0);
+
+ assert(strncmp (graph.objects_[0].head, "opqrst", 6) == 0);
+ assert(graph.objects_[0].links.length == 0);
+}
+
static void
test_serialize ()
{
@@ -257,7 +380,7 @@
populate_serializer_complex_2 (&c);
graph_t graph (c.object_graph ());
- assert (!graph.will_overflow ());
+ assert (!graph.will_overflow (nullptr));
}
static void test_will_overflow_2 ()
@@ -268,9 +391,63 @@
populate_serializer_with_overflow (&c);
graph_t graph (c.object_graph ());
- assert (graph.will_overflow ());
+ assert (graph.will_overflow (nullptr));
}
+static void test_will_overflow_3 ()
+{
+ size_t buffer_size = 160000;
+ void* buffer = malloc (buffer_size);
+ hb_serialize_context_t c (buffer, buffer_size);
+ populate_serializer_with_dedup_overflow (&c);
+ graph_t graph (c.object_graph ());
+
+ assert (graph.will_overflow (nullptr));
+}
+
+static void test_resolve_overflows_via_sort ()
+{
+ size_t buffer_size = 160000;
+ void* buffer = malloc (buffer_size);
+ hb_serialize_context_t c (buffer, buffer_size);
+ populate_serializer_with_overflow (&c);
+ graph_t graph (c.object_graph ());
+
+ void* out_buffer = malloc (buffer_size);
+ hb_serialize_context_t out (out_buffer, buffer_size);
+
+ hb_resolve_overflows (c.object_graph (), &out);
+ assert (!out.offset_overflow);
+ hb_bytes_t result = out.copy_bytes ();
+ assert (result.length == (80000 + 3 + 3 * 2));
+
+ result.free ();
+ free (buffer);
+ free (out_buffer);
+}
+
+static void test_resolve_overflows_via_duplication ()
+{
+ size_t buffer_size = 160000;
+ void* buffer = malloc (buffer_size);
+ hb_serialize_context_t c (buffer, buffer_size);
+ populate_serializer_with_dedup_overflow (&c);
+ graph_t graph (c.object_graph ());
+
+ void* out_buffer = malloc (buffer_size);
+ hb_serialize_context_t out (out_buffer, buffer_size);
+
+ hb_resolve_overflows (c.object_graph (), &out);
+ assert (!out.offset_overflow);
+ hb_bytes_t result = out.copy_bytes ();
+ assert (result.length == (10000 + 2 * 2 + 60000 + 2 + 3 * 2));
+
+ result.free ();
+ free (buffer);
+ free (out_buffer);
+}
+
+// TODO(garretrieger): update will_overflow tests to check the overflows array.
// TODO(garretrieger): add a test(s) using a real font.
int
@@ -282,4 +459,9 @@
test_sort_shortest ();
test_will_overflow_1 ();
test_will_overflow_2 ();
+ test_will_overflow_3 ();
+ test_resolve_overflows_via_sort ();
+ test_resolve_overflows_via_duplication ();
+ test_duplicate_leaf ();
+ test_duplicate_interior ();
}