Adding support for CMapFormat4.



git-svn-id: http://sfntly.googlecode.com/svn/trunk/cpp/src@85 672e30a5-4c29-85ac-ac6d-611c735e0a51
diff --git a/sfntly/table/core/cmap_table.cc b/sfntly/table/core/cmap_table.cc
index 9df1158..bfc7f86 100644
--- a/sfntly/table/core/cmap_table.cc
+++ b/sfntly/table/core/cmap_table.cc
@@ -261,10 +261,7 @@
 #endif
       break;
     case CMapFormat::kFormat4:
-#if defined (SFNTLY_DEBUG_CMAP)
-      fprintf(stderr, "Requesting Format4 builder, but it's unsupported; "
-              "returning NULL\n");
-#endif
+      builder.Attach(CMapFormat4::Builder::NewInstance(data, offset, cmap_id));
       break;
     default:
 #ifdef SFNTLY_DEBUG_CMAP
@@ -289,10 +286,7 @@
 #endif
       break;
     case CMapFormat::kFormat4:
-#if defined (SFNTLY_DEBUG_CMAP)
-      fprintf(stderr, "Requesting Format4 builder, but it's unsupported; "
-              "returning NULL\n");
-#endif
+      builder.Attach(CMapFormat4::Builder::NewInstance(cmap_id));
       break;
     default:
 #ifdef SFNTLY_DEBUG_CMAP
@@ -595,14 +589,534 @@
 }
 
 /******************************************************************************
+ * CMapTable::CMapFormat4
+ ******************************************************************************/
+CMapTable::CMapFormat4::CMapFormat4(ReadableFontData* data,
+                                    const CMapId& cmap_id)
+    : CMapTable::CMap::CMap(data, CMapFormat::kFormat4, cmap_id),
+      seg_count_(SegCount(data)),  // Segment count read from the header.
+      start_code_offset_(StartCodeOffset(seg_count_)),
+      end_code_offset_(Offset::kFormat4EndCount),
+      id_delta_offset_(IdDeltaOffset(seg_count_)),
+      id_range_offset_offset_(IdRangeOffsetOffset(seg_count_)),
+      glyph_id_array_offset_(GlyphIdArrayOffset(seg_count_)) {}
+
+CMapTable::CMapFormat4::~CMapFormat4() {  // Empty body.
+}
+
+int32_t CMapTable::CMapFormat4::GlyphId(int32_t character) {
+  // Search the parallel startCode/endCode arrays, both with USHORT stride.
+  const int32_t start_codes = StartCodeOffset(seg_count_);
+  const int32_t end_codes = Offset::kFormat4EndCount;
+  const int32_t found = data_->SearchUShort(start_codes, DataSize::kUSHORT,
+                                            end_codes, DataSize::kUSHORT,
+                                            seg_count_, character);
+  if (found != -1) {
+    return RetrieveGlyphId(found, StartCode(found), character);
+  }
+  // A result of -1 from the search is reported as the NOTDEF glyph.
+  return CMapTable::NOTDEF;
+}
+
+int32_t CMapTable::CMapFormat4::RetrieveGlyphId(int32_t segment,
+                                                int32_t start_code,
+                                                int32_t character) {
+  if (character < start_code)  // Below the segment's start code.
+    return CMapTable::NOTDEF;
+  const int32_t id_range_offset = IdRangeOffset(segment);
+  if (id_range_offset == 0)  // direct mapping: glyph = (char + delta) mod 2^16
+    return (character + IdDelta(segment)) % 65536;
+  const int32_t glyph = data_->ReadUShort(id_range_offset +
+                                          IdRangeOffsetLocation(segment) +
+                                          2 * (character - start_code));
+  // Spec: a stored 0 means missing glyph; otherwise idDelta is added mod 65536.
+  return glyph == 0 ? glyph : (glyph + IdDelta(segment)) % 65536;
+}
+
+int32_t CMapTable::CMapFormat4::seg_count() {
+  return seg_count_;  // Cached by the constructor via SegCount(data).
+}
+
+int32_t CMapTable::CMapFormat4::Length() {
+  return Length(data_);  // Reads the length field from the table data.
+}
+
+int32_t CMapTable::CMapFormat4::StartCode(int32_t segment) {
+  // -1 flags an out-of-range segment when IsValidIndex() does not throw.
+  return IsValidIndex(segment)
+      ? StartCode(data_.p_, seg_count_, segment)
+      : -1;
+}
+
+// static
+int32_t CMapTable::CMapFormat4::Language(ReadableFontData* data) {
+  // Unsigned 16-bit field located at Offset::kFormat4Language.
+  return data->ReadUShort(Offset::kFormat4Language);
+}
+
+// static
+int32_t CMapTable::CMapFormat4::Length(ReadableFontData* data) {
+  // Unsigned 16-bit field located at Offset::kFormat4Length.
+  return data->ReadUShort(Offset::kFormat4Length);
+}
+
+// static
+int32_t CMapTable::CMapFormat4::SegCount(ReadableFontData* data) {
+  // The table stores twice the segment count (segCountX2), so halve it.
+  return data->ReadUShort(Offset::kFormat4SegCountX2) / 2;
+}
+
+// static
+int32_t CMapTable::CMapFormat4::StartCode(ReadableFontData* data,
+                                          int32_t seg_count,
+                                          int32_t index) {
+  // startCode[index]: the array holds one USHORT per segment.
+  const int32_t entry = StartCodeOffset(seg_count) + index * DataSize::kUSHORT;
+  return data->ReadUShort(entry);
+}
+
+// static
+int32_t CMapTable::CMapFormat4::StartCodeOffset(int32_t seg_count) {
+  // Skip endCode[seg_count] plus the one reserved USHORT that follows it.
+  const int32_t skipped_ushorts = seg_count + 1;
+  return Offset::kFormat4EndCount + skipped_ushorts * DataSize::kUSHORT;
+}
+
+// static
+int32_t CMapTable::CMapFormat4::EndCode(ReadableFontData* data,
+                                        int32_t seg_count,
+                                        int32_t index) {
+  // endCode[] starts at a fixed offset, so |seg_count| is not needed here.
+  const int32_t entry = Offset::kFormat4EndCount + index * DataSize::kUSHORT;
+  return data->ReadUShort(entry);
+}
+
+// static
+int32_t CMapTable::CMapFormat4::IdDelta(ReadableFontData* data,
+                                        int32_t seg_count,
+                                        int32_t index) {
+  // idDelta[index] is read here as an unsigned 16-bit value.
+  const int32_t entry = IdDeltaOffset(seg_count) + index * DataSize::kUSHORT;
+  return data->ReadUShort(entry);
+}
+
+// static
+int32_t CMapTable::CMapFormat4::IdDeltaOffset(int32_t seg_count) {
+  // Skip endCode[], the reserved USHORT and startCode[]: 2 * seg_count + 1.
+  const int32_t skipped_ushorts = 2 * seg_count + 1;
+  return Offset::kFormat4EndCount + skipped_ushorts * DataSize::kUSHORT;
+}
+
+// static
+int32_t CMapTable::CMapFormat4::IdRangeOffset(ReadableFontData* data,
+                                              int32_t seg_count,
+                                              int32_t index) {
+  // idRangeOffset[index], one USHORT per segment.
+  const int32_t entry =
+      IdRangeOffsetOffset(seg_count) + index * DataSize::kUSHORT;
+  return data->ReadUShort(entry);
+}
+
+// static
+int32_t CMapTable::CMapFormat4::IdRangeOffsetOffset(int32_t seg_count) {
+  // idRangeOffset[] follows idDelta[], which adds seg_count SHORT entries
+  // after the (2 * seg_count + 1) USHORTs that precede idDelta[].
+  const int32_t ushort_bytes = (2 * seg_count + 1) * DataSize::kUSHORT;
+  return Offset::kFormat4EndCount + ushort_bytes + seg_count * DataSize::kSHORT;
+}
+
+// static
+int32_t CMapTable::CMapFormat4::GlyphIdArrayOffset(int32_t seg_count) {
+  // glyphIdArray[] comes after idRangeOffset[]: (3 * seg_count + 1) USHORTs
+  // plus the seg_count SHORT entries of idDelta[].
+  const int32_t ushort_bytes = (3 * seg_count + 1) * DataSize::kUSHORT;
+  return Offset::kFormat4EndCount + ushort_bytes + seg_count * DataSize::kSHORT;
+}
+
+int32_t CMapTable::CMapFormat4::EndCode(int32_t segment) {
+  if (IsValidIndex(segment)) {
+    return EndCode(data_, seg_count_, segment);  // endCode[segment]
+  }
+#if defined (SFNTLY_NO_EXCEPTION)
+  return -1;  // Error sentinel for an out-of-range segment.
+#else
+  throw IllegalArgumentException();
+#endif
+}
+
+bool CMapTable::CMapFormat4::IsValidIndex(int32_t segment) {
+  if (segment < 0 || segment >= seg_count_) {  // Outside [0, seg_count_).
+#if defined (SFNTLY_NO_EXCEPTION)
+    return false;  // Without exceptions the caller must check the result.
+#else
+    throw IllegalArgumentException();
+#endif
+  }
+  return true;
+}
+
+int32_t CMapTable::CMapFormat4::IdDelta(int32_t segment) {
+  // -1 marks an out-of-range segment when IsValidIndex() does not throw.
+  const bool in_range = IsValidIndex(segment);
+  return in_range ? IdDelta(data_, seg_count_, segment) : -1;
+}
+
+int32_t CMapTable::CMapFormat4::IdRangeOffset(int32_t segment) {
+  // -1 marks an out-of-range segment when IsValidIndex() does not throw.
+  const bool in_range = IsValidIndex(segment);
+  return in_range ? data_->ReadUShort(IdRangeOffsetLocation(segment)) : -1;
+}
+
+int32_t CMapTable::CMapFormat4::IdRangeOffsetLocation(int32_t segment) {
+  // Byte offset of idRangeOffset[segment]; -1 if |segment| is out of range.
+  const int32_t base = IdRangeOffsetOffset(seg_count_);
+  return IsValidIndex(segment) ? base + segment * DataSize::kUSHORT : -1;
+}
+
+int32_t CMapTable::CMapFormat4::GlyphIdArray(int32_t index) {
+  return data_->ReadUShort(glyph_id_array_offset_ + index * DataSize::kUSHORT);
+}  // Note: |index| is not range-checked in this function.
+
+int32_t CMapTable::CMapFormat4::Language() {
+  return Language(data_);  // Reads the language field from the table data.
+}
+
+
+CMapTable::CMap::CharacterIterator* CMapTable::CMapFormat4::Iterator() {
+  return new CharacterIterator(this);  // Heap-allocated, raw pointer result.
+}
+
+/******************************************************************************
+ * CMapTable::CMapFormat4::CharacterIterator class
+ ******************************************************************************/
+CMapTable::CMapFormat4::CharacterIterator::CharacterIterator(
+    CMapFormat4* parent)
+    : parent_(parent),
+      segment_index_(0),
+      first_char_in_segment_(-1),  // -1: current segment not yet loaded.
+      last_char_in_segment_(-1),
+      next_char_(-1),
+      next_char_set_(false) {  // No character is pending until HasNext().
+}
+
+bool CMapTable::CMapFormat4::CharacterIterator::HasNext() {
+  if (next_char_set_)  // A character is already pending for Next().
+    return true;
+  while (segment_index_ < parent_->seg_count_) {
+    if (first_char_in_segment_ < 0) {  // Entering a new segment.
+      first_char_in_segment_ = parent_->StartCode(segment_index_);
+      last_char_in_segment_ = parent_->EndCode(segment_index_);
+      next_char_ = first_char_in_segment_;
+      next_char_set_ = true;
+      return true;
+    }
+    if (next_char_ < last_char_in_segment_) {  // More left in this segment.
+      next_char_++;
+      next_char_set_ = true;
+      return true;
+    }
+    segment_index_++;  // Segment exhausted; move to the next one.
+    first_char_in_segment_ = -1;
+  }
+  return false;
+}
+
+int32_t CMapTable::CMapFormat4::CharacterIterator::Next() {
+  if (!next_char_set_) {  // Nothing pending yet; try to advance.
+    if (!HasNext()) {
+#if defined (SFNTLY_NO_EXCEPTION)
+      return -1;  // Exhausted; sentinel used when exceptions are disabled.
+#else
+      throw NoSuchElementException("No more characters to iterate.");
+#endif
+    }
+  }
+  next_char_set_ = false;  // Consume the pending character.
+  return next_char_;
+}
+
+/******************************************************************************
+ * CMapTable::CMapFormat4::Builder::Segment class
+ ******************************************************************************/
+CMapTable::CMapFormat4::Builder::Segment::Segment()  // All fields start at 0.
+    : start_count_(0), end_count_(0), id_delta_(0), id_range_offset_(0) {}
+
+CMapTable::CMapFormat4::Builder::Segment::Segment(Segment* other)
+    : start_count_(other->start_count_),  // |other| must not be null.
+      end_count_(other->end_count_),
+      id_delta_(other->id_delta_),
+      id_range_offset_(other->id_range_offset_) {}
+
+CMapTable::CMapFormat4::Builder::Segment::Segment(int32_t start_count,
+                                                  int32_t end_count,
+                                                  int32_t id_delta,
+                                                  int32_t id_range_offset)
+    : start_count_(start_count),  // Values are stored as given, unvalidated.
+      end_count_(end_count),
+      id_delta_(id_delta),
+      id_range_offset_(id_range_offset) {
+}
+
+CMapTable::CMapFormat4::Builder::Segment::~Segment() {}  // Empty body.
+
+int32_t CMapTable::CMapFormat4::Builder::Segment::start_count() {
+  return start_count_;  // First character code of the segment.
+}
+
+void
+CMapTable::CMapFormat4::Builder::Segment::set_start_count(int32_t start_count) {
+  start_count_ = start_count;  // Stored as-is; no range check.
+}
+
+int32_t CMapTable::CMapFormat4::Builder::Segment::end_count() {
+  return end_count_;  // Last character code of the segment.
+}
+
+void
+CMapTable::CMapFormat4::Builder::Segment::set_end_count(int32_t end_count) {
+  end_count_ = end_count;  // Stored as-is; no range check.
+}
+
+int32_t CMapTable::CMapFormat4::Builder::Segment::id_delta() {
+  return id_delta_;  // Delta added to character codes of this segment.
+}
+
+void
+CMapTable::CMapFormat4::Builder::Segment::set_id_delta(int32_t id_delta) {
+  id_delta_ = id_delta;  // Stored as-is; no range check.
+}
+
+int32_t CMapTable::CMapFormat4::Builder::Segment::id_range_offset() {
+  return id_range_offset_;  // 0 selects the delta-only mapping on lookup.
+}
+
+void
+CMapTable::CMapFormat4::Builder::Segment::
+set_id_range_offset(int32_t id_range_offset) {
+  id_range_offset_ = id_range_offset;  // Stored as-is; no range check.
+}
+
+// static: clones each Segment so the copy shares no state with |original|.
+CALLER_ATTACH SegmentList*
+CMapTable::CMapFormat4::Builder::Segment::DeepCopy(SegmentList* original) {
+  SegmentList* list = new SegmentList;  // Ownership passes to the caller.
+  for (SegmentList::iterator it = original->begin(),
+           e = original->end(); it != e; ++it) {
+    list->push_back(new Segment(it->p_));  // Copy the object, not the Ptr.
+  }
+  return list;
+}
+
+/******************************************************************************
+ * CMapTable::CMapFormat4::Builder class
+ ******************************************************************************/
+CALLER_ATTACH CMapTable::CMapFormat4::Builder*
+CMapTable::CMapFormat4::Builder::NewInstance(ReadableFontData* data,
+                                             int32_t offset,
+                                             const CMapId& cmap_id) {
+  ReadableFontDataPtr rdata;  // Stays null when |data| is null.
+  if (data) {
+    // Slice out exactly this subtable, using the length stored in its header.
+    const int32_t length = data->ReadUShort(offset + Offset::kFormat4Length);
+    rdata.Attach(down_cast<ReadableFontData*>(data->Slice(offset, length)));
+  }
+  // |rdata| already begins at the subtable, so the builder's offset is 0.
+  return new Builder(rdata, 0, cmap_id);
+}
+
+CALLER_ATTACH CMapTable::CMapFormat4::Builder*
+CMapTable::CMapFormat4::Builder::NewInstance(WritableFontData* data,
+                                             int32_t offset,
+                                             const CMapId& cmap_id) {
+  WritableFontDataPtr wdata;  // Stays null when |data| is null.
+  if (data) {
+    // Slice out exactly this subtable, using the length stored in its header.
+    const int32_t length = data->ReadUShort(offset + Offset::kFormat4Length);
+    wdata.Attach(down_cast<WritableFontData*>(data->Slice(offset, length)));
+  }
+  // |wdata| already begins at the subtable, so the builder's offset is 0.
+  return new Builder(wdata, 0, cmap_id);
+}
+
+CALLER_ATTACH CMapTable::CMapFormat4::Builder*
+CMapTable::CMapFormat4::Builder::NewInstance(const CMapId& cmap_id) {
+  return new Builder(cmap_id);  // Builder with no backing font data.
+}
+
+CMapTable::CMapFormat4::Builder::Builder(ReadableFontData* data, int32_t offset,
+                                         const CMapId& cmap_id)
+    : CMap::Builder(data, CMapFormat::kFormat4, cmap_id) {  // |offset| unused
+}
+
+CMapTable::CMapFormat4::Builder::Builder(WritableFontData* data, int32_t offset,
+                                         const CMapId& cmap_id)
+    : CMap::Builder(data, CMapFormat::kFormat4, cmap_id) {  // |offset| unused
+}
+
+CMapTable::CMapFormat4::Builder::Builder(SegmentList* segments,
+                                         IntegerList* glyph_id_array,
+                                         const CMapId& cmap_id)
+    : CMap::Builder(reinterpret_cast<ReadableFontData*>(NULL),
+                    CMapFormat::kFormat4, cmap_id),
+      segments_(segments->begin(), segments->end()),  // Copies the Ptr entries.
+      glyph_id_array_(glyph_id_array->begin(), glyph_id_array->end()) {
+  set_model_changed();  // Serialization will use the in-memory model.
+}
+
+CMapTable::CMapFormat4::Builder::Builder(const CMapId& cmap_id)
+    : CMap::Builder(reinterpret_cast<ReadableFontData*>(NULL),  // No data.
+                    CMapFormat::kFormat4, cmap_id) {
+}
+
+CMapTable::CMapFormat4::Builder::~Builder() {}  // Empty body.
+
+void CMapTable::CMapFormat4::Builder::Initialize(ReadableFontData* data) {
+  if (data == NULL || data->Length() == 0) return;  // Nothing to parse.
+
+  // Build one Segment per entry of the four parallel per-segment arrays.
+  int32_t seg_count = CMapFormat4::SegCount(data);
+  for (int32_t index = 0; index < seg_count; ++index) {
+    Ptr<Segment> segment = new Segment;
+    segment->set_start_count(CMapFormat4::StartCode(data, seg_count, index));
+#if defined SFNTLY_DEBUG_CMAP
+    fprintf(stderr, "Segment %d; start %d\n", index, segment->start_count());
+#endif
+    segment->set_end_count(CMapFormat4::EndCode(data, seg_count, index));
+    segment->set_id_delta(CMapFormat4::IdDelta(data, seg_count, index));
+    segment->set_id_range_offset(
+        CMapFormat4::IdRangeOffset(data, seg_count, index));
+    segments_.push_back(segment);
+  }
+
+  // Copy every USHORT between the end of idRangeOffset[] and the table end.
+  int32_t glyph_id_array_offset = CMapFormat4::GlyphIdArrayOffset(seg_count);
+  int32_t glyph_id_array_length =
+      (CMapFormat4::Length(data) - glyph_id_array_offset) / DataSize::kUSHORT;
+#if defined SFNTLY_DEBUG_CMAP
+  fprintf(stderr, "id array size %d\n", glyph_id_array_length);
+#endif
+  for (int32_t i = 0; i < glyph_id_array_length; ++i) {  // i counts entries
+    glyph_id_array_.push_back(
+        data->ReadUShort(glyph_id_array_offset + i * DataSize::kUSHORT));
+  }
+}
+
+SegmentList* CMapTable::CMapFormat4::Builder::segments() {
+  if (segments_.empty()) {  // Lazily load the model from the backing data.
+    Initialize(InternalReadData());
+    set_model_changed();
+  }
+  return &segments_;  // Pointer to the internal list, not a copy.
+}
+
+void CMapTable::CMapFormat4::Builder::set_segments(SegmentList* segments) {
+  segments_.assign(segments->begin(), segments->end());  // Copies Ptr entries.
+  set_model_changed();
+}
+
+IntegerList* CMapTable::CMapFormat4::Builder::glyph_id_array() {
+  if (glyph_id_array_.empty() && segments_.empty()) {  // Model not loaded yet.
+    Initialize(InternalReadData());  // Appends to BOTH lists, so run it once.
+    set_model_changed();
+  }
+  return &glyph_id_array_;  // May be empty: a table need not use the array.
+}
+
+void CMapTable::CMapFormat4::Builder::
+set_glyph_id_array(IntegerList* glyph_id_array) {
+  glyph_id_array_.assign(glyph_id_array->begin(), glyph_id_array->end());
+  set_model_changed();  // Serialization will now use the in-memory model.
+}
+
+CALLER_ATTACH FontDataTable*
+CMapTable::CMapFormat4::Builder::SubBuildTable(ReadableFontData* data) {
+  FontDataTablePtr table = new CMapFormat4(data, cmap_id());
+  return table.Detach();  // The caller attaches the result (CALLER_ATTACH).
+}
+
+void CMapTable::CMapFormat4::Builder::SubDataSet() {
+  segments_.clear();  // Drop the cached model along with the glyph id array.
+  glyph_id_array_.clear();
+  set_model_changed();
+}
+
+int32_t CMapTable::CMapFormat4::Builder::SubDataSizeToSerialize() {
+  if (!model_changed()) {  // Unedited: defer to the base class.
+    return CMap::Builder::SubDataSizeToSerialize();
+  }
+  int32_t size = Offset::kFormat4FixedSize + segments_.size()
+      * (3 * DataSize::kUSHORT + DataSize::kSHORT)  // Four entries per segment.
+      + glyph_id_array_.size() * DataSize::kSHORT;
+  return size;
+}
+
+bool CMapTable::CMapFormat4::Builder::SubReadyToSerialize() {
+  if (model_changed()) {
+    // An edited model can be written once it holds at least one segment;
+    // segments() lazily loads them from the backing data when needed.
+    const bool has_segments = !segments()->empty();
+    return has_segments;
+  }
+  return CMap::Builder::SubReadyToSerialize();
+}
+
+int32_t
+CMapTable::CMapFormat4::Builder::SubSerialize(WritableFontData* new_data) {
+  if (!model_changed()) {  // Unedited: defer to the base class.
+    return CMap::Builder::SubSerialize(new_data);
+  }
+  int32_t index = 0;  // Running byte offset into |new_data|.
+  index += new_data->WriteUShort(index, CMapFormat::kFormat4);
+  index += DataSize::kUSHORT;  // length - write this at the end
+  index += new_data->WriteUShort(index, language());
+
+  int32_t seg_count = segments_.size();
+  index += new_data->WriteUShort(index, seg_count * 2);  // segCountX2
+  int32_t log2_seg_count = FontMath::Log2(seg_count);
+  int32_t search_range = 1 << (log2_seg_count + 1);  // 2 * 2^log2(seg_count)
+  index += new_data->WriteUShort(index, search_range);
+  int32_t entry_selector = log2_seg_count;
+  index += new_data->WriteUShort(index, entry_selector);
+  int32_t range_shift = 2 * seg_count - search_range;
+  index += new_data->WriteUShort(index, range_shift);
+
+  for (int32_t i = 0; i < seg_count; ++i) {  // endCode[]
+    index += new_data->WriteUShort(index, segments_[i]->end_count());
+  }
+  index += new_data->WriteUShort(index, 0);  // reserved ushort
+  for (int32_t i = 0; i < seg_count; ++i) {  // startCode[]
+#if defined SFNTLY_DEBUG_CMAP
+    fprintf(stderr, "Segment %d; start %d\n", i, segments_[i]->start_count());
+#endif
+    index += new_data->WriteUShort(index, segments_[i]->start_count());
+  }
+  for (int32_t i = 0; i < seg_count; ++i) {  // idDelta[], written as SHORT
+    index += new_data->WriteShort(index, segments_[i]->id_delta());
+  }
+  for (int32_t i = 0; i < seg_count; ++i) {  // idRangeOffset[]
+    index += new_data->WriteUShort(index, segments_[i]->id_range_offset());
+  }
+
+#if defined SFNTLY_DEBUG_CMAP
+  fprintf(stderr, "Glyph id array size %lu\n", glyph_id_array_.size());
+#endif
+  for (size_t i = 0; i < glyph_id_array_.size(); ++i) {  // glyphIdArray[]
+    index += new_data->WriteUShort(index, glyph_id_array_[i]);
+  }
+
+  new_data->WriteUShort(Offset::kFormat4Length, index);  // Back-fill length.
+  return index;  // Total number of bytes written.
+}
+
+/******************************************************************************
  * CMapTable::Builder class
  ******************************************************************************/
 CMapTable::Builder::Builder(Header* header, WritableFontData* data)
-    : SubTableContainerTable::Builder(header, data) {
+    : SubTableContainerTable::Builder(header, data), version_(0) {
 }
 
 CMapTable::Builder::Builder(Header* header, ReadableFontData* data)
-    : SubTableContainerTable::Builder(header, data) {
+    : SubTableContainerTable::Builder(header, data), version_(0) {
 }
 
 CMapTable::Builder::~Builder() {
diff --git a/sfntly/table/core/cmap_table.h b/sfntly/table/core/cmap_table.h
index f002ddc..29ce3e4 100644
--- a/sfntly/table/core/cmap_table.h
+++ b/sfntly/table/core/cmap_table.h
@@ -316,6 +316,184 @@
     CMap::CharacterIterator* Iterator();
   };
 
+  // CMapTable::CMapFormat4 - a cmap subtable made of character-code segments.
+  class CMapFormat4 : public CMap,
+                      public RefCounted<CMapFormat4> {
+   public:
+    // CMapTable::CMapFormat4::Builder
+    class Builder : public CMap::Builder,
+                    public RefCounted<Builder> {
+     public:
+      // CMapTable::CMapFormat4::Builder::Segment - one editable segment record.
+      class Segment : public RefCounted<Segment> {
+       public:
+        Segment();
+        explicit Segment(Segment* other);
+        Segment(int32_t start_count,
+                int32_t end_count,
+                int32_t id_delta,
+                int32_t id_range_offset);
+        ~Segment();
+
+        // @return the startCount
+        int32_t start_count();
+        // @param start_count the startCount to set
+        void set_start_count(int32_t start_count);
+        // @return the endCount
+        int32_t end_count();
+        // @param end_count the endCount to set
+        void set_end_count(int32_t end_count);
+        // @return the idDelta
+        int32_t id_delta();
+        // @param id_delta the idDelta to set
+        void set_id_delta(int32_t id_delta);
+        // @return the idRangeOffset
+        int32_t id_range_offset();
+        // @param id_range_offset the idRangeOffset to set
+        void set_id_range_offset(int32_t id_range_offset);
+
+        static CALLER_ATTACH
+        std::vector<Ptr<Segment> >*
+        DeepCopy(std::vector<Ptr<Segment> >* original);
+
+       private:
+        int32_t start_count_;
+        int32_t end_count_;
+        int32_t id_delta_;
+        int32_t id_range_offset_;
+      };
+      typedef std::vector<Ptr<Segment> > SegmentList;
+
+      static CALLER_ATTACH Builder* NewInstance(WritableFontData* data,
+                                                int32_t offset,
+                                                const CMapId& cmap_id);
+      static CALLER_ATTACH Builder* NewInstance(ReadableFontData* data,
+                                                int32_t offset,
+                                                const CMapId& cmap_id);
+      static CALLER_ATTACH Builder* NewInstance(const CMapId& cmap_id);
+      virtual ~Builder();
+      SegmentList* segments();  // Pointer to the builder's own list.
+      void set_segments(SegmentList* segments);
+      IntegerList* glyph_id_array();  // Pointer to the builder's own list.
+      void set_glyph_id_array(IntegerList* glyph_id_array);
+
+     protected:
+      Builder(WritableFontData* data, int32_t offset, const CMapId& cmap_id);
+      Builder(ReadableFontData* data, int32_t offset, const CMapId& cmap_id);
+      Builder(SegmentList* segments, IntegerList* glyph_id_array,
+              const CMapId& cmap_id);
+      explicit Builder(const CMapId& cmap_id);
+
+      virtual CALLER_ATTACH FontDataTable* SubBuildTable(
+          ReadableFontData* data);
+      virtual void SubDataSet();
+      virtual int32_t SubDataSizeToSerialize();
+      virtual bool SubReadyToSerialize();
+      virtual int32_t SubSerialize(WritableFontData* new_data);
+
+     private:
+      void Initialize(ReadableFontData* data);
+
+      SegmentList segments_;
+      IntegerList glyph_id_array_;
+    };
+
+    CMap::CharacterIterator* Iterator();
+    // CMapTable::CMapFormat4::CharacterIterator
+    class CharacterIterator : public CMap::CharacterIterator {
+     public:
+      bool HasNext();
+      int32_t Next();
+      virtual ~CharacterIterator() {}
+
+     private:
+      explicit CharacterIterator(CMapFormat4 *parent);
+      friend CMap::CharacterIterator* CMapFormat4::Iterator();
+
+      CMapFormat4* parent_;
+      int32_t segment_index_;
+      int32_t first_char_in_segment_;
+      int32_t last_char_in_segment_;
+      int32_t next_char_;
+      bool next_char_set_;
+    };
+
+    virtual int32_t GlyphId(int32_t character);
+
+    // Lower level glyph code retrieval that requires processing the Format 4
+    // segments to use.
+    // @param segment the cmap segment
+    // @param start_count the start code for the segment
+    // @param character the character to be looked up
+    // @return the glyph id for the character; CMapTable::NOTDEF if not found
+    int32_t RetrieveGlyphId(int32_t segment,
+                            int32_t start_count,
+                            int32_t character);
+    virtual int32_t Language();
+
+    // Get the count of the number of segments in this cmap.
+    // @return the number of segments
+    int32_t seg_count();
+    int32_t Length();
+    // Get the start code for a segment.
+    // @param segment the segment in the lookup table
+    // @return the start code for a segment
+    int32_t StartCode(int32_t segment);
+    // Get the end code for a segment.
+    // @param segment the segment in the look up table
+    // @return the end code for the segment
+    int32_t EndCode(int32_t segment);
+    // Get the id delta for a segment
+    // @param segment the segment in the look up table
+    // @return the id delta for the segment
+    int32_t IdDelta(int32_t segment);
+    // Get the id range offset for a segment
+    // @param segment the segment in the look up table
+    // @return the id range offset for the segment
+    int32_t IdRangeOffset(int32_t segment);
+    // Get the location of the id range offset for a segment
+    // @param segment the segment in the look up table
+    // @return the location of the id range offset for the segment
+    int32_t IdRangeOffsetLocation(int32_t segment);
+    // Declared above to allow friending inside CharacterIterator class.
+    // CMap::CharacterIterator* Iterator();
+    virtual ~CMapFormat4();
+
+   protected:
+    CMapFormat4(ReadableFontData* data, const CMapId& cmap_id);
+
+   private:
+    static int32_t Language(ReadableFontData* data);
+    static int32_t Length(ReadableFontData* data);
+    static int32_t SegCount(ReadableFontData* data);
+    static int32_t StartCode(ReadableFontData* data,
+                             int32_t seg_count,
+                             int32_t index);
+    static int32_t StartCodeOffset(int32_t seg_count);
+    static int32_t EndCode(ReadableFontData* data,
+                           int32_t seg_count,
+                           int32_t index);
+    static int32_t IdDelta(ReadableFontData* data,
+                           int32_t seg_count,
+                           int32_t index);
+    static int32_t IdDeltaOffset(int32_t seg_count);
+    static int32_t IdRangeOffset(ReadableFontData* data,
+                                 int32_t seg_count,
+                                 int32_t index);
+    static int32_t IdRangeOffsetOffset(int32_t seg_count);
+    static int32_t GlyphIdArrayOffset(int32_t seg_count);
+    // Returns bool rather than void so it also works without exceptions.
+    bool IsValidIndex(int32_t segment);
+    int32_t GlyphIdArray(int32_t index);
+
+    int32_t seg_count_;
+    int32_t start_code_offset_;
+    int32_t end_code_offset_;
+    int32_t id_delta_offset_;
+    int32_t id_range_offset_offset_;
+    int32_t glyph_id_array_offset_;
+  };
+
   // CMapTable::Builder
   class Builder : public SubTableContainerTable::Builder,
                   public RefCounted<Builder> {
@@ -527,6 +705,7 @@
 };
 typedef std::vector<CMapTable::CMapId> CMapIdList;
 typedef Ptr<CMapTable> CMapTablePtr;
+typedef std::vector<Ptr<CMapTable::CMapFormat4::Builder::Segment> > SegmentList;
 }  // namespace sfntly
 
 #endif  // SFNTLY_CPP_SRC_SFNTLY_TABLE_CORE_CMAP_TABLE_H_
diff --git a/test/cmap_editing_test.cc b/test/cmap_editing_test.cc
new file mode 100644
index 0000000..6df2720
--- /dev/null
+++ b/test/cmap_editing_test.cc
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2011 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <map>
+#include <algorithm>
+
+#include "sfntly/font.h"
+#include "sfntly/font_factory.h"
+#include "sfntly/table/core/font_header_table.h"
+#include "sfntly/tag.h"
+#include "sfntly/data/memory_byte_array.h"
+#include "sfntly/port/endian.h"
+#include "sfntly/port/file_input_stream.h"
+#include "sfntly/port/memory_output_stream.h"
+#include "test/test_data.h"
+#include "test/test_font_utils.h"
+#include "sfntly/table/core/cmap_table.h"
+#include "sfntly/port/refcount.h"
+#include "gtest/gtest.h"
+
+namespace sfntly {
+TEST(CMapEditingTest, RemoveAllButOneCMap) {
+  FontBuilderArray builders;
+  FontFactoryPtr font_factory;
+  font_factory.Attach(FontFactory::GetInstance());
+  BuilderForFontFile(SAMPLE_TTF_FILE, font_factory, &builders);
+  ASSERT_FALSE(builders.empty());
+  FontBuilderPtr font_builder = builders[0];
+  Ptr<CMapTable::Builder> cmap_table_builder =
+      (CMapTable::Builder*)font_builder->GetTableBuilder(Tag::cmap);
+  ASSERT_NE(cmap_table_builder, reinterpret_cast<CMapTable::Builder*>(NULL));
+  CMapTable::CMapBuilderMap*
+      cmap_builders = cmap_table_builder->GetCMapBuilders();
+  ASSERT_FALSE(cmap_builders->empty());
+
+  for (CMapTable::CMapBuilderMap::iterator
+           it = cmap_builders->begin(); it != cmap_builders->end();) {
+    if (it->second->cmap_id() == CMapTable::WINDOWS_BMP) {
+      ++it;  // Keep the Windows BMP cmap builder.
+    } else {
+      cmap_builders->erase(it++);  // Advance before the erase invalidates it.
+    }
+  }
+  ASSERT_EQ(cmap_builders->size(), (uint32_t)1);
+  FontPtr font;  // Ref-counted holder: released even if an ASSERT returns early.
+  font.Attach(font_builder->Build());
+  CMapTablePtr cmap_table = down_cast<CMapTable*>(font->GetTable(Tag::cmap));
+  ASSERT_EQ(1, cmap_table->NumCMaps());
+  CMapTable::CMapPtr cmap;
+  cmap.Attach(cmap_table->GetCMap(CMapTable::WINDOWS_BMP));
+  ASSERT_EQ(CMapTable::WINDOWS_BMP, cmap->cmap_id());
+}
+
+TEST(CMapEditingTest, CopyAllCMapsToNewFont) {
+  FontArray fonts;
+  FontFactoryPtr font_factory;
+  font_factory.Attach(FontFactory::GetInstance());
+  LoadFont(SAMPLE_TTF_FILE, font_factory, &fonts);
+
+  ASSERT_FALSE(fonts.empty());
+  ASSERT_FALSE(fonts[0] == NULL);
+  FontPtr font = fonts[0];
+  CMapTablePtr cmap_table = down_cast<CMapTable*>(font->GetTable(Tag::cmap));
+  FontBuilderPtr font_builder;
+  font_builder.Attach(font_factory->NewFontBuilder());
+  Ptr<CMapTable::Builder> cmap_table_builder =
+      (CMapTable::Builder*)font_builder->NewTableBuilder(Tag::cmap);
+
+  CMapTable::CMapIterator cmap_iter(cmap_table, NULL);
+  while (cmap_iter.HasNext()) {  // Copy every cmap into the new builder.
+    CMapTable::CMapPtr cmap;
+    cmap.Attach(cmap_iter.Next());
+    if (!cmap)
+      continue;
+    cmap_table_builder->NewCMapBuilder(cmap->cmap_id(), cmap->ReadFontData());
+  }
+
+  FontPtr new_font;
+  new_font.Attach(font_builder->Build());
+  CMapTablePtr new_cmap_table =  // Must come from the copy, not the source.
+      down_cast<CMapTable*>(new_font->GetTable(Tag::cmap));
+  ASSERT_EQ(cmap_table->NumCMaps(), new_cmap_table->NumCMaps());
+  CMapTable::CMapPtr cmap;
+  cmap.Attach(new_cmap_table->GetCMap(CMapTable::WINDOWS_BMP));
+  ASSERT_NE(cmap, reinterpret_cast<CMapTable::CMap*>(NULL));
+  ASSERT_EQ(CMapTable::WINDOWS_BMP, cmap->cmap_id());
+}
+}
diff --git a/test/cmap_iterator_test.cc b/test/cmap_iterator_test.cc
new file mode 100644
index 0000000..2e8e6fe
--- /dev/null
+++ b/test/cmap_iterator_test.cc
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2011 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+
+#include <vector>
+#include <string>
+#include <algorithm>
+
+#include "sfntly/font.h"
+#include "sfntly/font_factory.h"
+#include "sfntly/table/core/cmap_table.h"
+#include "sfntly/tag.h"
+#include "sfntly/port/type.h"
+#include "sfntly/port/refcount.h"
+#include "test/test_data.h"
+#include "test/test_font_utils.h"
+
+#include "gtest/gtest.h"
+
+#if GTEST_HAS_PARAM_TEST
+
+namespace sfntly {
+using ::testing::TestWithParam;
+using ::testing::Values;
+
+typedef std::vector<bool> BitSet;  // Index = character code; true = has glyph.
+
+// One test parameter: identifies a cmap by platform and encoding id and names
+// the font file to load. The |file_name| pointer is stored, not copied.
+class CMapIteratorTestCase {
+ public:
+  CMapIteratorTestCase(int32_t platform_id, int32_t encoding_id,
+                       const char* file_name)
+      : platform_id_(platform_id), encoding_id_(encoding_id),
+        file_name_(file_name) {}
+
+  int32_t platform_id() const { return platform_id_; }
+  int32_t encoding_id() const { return encoding_id_; }
+  const char* file_name() const { return file_name_; }
+
+ private:
+  int32_t platform_id_;
+  int32_t encoding_id_;
+  const char* file_name_;
+};
+
+// Fixture: SetUp() loads the font named by the test parameter and stores the
+// requested cmap in |cmap_| for the test body and the helpers below.
+class CMapIteratorTests
+    : public ::testing::TestWithParam<CMapIteratorTestCase> {
+ public:
+  virtual void SetUp();
+  virtual void TearDown() {}
+  BitSet* GenerateCMapEntries(int32_t start, int32_t count);  // Caller deletes.
+  int32_t CompareCMapIterAndBitSet(
+      CMapTable::CMap::CharacterIterator* character_iterator, BitSet* bit_set);
+
+  Ptr<CMapTable::CMap> cmap_;
+};
+
+void CMapIteratorTests::SetUp() {  // Caches the parameterized cmap in |cmap_|.
+  FontArray fonts;
+  Ptr<FontFactory> font_factory;
+  font_factory.Attach(FontFactory::GetInstance());  // Was left NULL before.
+  LoadFont(GetParam().file_name(), font_factory, &fonts);
+  ASSERT_FALSE(fonts.empty());  // fonts[0] below needs a loaded font.
+  Ptr<Font> font = fonts[0];
+  Ptr<CMapTable> cmap_table = down_cast<CMapTable*>(font->GetTable(Tag::cmap));
+  ASSERT_FALSE(cmap_table == NULL);
+  cmap_.Attach(cmap_table->GetCMap(GetParam().platform_id(),
+                                   GetParam().encoding_id()));
+  ASSERT_FALSE(cmap_ == NULL);
+}
+
+// Returns a new bit set (caller deletes) whose bit |c| is true when |cmap_|
+// maps character |c|, for c in [start, start + count), to a non-NOTDEF glyph.
+BitSet* CMapIteratorTests::GenerateCMapEntries(int32_t start, int32_t count) {
+  // Bits are indexed by character code: size for start + count, not count.
+  BitSet* entries = new BitSet(start + count);
+  for (int32_t c = start; c < start + count; ++c)
+    (*entries)[c] = cmap_->GlyphId(c) != CMapTable::NOTDEF;
+  return entries;
+}
+
+// Verifies that |character_iterator| reaches every character whose bit is set
+// in |bit_set|. Returns the number of characters it produced that have no bit
+// set, or -1 as soon as it has skipped past a set bit.
+int32_t
+CMapIteratorTests::
+CompareCMapIterAndBitSet(CMapTable::CMap::CharacterIterator* character_iterator,
+                         BitSet* bit_set) {
+  int32_t iterator_not_bitset_count = 0;
+  const BitSet::iterator first = bit_set->begin(), end = bit_set->end();
+  BitSet::iterator current = std::find(first, end, true);  // Next set bit.
+  while (character_iterator->HasNext() && current != end) {
+    const int32_t next_bit = current - first;
+    const int32_t c = character_iterator->Next();
+    // |current| != end here, so only the ordering needs to be checked.
+    EXPECT_LE(c, next_bit);
+    if (c > next_bit)
+      return -1;
+    if (c == next_bit)
+      current = std::find(current + 1, end, true);
+    else
+      ++iterator_not_bitset_count;
+  }
+  EXPECT_EQ(end, current);  // Every set bit must have been visited.
+#if defined (SFNTLY_DEBUG_CMAP)
+  // %s takes the file name and the first %d the format; they were swapped.
+  fprintf(stderr, "%s %d: Differences between iterator and bitset: %d\n",
+          GetParam().file_name(), cmap_->format(), iterator_not_bitset_count);
+#endif
+  return iterator_not_bitset_count;
+}
+
+TEST_P(CMapIteratorTests, IteratorTest) {
+  CMapTable::CMap::CharacterIterator* character_iterator = cmap_->Iterator();
+  // Fatal check, made before the bit set exists: a NULL iterator would crash
+  // the comparison below, and returning this early leaks nothing.
+  ASSERT_TRUE(character_iterator != NULL);
+  BitSet* bit_set = GenerateCMapEntries(0, 0x10ffff);
+  CompareCMapIterAndBitSet(character_iterator, bit_set);
+  delete character_iterator;
+  delete bit_set;
+}
+
+// Every cmap the iterator test runs against: (platform id, encoding id, font).
+CMapIteratorTestCase kCMapIteratorTestsTestCases[] = {
+  CMapIteratorTestCase(CMapTable::WINDOWS_BMP.platform_id,
+                       CMapTable::WINDOWS_BMP.encoding_id, SAMPLE_TTF_FILE)
+};
+
+INSTANTIATE_TEST_CASE_P(CMapIteratorTests,
+                        CMapIteratorTests,
+                        ::testing::ValuesIn(kCMapIteratorTestsTestCases));
+}
+
+#else
+// Placeholder test; named per file so it cannot clash with other DummyTests.
+TEST(CMapIteratorDummyTest,
+     ValueParameterizedTestsAreNotSupportedOnThisPlatform) {}
+#endif  // GTEST_HAS_PARAM_TEST
diff --git a/test/cmap_test.cc b/test/cmap_test.cc
new file mode 100644
index 0000000..f870184
--- /dev/null
+++ b/test/cmap_test.cc
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2011 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <assert.h>
+#include <unicode/ucnv.h>
+
+#include <string>
+#include <iostream>
+
+#include "gtest/gtest.h"
+#include "sfntly/font.h"
+#include "sfntly/font_factory.h"
+#include "sfntly/table/core/cmap_table.h"
+#include "sfntly/data/memory_byte_array.h"
+#include "sfntly/table/core/font_header_table.h"
+#include "sfntly/tag.h"
+
+#include "test/test_utils.h"
+#include "test/test_font_utils.h"
+#include "test/test_data.h"
+
+#if GTEST_HAS_PARAM_TEST
+
+namespace sfntly {
+using ::testing::TestWithParam;
+using ::testing::Values;
+
+// One cmap comparison: the font to load, two cmaps in it (platform id, encoding
+// id and a charset name that may be NULL) and the inclusive character range
+// [low_char, high_char] whose glyph ids are compared.
+class CMapTestCase {
+ public:
+  CMapTestCase(const char* font_name,
+               int32_t first_platform_id,
+               int32_t first_encoding_id,
+               const char* first_charset_name,
+               int32_t second_platform_id,
+               int32_t second_encoding_id,
+               const char* second_charset_name,
+               int32_t low_char, int32_t high_char)
+      : font_name_(font_name),
+        first_platform_id_(first_platform_id),
+        first_encoding_id_(first_encoding_id),
+        first_charset_name_(first_charset_name),
+        second_platform_id_(second_platform_id),
+        second_encoding_id_(second_encoding_id),
+        second_charset_name_(second_charset_name),
+        low_char_(low_char), high_char_(high_char) {}
+
+  const char* font_name() const { return font_name_; }
+  int32_t first_platform_id() const { return first_platform_id_; }
+  int32_t first_encoding_id() const { return first_encoding_id_; }
+  const char* first_charset_name() const { return first_charset_name_; }
+  int32_t second_platform_id() const { return second_platform_id_; }
+  int32_t second_encoding_id() const { return second_encoding_id_; }
+  const char* second_charset_name() const { return second_charset_name_; }
+  int32_t low_char() const { return low_char_; }
+  int32_t high_char() const { return high_char_; }
+
+ private:
+  const char* font_name_;
+  int32_t first_platform_id_;
+  int32_t first_encoding_id_;
+  const char* first_charset_name_;
+  int32_t second_platform_id_;
+  int32_t second_encoding_id_;
+  const char* second_charset_name_;
+  int32_t low_char_;
+  int32_t high_char_;
+};
+
+// Fixture comparing two cmaps of one font. Tests load the font themselves and
+// pass it to CommonSetUp(), which is why SetUp() is a no-op.
+class CMapTests : public ::testing::TestWithParam<CMapTestCase> {
+ public:
+  CMapTests() : encoder1_(NULL), encoder2_(NULL), successful_setup_(false) {}
+  virtual void SetUp() {}
+  virtual void TearDown();
+
+  void CommonSetUp(FontArray* font_array);
+  void CompareCMaps();
+
+  Ptr<CMapTable::CMap> cmap1_;
+  Ptr<CMapTable::CMap> cmap2_;
+  UConverter* encoder1_;
+  UConverter* encoder2_;
+  bool successful_setup_;  // Set once CommonSetUp() ran to completion.
+};
+
+// Streams |test_case| as "(field, field, ...)". A NULL charset name is written
+// as "NULL": passing a NULL char* to operator<< is undefined behavior.
+::std::ostream& operator<<(::std::ostream& os, const CMapTestCase *test_case) {
+  const CMapTestCase& tc = *test_case;
+  const char* name1 = tc.first_charset_name();
+  const char* name2 = tc.second_charset_name();
+  return os << "(" << tc.font_name() << ", " << tc.first_platform_id() << ", "
+      << tc.first_encoding_id() << ", " << (name1 ? name1 : "NULL") << ", "
+      << tc.second_platform_id() << ", " << tc.second_encoding_id() << ", "
+      << (name2 ? name2 : "NULL") << ", "
+      << tc.low_char() << ", " << tc.high_char() << ")";
+}
+
+// Fills |cmap1_|, |cmap2_| and both encoders from font_array->at(0).
+void CMapTests::CommonSetUp(FontArray* font_array) {
+  ASSERT_NE(font_array, reinterpret_cast<FontArray*>(NULL));
+  ASSERT_FALSE(font_array->empty());
+  Ptr<Font> font = font_array->at(0);
+  ASSERT_NE(font, reinterpret_cast<Font*>(NULL));
+  Ptr<CMapTable> cmap_table = down_cast<CMapTable*>(font->GetTable(Tag::cmap));
+  ASSERT_FALSE(cmap_table == NULL);  // Was dereferenced without a check.
+  cmap1_.Attach(cmap_table->GetCMap(GetParam().first_platform_id(),
+                                    GetParam().first_encoding_id()));
+  ASSERT_NE((cmap1_), reinterpret_cast<CMapTable::CMap*>(NULL));
+  cmap2_.Attach(cmap_table->GetCMap(GetParam().second_platform_id(),
+                                    GetParam().second_encoding_id()));
+  ASSERT_NE((cmap2_), reinterpret_cast<CMapTable::CMap*>(NULL));
+  encoder1_ = TestUtils::GetEncoder(GetParam().first_charset_name());
+  encoder2_ = TestUtils::GetEncoder(GetParam().second_charset_name());
+  successful_setup_ = true;  // Only reached when no ASSERT above fired.
+}
+
+void CMapTests::TearDown() {
+  // Close each converter that was obtained; NULL entries are left alone.
+  UConverter* const encoders[] = { encoder1_, encoder2_ };
+  for (int i = 0; i < 2; ++i)
+    if (encoders[i] != NULL) ucnv_close(encoders[i]);
+}
+
+// Looks up every character in [low_char, high_char] in both cmaps, converting
+// it first with the matching encoder when one is set, and requires equal ids.
+void CMapTests::CompareCMaps() {
+  // CommonSetUp() cannot abort the calling test itself, so check its flag.
+  ASSERT_TRUE(successful_setup_);
+  for (int32_t uchar = GetParam().low_char();
+       uchar <= GetParam().high_char(); ++uchar) {
+    // A NULL encoder means the character is looked up unconverted.
+    int32_t c1 = encoder1_ ? TestUtils::EncodeOneChar(encoder1_, uchar) : uchar;
+    int32_t c2 = encoder2_ ? TestUtils::EncodeOneChar(encoder2_, uchar) : uchar;
+    int32_t glyph_id1 = cmap1_->GlyphId(c1);
+    int32_t glyph_id2 = cmap2_->GlyphId(c2);
+#ifdef SFNTLY_DEBUG_CMAP
+    if (glyph_id1 != glyph_id2)
+      fprintf(stderr, "%x: g1=%x, %x: g2=%x\n", c1, glyph_id1, c2, glyph_id2);
+#endif
+    ASSERT_EQ(glyph_id1, glyph_id2);
+  }
+#ifdef SFNTLY_DEBUG_CMAP  // Was SFNTLY_SFNTLY_DEBUG_CMAPCMAP: never defined.
+  fprintf(stderr, "\n");
+#endif
+}
+
+TEST_P(CMapTests, GlyphsBetweenCMapsFingerprint) {  // Fingerprinting enabled.
+  Ptr<FontFactory> factory;
+  factory.Attach(FontFactory::GetInstance());
+  factory->FingerprintFont(true);
+  FontArray fonts;
+  LoadFont(GetParam().font_name(), factory, &fonts);
+  CommonSetUp(&fonts);
+  CompareCMaps();  // Asserts first that CommonSetUp() completed.
+}
+
+TEST_P(CMapTests, GlyphsBetweenCMapsNoFingerprint) {  // Factory left as is.
+  Ptr<FontFactory> factory;
+  factory.Attach(FontFactory::GetInstance());
+  FontArray fonts;
+  LoadFont(GetParam().font_name(), factory, &fonts);
+  CommonSetUp(&fonts);
+  CompareCMaps();  // Asserts first that CommonSetUp() completed.
+}
+
+TEST_P(CMapTests, GlyphsBetweenCMapsUsingByteVector) {
+  FontArray fonts;
+  LoadFontUsingByteVector(GetParam().font_name(), true, &fonts);
+  CommonSetUp(&fonts);
+  CompareCMaps();  // Asserts first that CommonSetUp() completed.
+}
+
+// Cases: the sample font's Windows UCS-2 cmap against its Unicode 2.0 BMP cmap
+// over the characters 0x20..0x7f, with NULL charset names on both sides.
+CMapTestCase kCMapTestsTestCases[] = {
+  CMapTestCase(SAMPLE_TTF_FILE,
+               PlatformId::kWindows, WindowsEncodingId::kUnicodeUCS2,
+               NULL,  // First charset name.
+               PlatformId::kUnicode, UnicodeEncodingId::kUnicode2_0_BMP,
+               NULL,  // Second charset name.
+               static_cast<int32_t>(0x20), static_cast<int32_t>(0x7f)),
+};
+
+// Runs every CMapTests TEST_P once per entry of the array above.
+INSTANTIATE_TEST_CASE_P(CMapTests,
+                        CMapTests,
+                        ::testing::ValuesIn(kCMapTestsTestCases));
+}
+
+#else
+// Placeholder for gtest builds without value-parameterized test support. Named
+// per file so that it cannot clash with a DummyTest defined in another file.
+TEST(CMapDummyTest, ValueParameterizedTestsAreNotSupportedOnThisPlatform) {}
+#endif  // GTEST_HAS_PARAM_TEST