| #include "marisa/grimoire/algorithm.h" |
| #include "marisa/grimoire/trie/state.h" |
| #include "marisa/grimoire/trie/tail.h" |
| |
| namespace marisa { |
| namespace grimoire { |
| namespace trie { |
| |
| Tail::Tail() : buf_(), end_flags_() {} |
| |
| void Tail::build(Vector<Entry> &entries, Vector<UInt32> *offsets, |
| TailMode mode) { |
| MARISA_THROW_IF(offsets == NULL, MARISA_NULL_ERROR); |
| |
| switch (mode) { |
| case MARISA_TEXT_TAIL: { |
| for (std::size_t i = 0; i < entries.size(); ++i) { |
| const char * const ptr = entries[i].ptr(); |
| const std::size_t length = entries[i].length(); |
| for (std::size_t j = 0; j < length; ++j) { |
| if (ptr[j] == '\0') { |
| mode = MARISA_BINARY_TAIL; |
| break; |
| } |
| } |
| if (mode == MARISA_BINARY_TAIL) { |
| break; |
| } |
| } |
| break; |
| } |
| case MARISA_BINARY_TAIL: { |
| break; |
| } |
| default: { |
| MARISA_THROW(MARISA_CODE_ERROR, "undefined tail mode"); |
| } |
| } |
| |
| Tail temp; |
| temp.build_(entries, offsets, mode); |
| swap(temp); |
| } |
| |
| void Tail::map(Mapper &mapper) { |
| Tail temp; |
| temp.map_(mapper); |
| swap(temp); |
| } |
| |
| void Tail::read(Reader &reader) { |
| Tail temp; |
| temp.read_(reader); |
| swap(temp); |
| } |
| |
| void Tail::write(Writer &writer) const { |
| write_(writer); |
| } |
| |
| void Tail::restore(Agent &agent, std::size_t offset) const { |
| MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR); |
| |
| State &state = agent.state(); |
| if (end_flags_.empty()) { |
| for (const char *ptr = &buf_[offset]; *ptr != '\0'; ++ptr) { |
| state.key_buf().push_back(*ptr); |
| } |
| } else { |
| do { |
| state.key_buf().push_back(buf_[offset]); |
| } while (!end_flags_[offset++]); |
| } |
| } |
| |
| bool Tail::match(Agent &agent, std::size_t offset) const { |
| MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR); |
| MARISA_DEBUG_IF(agent.state().query_pos() >= agent.query().length(), |
| MARISA_BOUND_ERROR); |
| |
| State &state = agent.state(); |
| if (end_flags_.empty()) { |
| const char * const ptr = &buf_[offset] - state.query_pos(); |
| do { |
| if (ptr[state.query_pos()] != agent.query()[state.query_pos()]) { |
| return false; |
| } |
| state.set_query_pos(state.query_pos() + 1); |
| if (ptr[state.query_pos()] == '\0') { |
| return true; |
| } |
| } while (state.query_pos() < agent.query().length()); |
| return false; |
| } else { |
| do { |
| if (buf_[offset] != agent.query()[state.query_pos()]) { |
| return false; |
| } |
| state.set_query_pos(state.query_pos() + 1); |
| if (end_flags_[offset++]) { |
| return true; |
| } |
| } while (state.query_pos() < agent.query().length()); |
| return false; |
| } |
| } |
| |
| bool Tail::prefix_match(Agent &agent, std::size_t offset) const { |
| MARISA_DEBUG_IF(buf_.empty(), MARISA_STATE_ERROR); |
| |
| State &state = agent.state(); |
| if (end_flags_.empty()) { |
| const char *ptr = &buf_[offset] - state.query_pos(); |
| do { |
| if (ptr[state.query_pos()] != agent.query()[state.query_pos()]) { |
| return false; |
| } |
| state.key_buf().push_back(ptr[state.query_pos()]); |
| state.set_query_pos(state.query_pos() + 1); |
| if (ptr[state.query_pos()] == '\0') { |
| return true; |
| } |
| } while (state.query_pos() < agent.query().length()); |
| ptr += state.query_pos(); |
| do { |
| state.key_buf().push_back(*ptr); |
| } while (*++ptr != '\0'); |
| return true; |
| } else { |
| do { |
| if (buf_[offset] != agent.query()[state.query_pos()]) { |
| return false; |
| } |
| state.key_buf().push_back(buf_[offset]); |
| state.set_query_pos(state.query_pos() + 1); |
| if (end_flags_[offset++]) { |
| return true; |
| } |
| } while (state.query_pos() < agent.query().length()); |
| do { |
| state.key_buf().push_back(buf_[offset]); |
| } while (!end_flags_[offset++]); |
| return true; |
| } |
| } |
| |
| void Tail::clear() { |
| Tail().swap(*this); |
| } |
| |
| void Tail::swap(Tail &rhs) { |
| buf_.swap(rhs.buf_); |
| end_flags_.swap(rhs.end_flags_); |
| } |
| |
| void Tail::build_(Vector<Entry> &entries, Vector<UInt32> *offsets, |
| TailMode mode) { |
| for (std::size_t i = 0; i < entries.size(); ++i) { |
| entries[i].set_id(i); |
| } |
| Algorithm().sort(entries.begin(), entries.end()); |
| |
| Vector<UInt32> temp_offsets; |
| temp_offsets.resize(entries.size(), 0); |
| |
| const Entry dummy; |
| const Entry *last = &dummy; |
| for (std::size_t i = entries.size(); i > 0; --i) { |
| const Entry ¤t = entries[i - 1]; |
| MARISA_THROW_IF(current.length() == 0, MARISA_RANGE_ERROR); |
| std::size_t match = 0; |
| while ((match < current.length()) && (match < last->length()) && |
| ((*last)[match] == current[match])) { |
| ++match; |
| } |
| if ((match == current.length()) && (last->length() != 0)) { |
| temp_offsets[current.id()] = (UInt32)( |
| temp_offsets[last->id()] + (last->length() - match)); |
| } else { |
| temp_offsets[current.id()] = (UInt32)buf_.size(); |
| for (std::size_t j = 1; j <= current.length(); ++j) { |
| buf_.push_back(current[current.length() - j]); |
| } |
| if (mode == MARISA_TEXT_TAIL) { |
| buf_.push_back('\0'); |
| } else { |
| for (std::size_t j = 1; j < current.length(); ++j) { |
| end_flags_.push_back(false); |
| } |
| end_flags_.push_back(true); |
| } |
| MARISA_THROW_IF(buf_.size() > MARISA_UINT32_MAX, MARISA_SIZE_ERROR); |
| } |
| last = ¤t; |
| } |
| buf_.shrink(); |
| |
| offsets->swap(temp_offsets); |
| } |
| |
| void Tail::map_(Mapper &mapper) { |
| buf_.map(mapper); |
| end_flags_.map(mapper); |
| } |
| |
| void Tail::read_(Reader &reader) { |
| buf_.read(reader); |
| end_flags_.read(reader); |
| } |
| |
| void Tail::write_(Writer &writer) const { |
| buf_.write(writer); |
| end_flags_.write(writer); |
| } |
| |
| } // namespace trie |
| } // namespace grimoire |
| } // namespace marisa |