| // Copyright (c) 2016 The WebM project authors. All Rights Reserved. |
| // |
| // Use of this source code is governed by a BSD-style license |
| // that can be found in the LICENSE file in the root of the source |
| // tree. An additional intellectual property rights grant can be found |
| // in the file PATENTS. All contributing project authors may |
| // be found in the AUTHORS file in the root of the source tree. |
| #ifndef SRC_MASTER_PARSER_H_ |
| #define SRC_MASTER_PARSER_H_ |
| |
| #include <cassert> |
| #include <cstdint> |
| #include <functional> |
| #include <memory> |
| #include <type_traits> |
| #include <unordered_map> |
| #include <utility> |
| |
| #include "src/element_parser.h" |
| #include "src/id_parser.h" |
| #include "src/size_parser.h" |
| #include "src/skip_parser.h" |
| #include "src/unknown_parser.h" |
| #include "src/void_parser.h" |
| #include "webm/callback.h" |
| #include "webm/element.h" |
| #include "webm/id.h" |
| #include "webm/reader.h" |
| #include "webm/status.h" |
| |
| namespace webm { |
| |
| // A general purpose parser for EBML master elements. |
| // |
| // For example, if a document specification defines a Foo master element that |
| // has two boolean children (Bar and Baz), then a FooParser capable of parsing |
| // the Foo master element could be defined as follows: |
| // |
| // struct FooParser : public MasterParser { |
| // FooParser() |
| // : MasterParser(MakeChild<BoolParser>(Id::kBar), |
| // MakeChild<BoolParser>(Id::kBaz)) {} |
| // }; |
| // |
| // See the MasterValueParser for an alternative class for parsing master |
| // elements into a data structure. |
| class MasterParser : public ElementParser { |
| public: |
| // Constructs a new MasterParser that uses the given |
| // {Id, std::unique_ptr<ElementParser>} pairs to map child IDs to the |
| // appropriate parser/handler. Each argument must be of type |
| // std::pair<Id, std::unique_ptr<ElementParser>>. If a parser is not |
| // explicitly provided for Id::kVoid, a VoidParser will automatically be used |
| // for it. |
| // |
| // Initializer lists don't support move-only types (i.e. std::unique_ptr), so |
| // instead a variadic template is used. |
| template <typename... T> |
| explicit MasterParser(T&&... parser_pairs) { |
| // Prefer an odd reserve size. This makes libc++ use a prime number for the |
| // bucket count. Otherwise, if it happens to be a power of 2, then libc++ |
| // will use a power-of-2 bucket count (and since Matroska EBML IDs have low |
| // entropy in the low bits, there will be a lot of collisions). libstdc++ |
| // always prefers a prime bucket count. I'm not sure how MSVC or others are |
| // implemented, but this shouldn't adversely affect them even if they are |
| // implemented differently. Add one to the count because we'll likely need |
| // to insert a parser for Id::kVoid. |
| parsers_.reserve((sizeof...(T) + 1) | 1); |
| |
| // This dummy initializer list is just used to force the parameter pack to |
| // be expanded, which turns the expression into a for-each "loop" that |
| // inserts each argument into the map. |
| auto dummy = {0, (InsertParser(std::forward<T>(parser_pairs)), 0)...}; |
| (void)dummy; // Silence unused variable warning. |
| |
| if (parsers_.find(Id::kVoid) == parsers_.end()) { |
| InsertParser(MakeChild<VoidParser>(Id::kVoid)); |
| } |
| } |
| |
| MasterParser(const MasterParser&) = delete; |
| MasterParser& operator=(const MasterParser&) = delete; |
| |
| Status Init(const ElementMetadata& metadata, std::uint64_t max_size) override; |
| |
| void InitAfterSeek(const Ancestory& child_ancestory, |
| const ElementMetadata& child_metadata) override; |
| |
| Status Feed(Callback* callback, Reader* reader, |
| std::uint64_t* num_bytes_read) override; |
| |
| bool GetCachedMetadata(ElementMetadata* metadata) override; |
| |
| std::uint32_t header_size() const { return header_size_; } |
| |
| // Gets the size of this element. May be called before the parse is fully |
| // complete (but only after Init() has already been called and successfully |
| // returned). |
| std::uint64_t size() const { return my_size_; } |
| |
| // Gets absolute byte position of the start of the element in the byte stream. |
| // May be called before the parse is fully complete (but only after Init() has |
| // already been called and successfully returned). |
| std::uint64_t position() const { return my_position_; } |
| |
| // Gets the metadata for the child that is currently being parsed. This may |
| // only be called while the child's body (not its header information like ID |
| // and size) is being parsed. |
| const ElementMetadata& child_metadata() const { |
| assert(state_ == State::kValidatingChildSize || |
| state_ == State::kGettingAction || |
| state_ == State::kInitializingChildParser || |
| state_ == State::kReadingChildBody); |
| return child_metadata_; |
| } |
| |
| protected: |
| // Allocates a new parser of type T, forwarding args to the constructor, and |
| // creates a std::pair<Id, std::unique_ptr<ElementParser>> using the given id |
| // and the allocated parser. |
| template <typename T, typename... Args> |
| static std::pair<Id, std::unique_ptr<ElementParser>> MakeChild( |
| Id id, Args&&... args) { |
| std::unique_ptr<ElementParser> ptr(new T(std::forward<Args>(args)...)); |
| return std::pair<Id, std::unique_ptr<ElementParser>>(id, std::move(ptr)); |
| } |
| |
| private: |
| // Parsing states for the finite-state machine. |
| enum class State { |
| /* clang-format off */ |
| // State Transitions to state When |
| kFirstReadOfChildId, // kFinishingReadingChildId size(id) > 1 |
| // kReadingChildSize size(id) == 1 |
| // kEndReached EOF |
| kFinishingReadingChildId, // kReadingChildSize done |
| kReadingChildSize, // kValidatingChildSize done |
| kValidatingChildSize, // kGettingAction done |
| // kEndReached unknown id & unsized |
| kGettingAction, // kInitializingChildParser done |
| kInitializingChildParser, // kReadingChildBody done |
| kReadingChildBody, // kChildFullyParsed child parse done |
| kChildFullyParsed, // kValidatingChildSize cached metadata |
| // kFirstReadOfChildId read < my_size_ |
| // kEndReached read == my_size_ |
| kEndReached, // No transitions from here (must call Init) |
| /* clang-format on */ |
| }; |
| |
| using StdHashId = std::hash<std::underlying_type<Id>::type>; |
| |
| // Hash functor for hashing Id enums for storage in std::unordered_map. |
| struct IdHash : StdHashId { |
| // Type aliases for conforming to the std::hash interface. |
| using argument_type = Id; |
| using result_type = StdHashId::result_type; |
| |
| // Returns the hash of the given id. |
| result_type operator()(argument_type id) const { |
| return StdHashId::operator()(static_cast<StdHashId::argument_type>(id)); |
| } |
| }; |
| |
| // The parser for parsing element Ids. |
| IdParser id_parser_; |
| |
| // The parser for parsing element sizes. |
| SizeParser size_parser_; |
| |
| // Metadata for the child element that is currently being parsed. |
| ElementMetadata child_metadata_; |
| |
| // Maps child IDs to the appropriate parser that can handle that child. |
| std::unordered_map<Id, std::unique_ptr<ElementParser>, IdHash> parsers_; |
| |
| // The parser that is used to parse unknown children. |
| UnknownParser unknown_parser_; |
| |
| // The parser that is used to skip over children. |
| SkipParser skip_parser_; |
| |
| // The parser that is being used to parse the current child. This must be null |
| // or a pointer in parsers_. |
| ElementParser* child_parser_; |
| |
| // The current parsing action for the child that is currently being parsed. |
| Action action_; |
| |
| // The current state of the parser. |
| State state_; |
| |
| std::uint32_t header_size_; |
| |
| // The size of this element. |
| std::uint64_t my_size_; |
| |
| std::uint64_t my_position_; |
| |
| std::uint64_t max_size_; |
| |
| // The total number of bytes read by this parser. |
| std::uint64_t total_bytes_read_; |
| |
| // Set to true if parsing has completed and this parser consumed an extra |
| // element header (ID and size) that wasn't from a child. |
| bool has_cached_metadata_ = false; |
| |
| // Inserts the parser into the parsers_ map and asserts it is the only parser |
| // registers to parse the corresponding Id. |
| template <typename T> |
| void InsertParser(T&& parser) { |
| bool inserted = parsers_.insert(std::forward<T>(parser)).second; |
| (void)inserted; // Silence unused variable warning. |
| assert(inserted); // Make sure there aren't duplicates. |
| } |
| |
| // Common initialization logic for Init/InitAfterseek. |
| void InitSetup(std::uint32_t header_size, std::uint64_t size_in_bytes, |
| std::uint64_t position); |
| |
| // Resets the internal parsers in preparation for parsing the next child. |
| void PrepareForNextChild(); |
| }; |
| |
| } // namespace webm |
| |
| #endif // SRC_MASTER_PARSER_H_ |