// Source blob: f2ed3cb4b9cc8618848b0bd0656ecc19ec20da23
// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
#ifndef SRC_BYTE_PARSER_H_
#define SRC_BYTE_PARSER_H_
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include "src/element_parser.h"
#include "webm/callback.h"
#include "webm/element.h"
#include "webm/reader.h"
#include "webm/status.h"
namespace webm {
// Parses an EBML string (UTF-8 and ASCII) or binary element from a byte stream.
// Spec reference for string/binary elements:
// http://matroska.org/technical/specs/index.html#EBML_ex
// https://github.com/Matroska-Org/ebml-specification/blob/master/specification.markdown#ebml-element-types
//
// T selects the storage type: std::string for string elements or
// std::vector<std::uint8_t> for binary elements. Both are handled identically
// except that trailing NUL padding is stripped from strings once the read
// completes.
//
// Usage: call Init() with the element's metadata, then call Feed() until it
// reports completion; the result is then available through value() or
// mutable_value().
template <typename T>
class ByteParser : public ElementParser {
 public:
  static_assert(std::is_same<T, std::vector<std::uint8_t>>::value ||
                    std::is_same<T, std::string>::value,
                "T must be std::vector<std::uint8_t> or std::string");

  // Constructs a new parser which will use the given default_value as the
  // value for the element if its size is zero. Defaults to the empty string
  // or empty binary element (as the EBML spec indicates).
  explicit ByteParser(T default_value = {})
      : default_value_(std::move(default_value)) {}

  // Movable but not copyable.
  ByteParser(ByteParser&&) = default;
  ByteParser& operator=(ByteParser&&) = default;

  ByteParser(const ByteParser&) = delete;
  ByteParser& operator=(const ByteParser&) = delete;

  // Prepares the parser for an element of metadata.size bytes.
  //
  // Returns:
  //   Status::kInvalidElementSize if the element's size is unknown.
  //   Status::kNotEnoughMemory if the size cannot be represented as a
  //     std::size_t, exceeds what T can hold, or (in fuzzer builds) exceeds
  //     WEBM_FUZZER_BYTE_ELEMENT_SIZE_LIMIT.
  //   Status::kOkCompleted otherwise.
  Status Init(const ElementMetadata& metadata,
              std::uint64_t max_size) override {
    assert(metadata.size == kUnknownElementSize || metadata.size <= max_size);

    if (metadata.size == kUnknownElementSize) {
      return Status(Status::kInvalidElementSize);
    }

    if (metadata.size > std::numeric_limits<std::size_t>::max() ||
        metadata.size > value_.max_size()) {
      return Status(Status::kNotEnoughMemory);
    }

#if WEBM_FUZZER_BYTE_ELEMENT_SIZE_LIMIT
    // AFL and ASan just kill the process if too much memory is allocated, so
    // let's cap the maximum size of the element. It's too easy for the fuzzer
    // to make an element with a ridiculously huge size, and that just creates
    // uninteresting false positives.
    if (metadata.size > WEBM_FUZZER_BYTE_ELEMENT_SIZE_LIMIT) {
      return Status(Status::kNotEnoughMemory);
    }
#endif

    if (metadata.size == 0) {
      // An empty element takes the default value. Marking it as fully read
      // makes the next Feed() call return immediately without touching the
      // reader.
      value_ = default_value_;
      total_read_ = default_value_.size();
    } else {
      // Size the buffer up front so Feed() can read directly into it.
      value_.resize(static_cast<std::size_t>(metadata.size));
      total_read_ = 0;
    }

    return Status(Status::kOkCompleted);
  }

  // Reads the element's remaining bytes from reader into the value buffer.
  //
  // callback, reader, and num_bytes_read must not be null. On return,
  // *num_bytes_read holds the number of bytes consumed by this call. Reading
  // is retried for as long as the reader reports Status::kOkPartial.
  //
  // Returns Status::kOkCompleted if there is nothing left to read; otherwise
  // returns the status of the last Reader::Read() call.
  Status Feed(Callback* callback, Reader* reader,
              std::uint64_t* num_bytes_read) override {
    assert(callback != nullptr);
    assert(reader != nullptr);
    assert(num_bytes_read != nullptr);

    *num_bytes_read = 0;

    // Compare with >= rather than ==: once a string has been fully read, the
    // NUL stripping below shrinks value_ so that total_read_ exceeds
    // value_.size(). An equality test would let a later call fall through and
    // compute a wrapped-around buffer size (and a pointer past the end of the
    // buffer). This also guarantees value_ is non-empty below, so front() is
    // safe.
    if (total_read_ >= value_.size()) {
      return Status(Status::kOkCompleted);
    }

    Status status;
    do {
      std::uint64_t local_num_bytes_read = 0;
      // Resume writing right after the bytes stored by earlier reads.
      std::uint8_t* buffer =
          reinterpret_cast<std::uint8_t*>(&value_.front()) + total_read_;
      const std::size_t buffer_size = value_.size() - total_read_;
      status = reader->Read(buffer_size, buffer, &local_num_bytes_read);
      assert((status.completed_ok() && local_num_bytes_read == buffer_size) ||
             (status.ok() && local_num_bytes_read < buffer_size) ||
             (!status.ok() && local_num_bytes_read == 0));
      *num_bytes_read += local_num_bytes_read;
      total_read_ += static_cast<std::size_t>(local_num_bytes_read);
    } while (status.code == Status::kOkPartial);

    // UTF-8 and ASCII string elements can be padded with NUL characters at the
    // end, which should be ignored.
    if (std::is_same<T, std::string>::value && status.completed_ok()) {
      while (!value_.empty() && value_.back() == '\0') {
        value_.pop_back();
      }
    }

    return status;
  }

  // Gets the parsed value. This must not be called until the parse has been
  // successfully completed.
  const T& value() const {
    // >= because NUL stripping may have shrunk value_ below total_read_.
    assert(total_read_ >= value_.size());
    return value_;
  }

  // Gets a mutable pointer to the parsed value. This must not be called until
  // the parse has been successfully completed.
  T* mutable_value() {
    // >= because NUL stripping may have shrunk value_ below total_read_.
    assert(total_read_ >= value_.size());
    return &value_;
  }

 private:
  // Buffer for the element's bytes; holds the final value once parsing is
  // complete.
  T value_;

  // Value used when the element's size is zero.
  T default_value_;

  // Number of bytes stored into value_ so far for the current element. Starts
  // at zero so the accessors' assertions and the defaulted move operations
  // never read an indeterminate value before Init() has been called.
  std::size_t total_read_ = 0;
};
// Parser for string elements; the value is stored in a std::string.
using StringParser = ByteParser<std::string>;
// Parser for binary elements; the value is stored in a
// std::vector<std::uint8_t>.
using BinaryParser = ByteParser<std::vector<std::uint8_t>>;
} // namespace webm
#endif // SRC_BYTE_PARSER_H_