blob: 3b3ba57093a279c502a50c3dca1e214c25d4dc29 [file] [log] [blame] [edit]
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <algorithm>
#include <cerrno>
#include <cinttypes>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <map>
#include <memory>
#include <stdexcept>
#include <unordered_map>
#include <pybind11/iostream.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <executorch/devtools/bundled_program/bundled_program.h>
#include <executorch/devtools/bundled_program/schema/bundled_program_schema_generated.h>
#include <executorch/devtools/etdump/etdump_flatcc.h>
#include <executorch/extension/data_loader/buffer_data_loader.h>
#include <executorch/extension/data_loader/mmap_data_loader.h>
#include <executorch/extension/memory_allocator/malloc_memory_allocator.h>
#include <executorch/runtime/core/data_loader.h>
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
#include <executorch/runtime/executor/method.h>
#include <executorch/runtime/executor/program.h>
#include <executorch/runtime/kernel/operator_registry.h>
#include <executorch/runtime/platform/assert.h>
#include <executorch/runtime/platform/platform.h>
#include <executorch/runtime/platform/profiler.h>
#include <executorch/runtime/platform/runtime.h>
#include <ATen/Functions.h>
#include <ATen/Tensor.h>
#include <ATen/core/functional.h>
#include <c10/core/ScalarTypeToTypeMeta.h>
#include <torch/csrc/utils/pybind.h>
#include <torch/python.h>
#ifndef USE_ATEN_LIB
#include <c10/core/impl/LocalDispatchKeySet.h>
#include <executorch/extension/aten_util/aten_bridge.h>
#endif
/// Throws a runtime_error with the provided message if `error` is not `Ok`.
/// Implemented as a GNU statement-expression so it can appear in expression
/// position; pybind11 translates the C++ exception into a Python
/// RuntimeError at the binding boundary.
#define THROW_IF_ERROR(error, message, ...) \
({ \
if ((error) != Error::Ok) { \
char msg_buf[128]; \
snprintf(msg_buf, sizeof(msg_buf), message, ##__VA_ARGS__); \
/* pybind will convert this to a python exception. */ \
throw std::runtime_error(msg_buf); \
} \
})
/// Same as THROW_IF_ERROR, but throws std::out_of_range, which pybind11
/// translates into a Python IndexError — use for indexed accessors.
#define THROW_INDEX_IF_ERROR(error, message, ...) \
({ \
if ((error) != Error::Ok) { \
char msg_buf[128]; \
snprintf(msg_buf, sizeof(msg_buf), message, ##__VA_ARGS__); \
/* pybind will convert this to a python exception. */ \
throw std::out_of_range(msg_buf); \
} \
})
// Overrides the ExecuTorch PAL logging hook. The default implementation
// (posix.cpp) writes to stderr with fprintf, which python cannot intercept;
// routing through std::cerr lets pybind11's scoped_estream_redirect capture
// and redirect runtime log output into the python environment.
void et_pal_emit_log_message(
    et_timestamp_t timestamp,
    et_pal_log_level_t level,
    const char* filename,
    ET_UNUSED const char* function,
    size_t line,
    const char* message,
    ET_UNUSED size_t length) {
  auto& sink = std::cerr;
  sink << '[' << filename << ':' << line << "] " << message << std::endl;
}
namespace py = pybind11;
using executorch::bundled_program::verify_method_outputs;
using ::executorch::extension::BufferDataLoader;
using ::executorch::extension::MallocMemoryAllocator;
using ::executorch::extension::MmapDataLoader;
using ::executorch::runtime::ArrayRef;
using ::executorch::runtime::DataLoader;
using ::executorch::runtime::Error;
using ::executorch::runtime::EValue;
using ::executorch::runtime::EventTracerDebugLogLevel;
using ::executorch::runtime::get_registered_kernels;
using ::executorch::runtime::HierarchicalAllocator;
using ::executorch::runtime::Kernel;
using ::executorch::runtime::MemoryAllocator;
using ::executorch::runtime::MemoryManager;
using ::executorch::runtime::Method;
using ::executorch::runtime::prof_result_t;
using ::executorch::runtime::Program;
using ::executorch::runtime::Result;
using ::executorch::runtime::Span;
using ::executorch::runtime::Tag;
using torch::executor::etdump_result;
using torch::executor::ETDumpGen;
#ifndef USE_ATEN_LIB
using ::executorch::extension::alias_attensor_to_etensor;
using ::executorch::extension::alias_etensor_to_attensor;
using ::executorch::extension::torch_to_executorch_scalar_type;
#endif // !USE_ATEN_LIB
namespace executorch {
namespace extension {
namespace pybindings {
namespace {
/// Writes `size` bytes from `buf` to a newly created/truncated file at
/// `path`.
///
/// Throws std::runtime_error if the file cannot be opened, fully written,
/// or cleanly closed.
void write_data_to_file(const std::string& path, void* buf, size_t size) {
  FILE* f = fopen(path.c_str(), "w+");
  if (!f) {
    throw std::runtime_error(
        "Failed to open file " + path + ": " + strerror(errno));
  }
  size_t num_written = fwrite(buf, 1, size, f);
  if (num_written != size) {
    fclose(f);
    throw std::runtime_error("Failed to write etdump to file " + path);
  }
  int err = fclose(f);
  if (err) {
    // Bug fix: fclose() returns EOF on failure and reports the cause via
    // errno; the previous code passed the EOF return value to strerror(),
    // producing a meaningless error message.
    throw std::runtime_error(
        "Failed to close etdump file " + path + ": " + strerror(errno));
  }
}
// Points each non-memory-planned tensor output of `method` at its
// caller-provided storage buffer. Throws (via THROW_IF_ERROR) when the
// storage count does not match the method's output count, or when the
// runtime rejects one of the buffers.
void setup_output_storage(
    Method& method,
    const std::vector<Span<uint8_t>>& output_storages) {
  if (output_storages.size() != method.outputs_size()) {
    THROW_IF_ERROR(
        Error::InvalidArgument,
        "number of output storages %zu does not match number of outputs %zu",
        output_storages.size(),
        method.outputs_size());
  }
  size_t index = 0;
  for (const auto& storage : output_storages) {
    // An empty buffer marks a non-tensor or memory-planned output; nothing
    // to wire up for those.
    if (storage.size() != 0) {
      const Error status =
          method.set_output_data_ptr(storage.data(), storage.size(), index);
      // Non-tensor and memory-planned outputs were skipped above, so any
      // failure here is a real error.
      THROW_IF_ERROR(
          status,
          "set_output_data_ptr failed for output %zu with error 0x%" PRIx32,
          index,
          static_cast<uint32_t>(status));
    }
    ++index;
  }
}
/// Owns a loaded ExecuTorch Program together with everything needed to run
/// it: the data loader backing the program, memory arenas sized to serve
/// every method, eagerly loaded Method instances, and an optional ETDump
/// event tracer plus its debug buffer.
///
/// Members are destroyed in reverse declaration order, so methods_ is torn
/// down before program_, program_ before loader_, and everything before
/// memory_ is released last (see member comments at the bottom).
class Module final {
 public:
  /// Loads the program from `loader`, sizes the non-const arenas to the
  /// maximum required by any method, and eagerly loads every method.
  ///
  /// Throws std::runtime_error (via THROW_IF_ERROR) if program or method
  /// loading fails.
  explicit Module(
      std::unique_ptr<DataLoader> loader,
      std::unique_ptr<ETDumpGen> tracer = nullptr,
      size_t debug_buffer_size = 0,
      Program::Verification program_verification =
          Program::Verification::InternalConsistency)
      : loader_(std::move(loader)),
        event_tracer_(std::move(tracer)),
        debug_buffer_size_(debug_buffer_size) {
    ::executorch::runtime::runtime_init();
    Result<Program> program =
        Program::load(loader_.get(), program_verification);
    THROW_IF_ERROR(
        program.error(),
        "loading program failed with error: 0x%" PRIx32,
        static_cast<uint32_t>(program.error()));
    program_ = std::make_unique<Program>(std::move(program.get()));
    // Figure out the size of each non_const layer we need to support every
    // method in the program. Map will be easier to use than a list because we
    // dont know how many non_const arenas there will be
    std::map<size_t, int64_t> non_const_buffer_sizes;
    for (size_t i = 0; i < program_->num_methods(); ++i) {
      auto name = program_->get_method_name(i).get();
      auto method_meta = program_->method_meta(name).get();
      for (size_t j = 0; j < method_meta.num_non_const_buffers(); j++) {
        int64_t buffer_size = method_meta.non_const_buffer_size(j).get();
        // Keep the largest requirement seen for arena `j` across all
        // methods, so one set of arenas can serve every method.
        if (non_const_buffer_sizes.find(j) == non_const_buffer_sizes.end()) {
          non_const_buffer_sizes.insert({j, buffer_size});
        } else {
          non_const_buffer_sizes[j] =
              std::max(non_const_buffer_sizes[j], buffer_size);
        }
      }
    }
    // Allocate the arenas. Using vector because we need to remember the size as
    // well, so vector is easier then unique_ptr.
    // NOTE(review): this is a local (moved into memory_ below) despite the
    // member-style trailing underscore in its name.
    std::vector<std::vector<uint8_t>> non_const_buffers_;
    for (std::map<size_t, int64_t>::iterator i = non_const_buffer_sizes.begin();
         i != non_const_buffer_sizes.end();
         i++) {
      non_const_buffers_.push_back(std::vector<uint8_t>(i->second));
    }
    memory_ = std::make_unique<Memory>(std::move(non_const_buffers_));
    if (event_tracer_ && debug_buffer_size > 0) {
      // If a debug buffer was requested for the ETDump, allocate it and make
      // sure its lifetime is as long as the event_tracer.
      debug_buffer_ = std::make_unique<uint8_t[]>(debug_buffer_size);
      event_tracer_->set_debug_buffer(get_etdump_debug_buffer());
      event_tracer_->set_event_tracer_debug_level(
          EventTracerDebugLogLevel::kIntermediateOutputs);
    }

    // Load methods
    for (size_t i = 0; i < program_->num_methods(); ++i) {
      auto name = program_->get_method_name(i).get();
      // It's safe to use the same memory manager for all modules because
      // we can guarantee that only one will be executing at a time.
      // Everything in this module runs on a single thread.
      Result<Method> method = program_->load_method(
          name, memory_->mem_manager(), event_tracer_.get());
      THROW_IF_ERROR(
          method.error(),
          "loading method %s failed with error 0x%" PRIx32,
          name,
          static_cast<uint32_t>(method.error()));
      methods_.insert(
          {std::string(name),
           std::make_unique<Method>(std::move(method.get()))});
    }
  }

  Module(const Module&) = delete;
  Module& operator=(const Module&) = delete;
  Module(Module&&) = default;
  Module& operator=(Module&&) = default;

  /// Executes the specified method on the provided inputs and returns its
  /// outputs. When `output_storages` is provided, non-memory-planned tensor
  /// outputs are written into the supplied buffers.
  std::vector<EValue> run_method(
      const std::string& method_name,
      const std::vector<EValue>& args,
      const std::optional<std::vector<Span<uint8_t>>>& output_storages =
          std::nullopt) {
    auto& method = get_method(method_name);
    exec_aten::ArrayRef<EValue> input_evalue_list(args.data(), args.size());

    Error set_inputs_status = method.set_inputs(input_evalue_list);
    THROW_IF_ERROR(
        set_inputs_status,
        "method->set_inputs() for method '%s' failed with error 0x%" PRIx32,
        method_name.c_str(),
        static_cast<uint32_t>(set_inputs_status));

#ifdef USE_ATEN_LIB
    // [TLS handling] This is to workaround an assertion failure
    // (https://fburl.com/code/302jyn8d) running `gelu` in ATen mode in fbcode
    // (such as bento). The problem is ExecuTorch ATen mode doesn't have
    // Thread Local State, but `torch-cpp` is assuming tls init is done. There
    // are two more checks: MKLDNN disabled and C10_MOBILE, if any of them is
    // true we won't be hitting this assertion error. However in `torch-cpp`
    // lib both checks are false. Production impact: this should not make any
    // impact in production environment, given that in xplat we are depending
    // on a library that enables C10_MOBILE (`torch_mobile_core`).
    c10::impl::ExcludeDispatchKeyGuard no_autograd(
        c10::autograd_dispatch_keyset);
#endif
    if (output_storages) {
      setup_output_storage(method, *output_storages);
    }
    Error execute_status = method.execute();
    THROW_IF_ERROR(
        execute_status,
        "method->execute() failed with error 0x%" PRIx32,
        static_cast<uint32_t>(execute_status));
    // process outputs
    return get_outputs(method_name);
  }

  /// Copies the current output EValues of `method_name` into a new vector.
  std::vector<EValue> get_outputs(const std::string& method_name) {
    auto& method = methods_[method_name];
    std::vector<EValue> result(method->outputs_size());

    Error get_outputs_status =
        method->get_outputs(result.data(), method->outputs_size());
    THROW_IF_ERROR(
        get_outputs_status,
        "method->get_outputs() for method '%s' failed with error 0x%" PRIx32,
        method_name.c_str(),
        static_cast<uint32_t>(get_outputs_status));

    return result;
  }

  /// Returns the pre-loaded Method, throwing std::runtime_error if the
  /// program has no method with that name.
  Method& get_method(const std::string& method_name) {
    if (methods_.count(method_name) == 0) {
      THROW_IF_ERROR(
          Error::InvalidArgument,
          "no such method in program: %s",
          method_name.c_str());
    }
    return *methods_[method_name].get();
  }

  /// Returns the names of all methods in the program.
  std::vector<std::string> method_names() const {
    std::vector<std::string> names;
    for (const auto& method : methods_) {
      names.push_back(method.first);
    }
    return names;
  }

  /// True if this module was constructed with an ETDump event tracer.
  bool has_etdump() {
    return static_cast<bool>(event_tracer_);
  }

  /// Precondition: has_etdump() is true.
  ETDumpGen& etdump() {
    return *event_tracer_;
  }

  /// True if a debug buffer was allocated for the ETDump tracer.
  bool has_etdump_debug_buffer() const {
    return static_cast<bool>(debug_buffer_);
  }

  /// Non-owning view of the ETDump debug buffer (empty if none allocated).
  Span<uint8_t> get_etdump_debug_buffer() {
    return Span<uint8_t>(debug_buffer_.get(), debug_buffer_size_);
  }

 private:
  /// A wrapper/util class for executorch memory allocations/manager.
  class Memory {
   public:
    explicit Memory(std::vector<std::vector<uint8_t>>&& non_const_buffers)
        : runtime_allocator_(),
          non_const_buffers_(std::move(non_const_buffers)),
          non_const_spans_(create_non_const_spans()),
          non_const_allocator_(
              {non_const_spans_.data(), non_const_spans_.size()}),
          mem_manager_(
              &const_allocator_,
              &non_const_allocator_,
              &runtime_allocator_,
              &temp_allocator_) {}

    /// Returns a pointer to the internal memory manager, the Memory instance
    /// must outlive this pointer.
    MemoryManager* mem_manager() {
      return &mem_manager_;
    }

    Memory(const Memory&) = delete;
    Memory& operator=(const Memory&) = delete;

   private:
    // Zero-sized allocators for const and temp data; malloc-backed
    // allocator for runtime allocations.
    MemoryAllocator const_allocator_{MemoryAllocator(0, nullptr)};
    MallocMemoryAllocator runtime_allocator_;
    MemoryAllocator temp_allocator_{MemoryAllocator(0, nullptr)};
    std::vector<std::vector<uint8_t>> non_const_buffers_;
    std::vector<Span<uint8_t>> non_const_spans_;
    HierarchicalAllocator non_const_allocator_;
    MemoryManager mem_manager_;

    // Builds non-owning spans over non_const_buffers_ for the
    // HierarchicalAllocator. Called in the member-init list, so it must run
    // after non_const_buffers_ is initialized (declaration order above).
    std::vector<Span<uint8_t>> create_non_const_spans() {
      std::vector<Span<uint8_t>> result;
      for (size_t i = 0; i < non_const_buffers_.size(); i++) {
        result.push_back(
            {non_const_buffers_[i].data(), non_const_buffers_[i].size()});
      }
      return result;
    }
  };

  std::unique_ptr<Memory> memory_;
  std::unique_ptr<DataLoader> loader_; // program_ points to this.
  std::unique_ptr<const Program> program_; // methods_ entries points to this.
  std::unordered_map<std::string, std::unique_ptr<Method>> methods_;
  std::unique_ptr<ETDumpGen> event_tracer_;
  std::unique_ptr<uint8_t[]> debug_buffer_;
  size_t debug_buffer_size_;
};
inline std::unique_ptr<Module> load_module_from_buffer(
const void* ptr,
size_t ptr_len,
bool enable_etdump,
size_t debug_buffer_size,
Program::Verification program_verification) {
EXECUTORCH_SCOPE_PROF("load_module_from_buffer");
auto loader = std::make_unique<BufferDataLoader>(ptr, ptr_len);
return std::make_unique<Module>(
std::move(loader),
enable_etdump ? std::make_unique<torch::executor::ETDumpGen>() : nullptr,
debug_buffer_size,
program_verification);
}
/// Creates a Module backed by an mmap'd file at `path`.
///
/// Throws std::runtime_error (via THROW_IF_ERROR) if the file cannot be
/// opened or mapped.
inline std::unique_ptr<Module> load_module_from_file(
    const std::string& path,
    bool enable_etdump,
    size_t debug_buffer_size,
    Program::Verification program_verification) {
  EXECUTORCH_SCOPE_PROF("load_module_from_file");
  Result<MmapDataLoader> res = MmapDataLoader::from(
      path.c_str(), MmapDataLoader::MlockConfig::UseMlockIgnoreErrors);
  // Bug fix: format previously read "0x:%" PRIx32, rendering as "0x:<hex>".
  THROW_IF_ERROR(
      res.error(),
      "Failed to create MmapDataLoader from file %s, error: 0x%" PRIx32,
      path.c_str(),
      static_cast<uint32_t>(res.error()));

  auto loader = std::make_unique<MmapDataLoader>(std::move(res.get()));
  return std::make_unique<Module>(
      std::move(loader),
      enable_etdump ? std::make_unique<torch::executor::ETDumpGen>() : nullptr,
      debug_buffer_size,
      program_verification);
}
static constexpr size_t kDEFAULT_BUNDLED_INPUT_POOL_SIZE = 16 * 1024U;
/// Python-facing wrapper around a serialized BundledProgram. Holds the
/// python `bytes` object so the underlying buffer stays alive, and exposes
/// pointers into the embedded ExecuTorch program.
struct PyBundledModule final {
  // NOTE(review): `bundled_input_pool_size` is accepted but never used in
  // this class — presumably a remnant of an earlier allocation scheme;
  // confirm with callers before removing it from the binding.
  explicit PyBundledModule(
      const py::bytes& buffer,
      uint32_t bundled_input_pool_size)
      : bundled_program_ptr_(buffer),
        program_ptr_(static_cast<const void*>(
            bundled_program_flatbuffer::GetBundledProgram(
                get_bundled_program_ptr())
                ->program()
                ->data())),
        program_len_(bundled_program_flatbuffer::GetBundledProgram(
                         get_bundled_program_ptr())
                         ->program()
                         ->size()) {}

  /// Factory used by the `_load_bundled_program_from_buffer` binding.
  static std::unique_ptr<PyBundledModule> load_from_buffer(
      const py::bytes& buffer,
      uint32_t bundled_input_pool_size) {
    return std::make_unique<PyBundledModule>(buffer, bundled_input_pool_size);
  }

  /// Raw pointer to the serialized bundled program (owned by the bytes).
  const void* get_bundled_program_ptr() {
    return bundled_program_ptr_.cast<std::string_view>().data();
  }

  /// Pointer to the ExecuTorch program embedded in the bundle.
  const void* get_program_ptr() {
    return program_ptr_;
  }

  /// Size in bytes of the embedded ExecuTorch program.
  size_t get_program_len() {
    return program_len_;
  }

 private:
  // Store the bytes object instead of a raw pointer so that this module will
  // keep the bytes alive.
  const py::bytes bundled_program_ptr_;
  const void* program_ptr_;
  size_t program_len_;
};
/// Expose a subset of TensorInfo information to python. Holds a shared_ptr
/// to the owning Module so the underlying metadata stays valid.
struct PyTensorInfo final {
  explicit PyTensorInfo(
      std::shared_ptr<Module> module,
      torch::executor::TensorInfo info)
      : module_(std::move(module)), info_(info) {}

  /// Tensor dimensions as a python tuple.
  py::tuple sizes() const {
    const auto dims = info_.sizes();
    py::tuple result(dims.size());
    size_t idx = 0;
    for (const auto& dim : dims) {
      result[idx++] = py::cast(dim);
    }
    return result;
  }

  /// Scalar type as the underlying integer value of exec_aten::ScalarType.
  int8_t dtype() const {
    return static_cast<std::underlying_type<exec_aten::ScalarType>::type>(
        info_.scalar_type());
  }

  bool is_memory_planned() const {
    return info_.is_memory_planned();
  }

  size_t nbytes() const {
    return info_.nbytes();
  }

  /// Python __repr__, e.g. "TensorInfo(sizes=[1, 2], dtype=..., ...)".
  std::string repr() const {
    // Join the sizes with ", " using a separator-before-element loop.
    std::string size_str = "[";
    bool first = true;
    for (const auto& dim : info_.sizes()) {
      if (!first) {
        size_str.append(", ");
      }
      size_str.append(std::to_string(dim));
      first = false;
    }
    size_str.append("]");
    return "TensorInfo(sizes=" + size_str + ", dtype=" +
        std::string(executorch::runtime::toString(info_.scalar_type())) +
        ", is_memory_planned=" +
        (info_.is_memory_planned() ? "True" : "False") +
        ", nbytes=" + std::to_string(info_.nbytes()) + ")";
  }

 private:
  // TensorInfo relies on module to be alive.
  std::shared_ptr<Module> module_;
  torch::executor::TensorInfo info_;
};
/// Expose a subset of MethodMeta information to python.
struct PyMethodMeta final {
  explicit PyMethodMeta(
      std::shared_ptr<Module> module,
      torch::executor::MethodMeta meta)
      : module_(std::move(module)), meta_(meta) {}

  /// Name of the method this metadata describes.
  const char* name() const {
    return meta_.name();
  }

  size_t num_inputs() const {
    return meta_.num_inputs();
  }

  /// Tensor metadata for input `index`. Throws std::out_of_range (surfaced
  /// in python as IndexError) if the runtime reports an error for `index`.
  std::unique_ptr<PyTensorInfo> input_tensor_meta(size_t index) const {
    const auto result = meta_.input_tensor_meta(index);
    THROW_INDEX_IF_ERROR(
        result.error(), "Cannot get input tensor meta at %zu", index);
    return std::make_unique<PyTensorInfo>(module_, result.get());
  }

  size_t num_outputs() const {
    return meta_.num_outputs();
  }

  /// Tensor metadata for output `index`. Throws std::out_of_range (surfaced
  /// in python as IndexError) if the runtime reports an error for `index`.
  std::unique_ptr<PyTensorInfo> output_tensor_meta(size_t index) const {
    const auto result = meta_.output_tensor_meta(index);
    THROW_INDEX_IF_ERROR(
        result.error(), "Cannot get output tensor meta at %zu", index);
    return std::make_unique<PyTensorInfo>(module_, result.get());
  }

  /// Python __repr__ listing per-input and per-output tensor metadata.
  py::str repr() const {
    py::list input_meta_strs;
    for (size_t i = 0; i < meta_.num_inputs(); ++i) {
      input_meta_strs.append(py::str(input_tensor_meta(i)->repr()));
    }
    py::list output_meta_strs;
    for (size_t i = 0; i < meta_.num_outputs(); ++i) {
      output_meta_strs.append(py::str(output_tensor_meta(i)->repr()));
    }
    // Add quotes to be more similar to Python's repr for strings.
    py::str format =
        "MethodMeta(name='{}', num_inputs={}, input_tensor_meta={}, num_outputs={}, output_tensor_meta={})";
    return format.format(
        std::string(meta_.name()),
        std::to_string(meta_.num_inputs()),
        input_meta_strs,
        std::to_string(meta_.num_outputs()),
        output_meta_strs);
  }

 private:
  // Must keep the Module object alive or else the meta object is invalidated.
  std::shared_ptr<Module> module_;
  torch::executor::MethodMeta meta_;
};
struct PyModule final {
explicit PyModule(
const py::bytes& buffer,
bool enable_etdump,
size_t debug_buffer_size = 0,
Program::Verification program_verification =
Program::Verification::InternalConsistency)
: module_(load_module_from_buffer(
buffer.cast<std::string_view>().data(),
py::len(buffer),
enable_etdump,
debug_buffer_size,
program_verification)) {}
explicit PyModule(
const void* ptr,
size_t ptr_len,
bool enable_etdump,
size_t debug_buffer_size = 0,
Program::Verification program_verification =
Program::Verification::InternalConsistency)
: module_(load_module_from_buffer(
ptr,
ptr_len,
enable_etdump,
debug_buffer_size,
program_verification)) {}
explicit PyModule(
const std::string& path,
bool enable_etdump,
size_t debug_buffer_size = 0,
Program::Verification program_verification =
Program::Verification::InternalConsistency)
: module_(load_module_from_file(
path,
enable_etdump,
debug_buffer_size,
program_verification)) {}
PyModule(const PyModule&) = delete;
PyModule& operator=(const PyModule&) = delete;
PyModule(PyModule&&) = default;
PyModule& operator=(PyModule&&) = default;
// Module is only valid as long as the python buffer is alive.
static std::unique_ptr<PyModule> load_from_buffer(
const py::bytes& buffer,
bool enable_etdump,
size_t debug_buffer_size = 0,
Program::Verification program_verification =
Program::Verification::InternalConsistency) {
return std::make_unique<PyModule>(
buffer, enable_etdump, debug_buffer_size, program_verification);
}
static std::unique_ptr<PyModule> load_from_file(
const std::string& path,
bool enable_etdump,
size_t debug_buffer_size = 0,
Program::Verification program_verification =
Program::Verification::InternalConsistency) {
return std::make_unique<PyModule>(
path, enable_etdump, debug_buffer_size, program_verification);
}
static std::unique_ptr<PyModule> load_from_bundled_program(
PyBundledModule& m,
bool enable_etdump,
size_t debug_buffer_size = 0) {
return std::make_unique<PyModule>(
m.get_program_ptr(),
m.get_program_len(),
enable_etdump,
debug_buffer_size);
}
py::list run_method(
const std::string& method_name,
const py::sequence& inputs,
bool clone_outputs = true) {
const auto inputs_size = py::len(inputs);
std::vector<EValue> cpp_inputs;
cpp_inputs.reserve(inputs_size);
#ifndef USE_ATEN_LIB // Portable mode
// So the ETensors and their metadata stay in scope for
// Module->run_method.
std::vector<torch::executor::TensorImpl> input_tensors;
std::vector<std::vector<torch::executor::Tensor::SizesType>> input_sizes;
std::vector<std::vector<torch::executor::Tensor::StridesType>>
input_strides;
std::vector<std::vector<torch::executor::Tensor::DimOrderType>>
input_dim_order;
// We store pointers to these vector elements so important to reserve so
// that we don't lose those on a vector resize. Don't need to do this for
// the others since they are vectors of vectors, and we don't store a
// pointer to the root level vector data.
input_tensors.reserve(inputs_size);
#endif
// Convert python objects into EValues.
for (size_t i = 0; i < inputs_size; ++i) {
auto python_input = inputs[i];
const std::string& type_str = py::str(python_input.get_type());
if (type_str == "<class 'torch.Tensor'>") {
auto at_tensor = python_input.cast<at::Tensor>();
// alias_etensor_to_attensor will assert on this later, so to better
// propogate up to python we check early and throw an exception.
if (!at_tensor.is_contiguous()) {
auto error_msg = "Input " + std::to_string(i) + "for method " +
method_name + " is not contiguous.";
throw std::runtime_error(error_msg);
}
#ifdef USE_ATEN_LIB
EValue evalue(at_tensor);
#else
// convert at::Tensor to torch::executor::Tensor
auto type =
torch_to_executorch_scalar_type(at_tensor.options().dtype());
size_t dim = at_tensor.dim();
// cant directly alias at::Tensor sizes and strides due to int64 vs
// int32 typing conflict
input_sizes.emplace_back(
at_tensor.sizes().begin(), at_tensor.sizes().end());
input_strides.emplace_back(
at_tensor.strides().begin(), at_tensor.strides().end());
// Only works for MemoryFormat::Contiguous inputs
std::vector<torch::executor::Tensor::DimOrderType> dim_order;
for (size_t cur_dim = 0; cur_dim < dim; cur_dim++) {
dim_order.push_back(cur_dim);
}
input_dim_order.push_back(std::move(dim_order));
input_tensors.emplace_back(
type,
dim,
input_sizes.back().data(),
nullptr,
input_dim_order.back().data(),
input_strides.back().data());
torch::executor::Tensor temp =
torch::executor::Tensor(&input_tensors.back());
alias_etensor_to_attensor(at_tensor, temp);
EValue evalue(temp);
#endif
cpp_inputs.push_back(evalue);
} else if (py::isinstance<py::none>(python_input)) {
cpp_inputs.push_back(EValue());
} else if (py::isinstance<py::bool_>(python_input)) {
cpp_inputs.push_back(EValue(py::cast<bool>(python_input)));
} else if (py::isinstance<py::int_>(python_input)) {
cpp_inputs.push_back(EValue(py::cast<int64_t>(python_input)));
} else {
ET_ASSERT_UNREACHABLE_MSG("Unsupported pytype: %s", type_str.c_str());
}
}
const auto& method = module_->get_method(method_name);
const auto num_outputs = method.outputs_size();
output_storages_ = make_output_storages(method);
std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
for (int i = 0; i < output_storages_.size(); ++i) {
output_storage_spans[i] =
Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
}
auto outputs =
module_->run_method(method_name, cpp_inputs, output_storage_spans);
// Retrieve outputs
return get_outputs_as_py_list(outputs, clone_outputs);
}
py::list forward(const py::sequence& inputs, bool clone_outputs = true) {
return run_method("forward", inputs, clone_outputs);
}
py::list forward_single_input(
const torch::Tensor& inputTensor,
bool clone_outputs = true) {
py::list py_list;
py_list.append(py::cast(inputTensor));
return run_method("forward", py_list, clone_outputs);
}
bool has_etdump() {
return module_->has_etdump();
}
void write_etdump_result_to_file(
const std::string& path,
const py::object& debug_buffer_path) {
if (!has_etdump()) {
throw std::runtime_error("No etdump found");
}
auto& etdump = module_->etdump();
etdump_result result = etdump.get_etdump_data();
if (result.buf != nullptr && result.size > 0) {
write_data_to_file(path, result.buf, result.size);
free(result.buf);
if (module_->has_etdump_debug_buffer() &&
py::isinstance<py::str>(debug_buffer_path)) {
// Also write out the debug buffer to a separate file if requested.
std::string debug_buffer_path_str =
py::cast<py::str>(debug_buffer_path);
const auto debug_buffer = module_->get_etdump_debug_buffer();
write_data_to_file(
debug_buffer_path_str, debug_buffer.data(), debug_buffer.size());
}
} else {
ET_LOG(
Info,
"No etdump data found, try rebuilding with "
"the CMake option EXECUTORCH_ENABLE_EVENT_TRACER or with "
"buck run --config executorch.event_tracer_enabled=true");
}
}
void load_bundled_input(
PyBundledModule& m,
const std::string method_name,
size_t testset_idx) {
const void* bundled_program_ptr = m.get_bundled_program_ptr();
Error status = executorch::bundled_program::load_bundled_input(
module_->get_method(method_name), bundled_program_ptr, testset_idx);
THROW_IF_ERROR(
status,
"load_bundled_input failed with status 0x%" PRIx32,
static_cast<uint32_t>(status));
}
py::list verify_result_with_bundled_expected_output(
PyBundledModule& m,
const std::string method_name,
size_t testset_idx,
double rtol = 1e-5,
double atol = 1e-8) {
const void* bundled_program_ptr = m.get_bundled_program_ptr();
auto& method = module_->get_method(method_name);
Error status = executorch::bundled_program::load_bundled_input(
method, bundled_program_ptr, testset_idx);
THROW_IF_ERROR(
status,
"load_bundled_input failed with status 0x%" PRIx32,
static_cast<uint32_t>(status));
py::list outputs = plan_execute(method_name);
status = executorch::bundled_program::verify_method_outputs(
method, bundled_program_ptr, testset_idx, rtol, atol);
THROW_IF_ERROR(
status,
"Result verification failed with status %" PRIu32,
static_cast<uint32_t>(status));
return outputs;
}
py::list plan_execute(
const std::string method_name,
bool clone_outputs = true) {
auto& method = module_->get_method(method_name);
// Need to pre-allocate space for outputs just like in run_method.
const auto num_outputs = method.outputs_size();
output_storages_ = make_output_storages(method);
std::vector<Span<uint8_t>> output_storage_spans(num_outputs);
for (int i = 0; i < output_storages_.size(); ++i) {
output_storage_spans[i] =
Span<uint8_t>(output_storages_[i].data(), output_storages_[i].size());
}
setup_output_storage(method, output_storage_spans);
auto status = method.execute();
THROW_IF_ERROR(
status,
"executing execution plan for method 'forward' failed with error: 0x%" PRIx32,
static_cast<uint32_t>(status));
const auto outputs = module_->get_outputs(method_name);
return get_outputs_as_py_list(outputs, clone_outputs);
}
py::list get_outputs_as_py_list(
const std::vector<EValue>& outputs,
bool clone_outputs = true) {
const auto outputs_size = outputs.size();
py::list list(outputs_size);
for (size_t i = 0; i < outputs_size; ++i) {
auto& v = outputs[i];
if (Tag::None == v.tag) {
list[i] = py::none();
} else if (Tag::Int == v.tag) {
list[i] = py::cast(v.toInt());
} else if (Tag::Double == v.tag) {
list[i] = py::cast(v.toDouble());
} else if (Tag::Bool == v.tag) {
list[i] = py::cast(v.toBool());
} else if (Tag::String == v.tag) {
list[i] = py::cast(std::string(v.toString().data()));
} else if (Tag::Tensor == v.tag) {
#ifdef USE_ATEN_LIB
// Clone so the outputs in python do not share a lifetime with the
// module object
if (clone_outputs) {
list[i] = py::cast(v.toTensor().clone());
} else {
list[i] = py::cast(v.toTensor());
}
#else
if (clone_outputs) {
list[i] = py::cast(alias_attensor_to_etensor(v.toTensor()).clone());
} else {
list[i] = py::cast(alias_attensor_to_etensor(v.toTensor()));
}
#endif
} else {
ET_ASSERT_UNREACHABLE_MSG("Invalid model output type");
}
}
return list;
}
std::unique_ptr<PyMethodMeta> method_meta(const std::string method_name) {
auto& method = module_->get_method(method_name);
return std::make_unique<PyMethodMeta>(module_, method.method_meta());
}
std::vector<std::string> method_names() {
return module_->method_names();
}
private:
std::shared_ptr<Module> module_;
// Need to keep-alive output storages until they can be compared in case of
// bundled programs.
std::vector<std::vector<uint8_t>> output_storages_;
std::vector<std::vector<uint8_t>> make_output_storages(const Method& method) {
const auto num_outputs = method.outputs_size();
// Create a buffer for each output tensor. Memory planned outputs and non
// tensor outputs get an empty buffer in this list which is ignored later.
std::vector<std::vector<uint8_t>> output_storages;
output_storages_.reserve(num_outputs);
auto meta = method.method_meta();
for (size_t i = 0; i < num_outputs; ++i) {
auto output_type = meta.output_tag(i);
THROW_IF_ERROR(
output_type.error(), "Failed to get output type for output %zu", i);
if (output_type.get() != Tag::Tensor) {
// Skip allocating storage for non-tensor outputs.
output_storages.emplace_back();
continue;
}
const auto& output_tensor_meta =
method.method_meta().output_tensor_meta(i);
THROW_IF_ERROR(
output_tensor_meta.error(),
"Failed to get output tensor meta for output %zu",
i);
if (output_tensor_meta.get().is_memory_planned()) {
// Skip allocating storage for planned memory outputs.
output_storages.emplace_back();
continue;
}
// Allocate storage for the output tensor.
const size_t output_size = output_tensor_meta.get().nbytes();
output_storages.emplace_back(output_size);
}
return output_storages;
}
};
/// Starts a new named block in the ExecuTorch profiler; bound to python as
/// `_create_profile_block`.
void create_profile_block(const std::string& name) {
  EXECUTORCH_PROFILE_CREATE_BLOCK(name.c_str());
}
/// Returns the names of all kernels registered with the runtime as a python
/// list of strings; bound to python as `_get_operator_names`.
py::list get_operator_names() {
  py::list names;
  Span<const Kernel> kernels = get_registered_kernels();
  for (const Kernel& kernel : kernels) {
    // Defensive: skip any kernel entry without a name.
    if (kernel.name_ == nullptr) {
      continue;
    }
    names.append(py::cast(kernel.name_));
  }
  return names;
}
} // namespace
/// Defines the python extension module. Every binding passes `call_guard`
/// so C++ writes to cout/cerr during the call are redirected into python.
PYBIND11_MODULE(EXECUTORCH_PYTHON_MODULE_NAME, m) {
  // Redirects cout and cerr for function calls this guards to the python env.
  auto call_guard = py::
      call_guard<py::scoped_ostream_redirect, py::scoped_estream_redirect>();

  // Bind the verification enum to python.
  py::enum_<Program::Verification>(m, "Verification")
      .value("Minimal", Program::Verification::Minimal)
      .value("InternalConsistency", Program::Verification::InternalConsistency);

  // Module loading entry points.
  m.def(
      "_load_for_executorch",
      PyModule::load_from_file,
      py::arg("path"),
      py::arg("enable_etdump") = false,
      py::arg("debug_buffer_size") = 0,
      py::arg("program_verification") =
          Program::Verification::InternalConsistency,
      call_guard);
  m.def(
      "_load_for_executorch_from_buffer",
      &PyModule::load_from_buffer,
      py::arg("buffer"),
      py::arg("enable_etdump") = false,
      py::arg("debug_buffer_size") = 0,
      py::arg("program_verification") =
          Program::Verification::InternalConsistency,
      call_guard);
  m.def(
      "_load_for_executorch_from_bundled_program",
      &PyModule::load_from_bundled_program,
      py::arg("ptr"),
      py::arg("enable_etdump") = false,
      py::arg("debug_buffer_size") = 0,
      call_guard);
  m.def(
      "_load_bundled_program_from_buffer",
      &PyBundledModule::load_from_buffer,
      py::arg("buffer"),
      py::arg("non_const_pool_size") = kDEFAULT_BUNDLED_INPUT_POOL_SIZE,
      call_guard);

  // Profiler helpers.
  m.def(
      "_dump_profile_results",
      []() {
        prof_result_t prof_result;
        EXECUTORCH_DUMP_PROFILE_RESULTS(&prof_result);
        return py::bytes(
            reinterpret_cast<const char*>(prof_result.prof_data),
            prof_result.num_bytes);
      },
      call_guard);
  m.def("_get_operator_names", &get_operator_names);
  m.def("_create_profile_block", &create_profile_block, call_guard);
  m.def(
      "_reset_profile_results",
      []() { EXECUTORCH_RESET_PROFILE_RESULTS(); },
      call_guard);

  py::class_<PyModule>(m, "ExecuTorchModule")
      .def("load_bundled_input", &PyModule::load_bundled_input, call_guard)
      .def(
          "verify_result_with_bundled_expected_output",
          &PyModule::verify_result_with_bundled_expected_output,
          py::arg("bundle"),
          py::arg("method_name"),
          py::arg("testset_idx"),
          py::arg("rtol") = 1e-5,
          py::arg("atol") = 1e-8,
          call_guard)
      .def(
          "plan_execute",
          &PyModule::plan_execute,
          py::arg("method_name"),
          py::arg("clone_outputs") = true,
          call_guard)
      .def(
          "method_meta",
          &PyModule::method_meta,
          py::arg("method_name"),
          call_guard)
      .def("method_names", &PyModule::method_names, call_guard)
      .def(
          "run_method",
          &PyModule::run_method,
          py::arg("method_name"),
          py::arg("inputs") = py::list(),
          py::arg("clone_outputs") = true,
          call_guard)
      .def(
          "forward",
          &PyModule::forward,
          py::arg("inputs") = py::list(),
          py::arg("clone_outputs") = true,
          call_guard)
      .def("has_etdump", &PyModule::has_etdump, call_guard)
      .def(
          "write_etdump_result_to_file",
          &PyModule::write_etdump_result_to_file,
          py::arg("path"),
          py::arg("debug_buffer_path") = py::none(),
          call_guard)
      // Overload taking a sequence of inputs.
      .def(
          "__call__",
          &PyModule::forward,
          py::arg("inputs") = py::list(),
          py::arg("clone_outputs") = true,
          call_guard)
      // NOTE(review): this overload takes a single Tensor, yet "inputs"
      // defaults to py::list(), which cannot convert to a Tensor —
      // presumably the default is never exercised because the first
      // overload matches list arguments; confirm before relying on it.
      .def(
          "__call__",
          &PyModule::forward_single_input,
          py::arg("inputs") = py::list(),
          py::arg("clone_outputs") = true,
          call_guard);

  // Registered so python can hold bundled programs; no methods exposed.
  py::class_<PyBundledModule>(m, "BundledModule");
  py::class_<PyTensorInfo>(m, "TensorInfo")
      .def("sizes", &PyTensorInfo::sizes, call_guard)
      .def("dtype", &PyTensorInfo::dtype, call_guard)
      .def("is_memory_planned", &PyTensorInfo::is_memory_planned, call_guard)
      .def("nbytes", &PyTensorInfo::nbytes, call_guard)
      .def("__repr__", &PyTensorInfo::repr, call_guard);
  py::class_<PyMethodMeta>(m, "MethodMeta")
      .def("name", &PyMethodMeta::name, call_guard)
      .def("num_inputs", &PyMethodMeta::num_inputs, call_guard)
      .def("num_outputs", &PyMethodMeta::num_outputs, call_guard)
      .def(
          "input_tensor_meta",
          &PyMethodMeta::input_tensor_meta,
          py::arg("index"),
          call_guard)
      .def(
          "output_tensor_meta",
          &PyMethodMeta::output_tensor_meta,
          py::arg("index"),
          call_guard)
      .def("__repr__", &PyMethodMeta::repr, call_guard);
}
} // namespace pybindings
} // namespace extension
} // namespace executorch