blob: 1aae0f2a98f8932de17e87248089a07f66ad670b [file] [log] [blame]
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @file
*
* This tool can run ExecuTorch model files that only use operators that
* are covered by the portable kernels, with possible delegate to the
* test_backend_compiler_lib.
*
* It uses the inputs embedded in the bundled program and, when printing the
* outputs, assumes that they are all fp32 tensors.
*/
#include <array>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <memory>
#include <string>
#include <vector>
#include <gflags/gflags.h>
#include <executorch/devtools/bundled_program/bundled_program.h>
#include <executorch/devtools/etdump/etdump_flatcc.h>
#include <executorch/extension/data_loader/buffer_data_loader.h>
#include <executorch/runtime/executor/method.h>
#include <executorch/runtime/executor/program.h>
#include <executorch/runtime/platform/log.h>
#include <executorch/runtime/platform/runtime.h>
// Statically allocated 4 MiB byte pool; main() hands it to the method
// allocator as its backing storage.
static std::array<uint8_t, (4U << 20)> method_allocator_pool;
// Command-line flags. Each DEFINE_* creates a FLAGS_<name> variable that
// main() reads after gflags::ParseCommandLineFlags().

// Input: path of the bundled program file to load.
DEFINE_string(
    bundled_program_path,
    "model_bundled.bpte",
    "Model serialized in flatbuffer format.");

// Input: which bundled test set supplies the inputs (and reference outputs).
DEFINE_int32(
    testset_idx,
    0,
    "Index of bundled verification set to be run "
    "by bundled model for verification");

// Output: destination of the etdump data, when any was generated.
DEFINE_string(
    etdump_path,
    "etdump.etdp",
    "If etdump generation is enabled an etdump will be written out to this path");

DEFINE_bool(
    output_verification,
    false,
    "Compare the model output to the reference outputs present in the BundledProgram.");

DEFINE_bool(
    print_output,
    false,
    "Print the output of the ET model to stdout, if needs.");

// Debug-output logging. If both dump flags are set, main() uses the
// intermediate-outputs level.
DEFINE_bool(dump_outputs, false, "Dump outputs to etdump file");

DEFINE_bool(
    dump_intermediate_outputs,
    false,
    "Dump intermediate outputs to etdump file.");

DEFINE_string(
    debug_output_path,
    "debug_output.bin",
    "Path to dump debug outputs to.");

DEFINE_int32(
    debug_buffer_size,
    262144, // 256 KB
    "Size of the debug buffer in bytes to allocate for intermediate outputs and program outputs logging.");
using executorch::etdump::ETDumpGen;
using executorch::etdump::ETDumpResult;
using executorch::extension::BufferDataLoader;
using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::EventTracerDebugLogLevel;
using executorch::runtime::HierarchicalAllocator;
using executorch::runtime::MemoryAllocator;
using executorch::runtime::MemoryManager;
using executorch::runtime::Method;
using executorch::runtime::MethodMeta;
using executorch::runtime::Program;
using executorch::runtime::Result;
using executorch::runtime::Span;
/**
 * Reads the entire contents of the file at `path` into memory.
 *
 * Fails an ET_CHECK_MSG check (rather than returning an error) if the file
 * cannot be opened, its size cannot be determined, or its contents cannot be
 * read in full.
 *
 * @param path Path of the file to read.
 * @return The file's bytes; empty if the file is empty.
 */
std::vector<uint8_t> load_file_or_die(const char* path) {
  // Open positioned at the end so that tellg() reports the file size.
  std::ifstream file(path, std::ios::binary | std::ios::ate);
  ET_CHECK_MSG(file.is_open(), "Could not open file '%s'", path);

  // tellg() reports -1 on failure; converting that straight to size_t would
  // request an enormous allocation instead of producing a diagnostic.
  const std::streamoff end_offset = file.tellg();
  ET_CHECK_MSG(
      end_offset >= 0, "Could not determine the size of file '%s'", path);
  const size_t nbytes = static_cast<size_t>(end_offset);

  file.seekg(0, std::ios::beg);
  std::vector<uint8_t> file_data(nbytes);
  ET_CHECK_MSG(
      file.read(
          reinterpret_cast<char*>(file_data.data()),
          static_cast<std::streamsize>(nbytes)),
      "Could not load contents of file '%s'",
      path);
  return file_data;
}
int main(int argc, char** argv) {
executorch::runtime::runtime_init();
gflags::ParseCommandLineFlags(&argc, &argv, true);
if (argc != 1) {
std::string msg = "Extra commandline args:";
for (int i = 1 /* skip argv[0] (program name) */; i < argc; i++) {
msg += std::string(" ") + argv[i];
}
ET_LOG(Error, "%s", msg.c_str());
return 1;
}
// Read in the entire file.
const char* bundled_program_path = FLAGS_bundled_program_path.c_str();
std::vector<uint8_t> file_data = load_file_or_die(bundled_program_path);
// Find the offset to the embedded Program.
const void* program_data;
size_t program_data_len;
Error status = executorch::bundled_program::get_program_data(
reinterpret_cast<void*>(file_data.data()),
file_data.size(),
&program_data,
&program_data_len);
ET_CHECK_MSG(
status == Error::Ok,
"get_program_data() failed on file '%s': 0x%x",
bundled_program_path,
(unsigned int)status);
auto buffer_data_loader = BufferDataLoader(program_data, program_data_len);
// Parse the program file. This is immutable, and can also be reused
// between multiple execution invocations across multiple threads.
Result<Program> program = Program::load(&buffer_data_loader);
if (!program.ok()) {
ET_LOG(Error, "Failed to parse model file %s", bundled_program_path);
return 1;
}
ET_LOG(Info, "Model file %s is loaded.", bundled_program_path);
// Use the first method in the program.
const char* method_name = nullptr;
{
const auto method_name_result = program->get_method_name(0);
ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
method_name = *method_name_result;
}
ET_LOG(Info, "Running method %s", method_name);
// MethodMeta describes the memory requirements of the method.
Result<MethodMeta> method_meta = program->method_meta(method_name);
ET_CHECK_MSG(
method_meta.ok(),
"Failed to get method_meta for %s: 0x%x",
method_name,
(unsigned int)method_meta.error());
//
// The runtime does not use malloc/new; it allocates all memory using the
// MemoryManger provided by the client. Clients are responsible for allocating
// the memory ahead of time, or providing MemoryAllocator subclasses that can
// do it dynamically.
//
// The method allocator is used to allocate all dynamic C++ metadata/objects
// used to represent the loaded method. This allocator is only used during
// loading a method of the program, which will return an error if there was
// not enough memory.
//
// The amount of memory required depends on the loaded method and the runtime
// code itself. The amount of memory here is usually determined by running the
// method and seeing how much memory is actually used, though it's possible to
// subclass MemoryAllocator so that it calls malloc() under the hood (see
// MallocMemoryAllocator).
//
// In this example we use a statically allocated memory pool.
MemoryAllocator method_allocator{MemoryAllocator(
sizeof(method_allocator_pool), method_allocator_pool.data())};
// The memory-planned buffers will back the mutable tensors used by the
// method. The sizes of these buffers were determined ahead of time during the
// memory-planning pasees.
//
// Each buffer typically corresponds to a different hardware memory bank. Most
// mobile environments will only have a single buffer. Some embedded
// environments may have more than one for, e.g., slow/large DRAM and
// fast/small SRAM, or for memory associated with particular cores.
std::vector<std::unique_ptr<uint8_t[]>> planned_buffers; // Owns the memory
std::vector<Span<uint8_t>> planned_spans; // Passed to the allocator
size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers();
for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
// .get() will always succeed because id < num_memory_planned_buffers.
size_t buffer_size =
static_cast<size_t>(method_meta->memory_planned_buffer_size(id).get());
ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", id, buffer_size);
planned_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
planned_spans.push_back({planned_buffers.back().get(), buffer_size});
}
HierarchicalAllocator planned_memory(
{planned_spans.data(), planned_spans.size()});
// Assemble all of the allocators into the MemoryManager that the Executor
// will use.
MemoryManager memory_manager(&method_allocator, &planned_memory);
//
// Load the method from the program, using the provided allocators. Running
// the method can mutate the memory-planned buffers, so the method should only
// be used by a single thread at at time, but it can be reused.
//
ETDumpGen etdump_gen;
Result<Method> method =
program->load_method(method_name, &memory_manager, &etdump_gen);
ET_CHECK_MSG(
method.ok(),
"Loading of method %s failed with status 0x%" PRIx32,
method_name,
method.error());
ET_LOG(Info, "Method loaded.");
void* debug_buffer = malloc(FLAGS_debug_buffer_size);
if (FLAGS_dump_intermediate_outputs) {
Span<uint8_t> buffer((uint8_t*)debug_buffer, FLAGS_debug_buffer_size);
etdump_gen.set_debug_buffer(buffer);
etdump_gen.set_event_tracer_debug_level(
EventTracerDebugLogLevel::kIntermediateOutputs);
} else if (FLAGS_dump_outputs) {
Span<uint8_t> buffer((uint8_t*)debug_buffer, FLAGS_debug_buffer_size);
etdump_gen.set_debug_buffer(buffer);
etdump_gen.set_event_tracer_debug_level(
EventTracerDebugLogLevel::kProgramOutputs);
}
// Use the inputs embedded in the bundled program.
status = executorch::bundled_program::load_bundled_input(
*method, file_data.data(), FLAGS_testset_idx);
ET_CHECK_MSG(
status == Error::Ok,
"LoadBundledInput failed with status 0x%" PRIx32,
status);
ET_LOG(Info, "Inputs prepared.");
// Run the model.
status = method->execute();
ET_CHECK_MSG(
status == Error::Ok,
"Execution of method %s failed with status 0x%" PRIx32,
method_name,
status);
ET_LOG(Info, "Model executed successfully.");
// Print the outputs.
if (FLAGS_print_output) {
std::vector<EValue> outputs(method->outputs_size());
status = method->get_outputs(outputs.data(), outputs.size());
ET_CHECK(status == Error::Ok);
for (EValue& output : outputs) {
// TODO(T159700776): This assumes that all outputs are fp32 tensors. Add
// support for other EValues and Tensor dtypes, and print tensors in a
// more readable way.
auto output_tensor = output.toTensor();
auto data_output = output_tensor.const_data_ptr<float>();
for (size_t j = 0; j < output_tensor.numel(); ++j) {
ET_LOG(Info, "%f", data_output[j]);
}
}
}
// Dump the etdump data containing profiling/debugging data to the specified
// file.
ETDumpResult result = etdump_gen.get_etdump_data();
if (result.buf != nullptr && result.size > 0) {
FILE* f = fopen(FLAGS_etdump_path.c_str(), "w+");
fwrite((uint8_t*)result.buf, 1, result.size, f);
fclose(f);
free(result.buf);
}
if (FLAGS_output_verification) {
// Verify the outputs.
status = executorch::bundled_program::verify_method_outputs(
*method,
file_data.data(),
FLAGS_testset_idx,
1e-3, // rtol
1e-5 // atol
);
ET_CHECK_MSG(
status == Error::Ok,
"Bundle verification failed with status 0x%" PRIx32,
status);
ET_LOG(Info, "Model verified successfully.");
}
if (FLAGS_dump_outputs || FLAGS_dump_intermediate_outputs) {
FILE* f = fopen(FLAGS_debug_output_path.c_str(), "w+");
fwrite((uint8_t*)debug_buffer, 1, FLAGS_debug_buffer_size, f);
fclose(f);
}
free(debug_buffer);
return 0;
}