examples/devtools/example_runner/example_runner.cpp - platform/external/executorch - Git at Google

 /*
  * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
  */

 /**
  * @file
  *
  * This tool can run ExecuTorch model files that only use operators that
  * are covered by the portable kernels, with possible delegate to the
  * test_backend_compiler_lib.
  *
  * It sets all input tensor data to ones, and assumes that the outputs are
  * all fp32 tensors.
  */

 #include <fstream>
 #include <memory>

 #include <gflags/gflags.h>

 #include <executorch/devtools/bundled_program/bundled_program.h>
 #include <executorch/devtools/etdump/etdump_flatcc.h>
 #include <executorch/extension/data_loader/buffer_data_loader.h>
 #include <executorch/runtime/executor/method.h>
 #include <executorch/runtime/executor/program.h>
 #include <executorch/runtime/platform/log.h>
 #include <executorch/runtime/platform/runtime.h>

 static std::array<uint8_t, 4 * 1024U * 1024U> method_allocator_pool; // 4MB

 DEFINE_string(
     bundled_program_path,
     "model_bundled.bpte",
     "Model serialized in flatbuffer format.");

 DEFINE_int32(
     testset_idx,
     0,
     "Index of bundled verification set to be run "
     "by bundled model for verification");

 DEFINE_string(
     etdump_path,
     "etdump.etdp",
     "If etdump generation is enabled an etdump will be written out to this path");

 DEFINE_bool(
     output_verification,
     false,
     "Comapre the model output to the reference outputs present in the BundledProgram.");

 DEFINE_bool(
     print_output,
     false,
     "Print the output of the ET model to stdout, if needs.");

 DEFINE_bool(dump_outputs, false, "Dump outputs to etdump file");

 DEFINE_bool(
     dump_intermediate_outputs,
     false,
     "Dump intermediate outputs to etdump file.");

 DEFINE_string(
     debug_output_path,
     "debug_output.bin",
     "Path to dump debug outputs to.");

 DEFINE_int32(
     debug_buffer_size,
     262144, // 256 KB
     "Size of the debug buffer in bytes to allocate for intermediate outputs and program outputs logging.");

 using executorch::etdump::ETDumpGen;
 using executorch::etdump::ETDumpResult;
 using executorch::extension::BufferDataLoader;
 using executorch::runtime::Error;
 using executorch::runtime::EValue;
 using executorch::runtime::EventTracerDebugLogLevel;
 using executorch::runtime::HierarchicalAllocator;
 using executorch::runtime::MemoryAllocator;
 using executorch::runtime::MemoryManager;
 using executorch::runtime::Method;
 using executorch::runtime::MethodMeta;
 using executorch::runtime::Program;
 using executorch::runtime::Result;
 using executorch::runtime::Span;

 std::vector<uint8_t> load_file_or_die(const char* path) {
   std::ifstream file(path, std::ios::binary | std::ios::ate);
   const size_t nbytes = file.tellg();
   file.seekg(0, std::ios::beg);
   auto file_data = std::vector<uint8_t>(nbytes);
   ET_CHECK_MSG(
       file.read(reinterpret_cast<char*>(file_data.data()), nbytes),
       "Could not load contents of file '%s'",
       path);
   return file_data;
 }

 int main(int argc, char** argv) {
   executorch::runtime::runtime_init();

   gflags::ParseCommandLineFlags(&argc, &argv, true);
   if (argc != 1) {
     std::string msg = "Extra commandline args:";
     for (int i = 1 /* skip argv[0] (program name) */; i < argc; i++) {
       msg += std::string(" ") + argv[i];
     }
     ET_LOG(Error, "%s", msg.c_str());
     return 1;
   }

   // Read in the entire file.
   const char* bundled_program_path = FLAGS_bundled_program_path.c_str();
   std::vector<uint8_t> file_data = load_file_or_die(bundled_program_path);

   // Find the offset to the embedded Program.
   const void* program_data;
   size_t program_data_len;
   Error status = executorch::bundled_program::get_program_data(
       reinterpret_cast<void*>(file_data.data()),
       file_data.size(),
       &program_data,
       &program_data_len);
   ET_CHECK_MSG(
       status == Error::Ok,
       "get_program_data() failed on file '%s': 0x%x",
       bundled_program_path,
       (unsigned int)status);

   auto buffer_data_loader = BufferDataLoader(program_data, program_data_len);

   // Parse the program file. This is immutable, and can also be reused
   // between multiple execution invocations across multiple threads.
   Result<Program> program = Program::load(&buffer_data_loader);
   if (!program.ok()) {
     ET_LOG(Error, "Failed to parse model file %s", bundled_program_path);
     return 1;
   }
   ET_LOG(Info, "Model file %s is loaded.", bundled_program_path);

   // Use the first method in the program.
   const char* method_name = nullptr;
   {
     const auto method_name_result = program->get_method_name(0);
     ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
     method_name = *method_name_result;
   }
   ET_LOG(Info, "Running method %s", method_name);

   // MethodMeta describes the memory requirements of the method.
   Result<MethodMeta> method_meta = program->method_meta(method_name);
   ET_CHECK_MSG(
       method_meta.ok(),
       "Failed to get method_meta for %s: 0x%x",
       method_name,
       (unsigned int)method_meta.error());

   //
   // The runtime does not use malloc/new; it allocates all memory using the
   // MemoryManger provided by the client. Clients are responsible for allocating
   // the memory ahead of time, or providing MemoryAllocator subclasses that can
   // do it dynamically.
   //

   // The method allocator is used to allocate all dynamic C++ metadata/objects
   // used to represent the loaded method. This allocator is only used during
   // loading a method of the program, which will return an error if there was
   // not enough memory.
   //
   // The amount of memory required depends on the loaded method and the runtime
   // code itself. The amount of memory here is usually determined by running the
   // method and seeing how much memory is actually used, though it's possible to
   // subclass MemoryAllocator so that it calls malloc() under the hood (see
   // MallocMemoryAllocator).
   //
   // In this example we use a statically allocated memory pool.
   MemoryAllocator method_allocator{MemoryAllocator(
       sizeof(method_allocator_pool), method_allocator_pool.data())};

   // The memory-planned buffers will back the mutable tensors used by the
   // method. The sizes of these buffers were determined ahead of time during the
   // memory-planning pasees.
   //
   // Each buffer typically corresponds to a different hardware memory bank. Most
   // mobile environments will only have a single buffer. Some embedded
   // environments may have more than one for, e.g., slow/large DRAM and
   // fast/small SRAM, or for memory associated with particular cores.
   std::vector<std::unique_ptr<uint8_t[]>> planned_buffers; // Owns the memory
   std::vector<Span<uint8_t>> planned_spans; // Passed to the allocator
   size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers();
   for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
     // .get() will always succeed because id < num_memory_planned_buffers.
     size_t buffer_size =
         static_cast<size_t>(method_meta->memory_planned_buffer_size(id).get());
     ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", id, buffer_size);
     planned_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
     planned_spans.push_back({planned_buffers.back().get(), buffer_size});
   }
   HierarchicalAllocator planned_memory(
       {planned_spans.data(), planned_spans.size()});

   // Assemble all of the allocators into the MemoryManager that the Executor
   // will use.
   MemoryManager memory_manager(&method_allocator, &planned_memory);

   //
   // Load the method from the program, using the provided allocators. Running
   // the method can mutate the memory-planned buffers, so the method should only
   // be used by a single thread at at time, but it can be reused.
   //
   ETDumpGen etdump_gen;
   Result<Method> method =
       program->load_method(method_name, &memory_manager, &etdump_gen);
   ET_CHECK_MSG(
       method.ok(),
       "Loading of method %s failed with status 0x%" PRIx32,
       method_name,
       method.error());
   ET_LOG(Info, "Method loaded.");

   void* debug_buffer = malloc(FLAGS_debug_buffer_size);
   if (FLAGS_dump_intermediate_outputs) {
     Span<uint8_t> buffer((uint8_t*)debug_buffer, FLAGS_debug_buffer_size);
     etdump_gen.set_debug_buffer(buffer);
     etdump_gen.set_event_tracer_debug_level(
         EventTracerDebugLogLevel::kIntermediateOutputs);
   } else if (FLAGS_dump_outputs) {
     Span<uint8_t> buffer((uint8_t*)debug_buffer, FLAGS_debug_buffer_size);
     etdump_gen.set_debug_buffer(buffer);
     etdump_gen.set_event_tracer_debug_level(
         EventTracerDebugLogLevel::kProgramOutputs);
   }
   // Use the inputs embedded in the bundled program.
   status = executorch::bundled_program::load_bundled_input(
       *method, file_data.data(), FLAGS_testset_idx);
   ET_CHECK_MSG(
       status == Error::Ok,
       "LoadBundledInput failed with status 0x%" PRIx32,
       status);

   ET_LOG(Info, "Inputs prepared.");

   // Run the model.
   status = method->execute();
   ET_CHECK_MSG(
       status == Error::Ok,
       "Execution of method %s failed with status 0x%" PRIx32,
       method_name,
       status);
   ET_LOG(Info, "Model executed successfully.");

   // Print the outputs.
   if (FLAGS_print_output) {
     std::vector<EValue> outputs(method->outputs_size());
     status = method->get_outputs(outputs.data(), outputs.size());
     ET_CHECK(status == Error::Ok);
     for (EValue& output : outputs) {
       // TODO(T159700776): This assumes that all outputs are fp32 tensors. Add
       // support for other EValues and Tensor dtypes, and print tensors in a
       // more readable way.
       auto output_tensor = output.toTensor();
       auto data_output = output_tensor.const_data_ptr<float>();
       for (size_t j = 0; j < output_tensor.numel(); ++j) {
         ET_LOG(Info, "%f", data_output[j]);
       }
     }
   }

   // Dump the etdump data containing profiling/debugging data to the specified
   // file.
   ETDumpResult result = etdump_gen.get_etdump_data();
   if (result.buf != nullptr && result.size > 0) {
     FILE* f = fopen(FLAGS_etdump_path.c_str(), "w+");
     fwrite((uint8_t*)result.buf, 1, result.size, f);
     fclose(f);
     free(result.buf);
   }

   if (FLAGS_output_verification) {
     // Verify the outputs.
     status = executorch::bundled_program::verify_method_outputs(
         *method,
         file_data.data(),
         FLAGS_testset_idx,
         1e-3, // rtol
         1e-5 // atol
     );
     ET_CHECK_MSG(
         status == Error::Ok,
         "Bundle verification failed with status 0x%" PRIx32,
         status);
     ET_LOG(Info, "Model verified successfully.");
   }

   if (FLAGS_dump_outputs || FLAGS_dump_intermediate_outputs) {
     FILE* f = fopen(FLAGS_debug_output_path.c_str(), "w+");
     fwrite((uint8_t*)debug_buffer, 1, FLAGS_debug_buffer_size, f);
     fclose(f);
   }
   free(debug_buffer);

   return 0;
 }
	/*
	* Copyright (c) Meta Platforms, Inc. and affiliates.
	* All rights reserved.
	*
	* This source code is licensed under the BSD-style license found in the
	* LICENSE file in the root directory of this source tree.
	*/

	/**
	* @file
	*
	* This tool can run ExecuTorch model files that only use operators that
	* are covered by the portable kernels, with possible delegate to the
	* test_backend_compiler_lib.
	*
	* It sets all input tensor data to ones, and assumes that the outputs are
	* all fp32 tensors.
	*/

	#include <fstream>
	#include <memory>

	#include <gflags/gflags.h>

	#include <executorch/devtools/bundled_program/bundled_program.h>
	#include <executorch/devtools/etdump/etdump_flatcc.h>
	#include <executorch/extension/data_loader/buffer_data_loader.h>
	#include <executorch/runtime/executor/method.h>
	#include <executorch/runtime/executor/program.h>
	#include <executorch/runtime/platform/log.h>
	#include <executorch/runtime/platform/runtime.h>

	static std::array<uint8_t, 4 * 1024U * 1024U> method_allocator_pool; // 4MB

	DEFINE_string(
	bundled_program_path,
	"model_bundled.bpte",
	"Model serialized in flatbuffer format.");

	DEFINE_int32(
	testset_idx,
	0,
	"Index of bundled verification set to be run "
	"by bundled model for verification");

	DEFINE_string(
	etdump_path,
	"etdump.etdp",
	"If etdump generation is enabled an etdump will be written out to this path");

	DEFINE_bool(
	output_verification,
	false,
	"Comapre the model output to the reference outputs present in the BundledProgram.");

	DEFINE_bool(
	print_output,
	false,
	"Print the output of the ET model to stdout, if needs.");

	DEFINE_bool(dump_outputs, false, "Dump outputs to etdump file");

	DEFINE_bool(
	dump_intermediate_outputs,
	false,
	"Dump intermediate outputs to etdump file.");

	DEFINE_string(
	debug_output_path,
	"debug_output.bin",
	"Path to dump debug outputs to.");

	DEFINE_int32(
	debug_buffer_size,
	262144, // 256 KB
	"Size of the debug buffer in bytes to allocate for intermediate outputs and program outputs logging.");

	using executorch::etdump::ETDumpGen;
	using executorch::etdump::ETDumpResult;
	using executorch::extension::BufferDataLoader;
	using executorch::runtime::Error;
	using executorch::runtime::EValue;
	using executorch::runtime::EventTracerDebugLogLevel;
	using executorch::runtime::HierarchicalAllocator;
	using executorch::runtime::MemoryAllocator;
	using executorch::runtime::MemoryManager;
	using executorch::runtime::Method;
	using executorch::runtime::MethodMeta;
	using executorch::runtime::Program;
	using executorch::runtime::Result;
	using executorch::runtime::Span;

	std::vector<uint8_t> load_file_or_die(const char* path) {
	std::ifstream file(path, std::ios::binary \| std::ios::ate);
	const size_t nbytes = file.tellg();
	file.seekg(0, std::ios::beg);
	auto file_data = std::vector<uint8_t>(nbytes);
	ET_CHECK_MSG(
	file.read(reinterpret_cast<char*>(file_data.data()), nbytes),
	"Could not load contents of file '%s'",
	path);
	return file_data;
	}

	int main(int argc, char** argv) {
	executorch::runtime::runtime_init();

	gflags::ParseCommandLineFlags(&argc, &argv, true);
	if (argc != 1) {
	std::string msg = "Extra commandline args:";
	for (int i = 1 /* skip argv[0] (program name) */; i < argc; i++) {
	msg += std::string(" ") + argv[i];
	}
	ET_LOG(Error, "%s", msg.c_str());
	return 1;
	}

	// Read in the entire file.
	const char* bundled_program_path = FLAGS_bundled_program_path.c_str();
	std::vector<uint8_t> file_data = load_file_or_die(bundled_program_path);

	// Find the offset to the embedded Program.
	const void* program_data;
	size_t program_data_len;
	Error status = executorch::bundled_program::get_program_data(
	reinterpret_cast<void*>(file_data.data()),
	file_data.size(),
	&program_data,
	&program_data_len);
	ET_CHECK_MSG(
	status == Error::Ok,
	"get_program_data() failed on file '%s': 0x%x",
	bundled_program_path,
	(unsigned int)status);

	auto buffer_data_loader = BufferDataLoader(program_data, program_data_len);

	// Parse the program file. This is immutable, and can also be reused
	// between multiple execution invocations across multiple threads.
	Result<Program> program = Program::load(&buffer_data_loader);
	if (!program.ok()) {
	ET_LOG(Error, "Failed to parse model file %s", bundled_program_path);
	return 1;
	}
	ET_LOG(Info, "Model file %s is loaded.", bundled_program_path);

	// Use the first method in the program.
	const char* method_name = nullptr;
	{
	const auto method_name_result = program->get_method_name(0);
	ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
	method_name = *method_name_result;
	}
	ET_LOG(Info, "Running method %s", method_name);

	// MethodMeta describes the memory requirements of the method.
	Result<MethodMeta> method_meta = program->method_meta(method_name);
	ET_CHECK_MSG(
	method_meta.ok(),
	"Failed to get method_meta for %s: 0x%x",
	method_name,
	(unsigned int)method_meta.error());

	//
	// The runtime does not use malloc/new; it allocates all memory using the
	// MemoryManger provided by the client. Clients are responsible for allocating
	// the memory ahead of time, or providing MemoryAllocator subclasses that can
	// do it dynamically.
	//

	// The method allocator is used to allocate all dynamic C++ metadata/objects
	// used to represent the loaded method. This allocator is only used during
	// loading a method of the program, which will return an error if there was
	// not enough memory.
	//
	// The amount of memory required depends on the loaded method and the runtime
	// code itself. The amount of memory here is usually determined by running the
	// method and seeing how much memory is actually used, though it's possible to
	// subclass MemoryAllocator so that it calls malloc() under the hood (see
	// MallocMemoryAllocator).
	//
	// In this example we use a statically allocated memory pool.
	MemoryAllocator method_allocator{MemoryAllocator(
	sizeof(method_allocator_pool), method_allocator_pool.data())};

	// The memory-planned buffers will back the mutable tensors used by the
	// method. The sizes of these buffers were determined ahead of time during the
	// memory-planning pasees.
	//
	// Each buffer typically corresponds to a different hardware memory bank. Most
	// mobile environments will only have a single buffer. Some embedded
	// environments may have more than one for, e.g., slow/large DRAM and
	// fast/small SRAM, or for memory associated with particular cores.
	std::vector<std::unique_ptr<uint8_t[]>> planned_buffers; // Owns the memory
	std::vector<Span<uint8_t>> planned_spans; // Passed to the allocator
	size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers();
	for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
	// .get() will always succeed because id < num_memory_planned_buffers.
	size_t buffer_size =
	static_cast<size_t>(method_meta->memory_planned_buffer_size(id).get());
	ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", id, buffer_size);
	planned_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
	planned_spans.push_back({planned_buffers.back().get(), buffer_size});
	}
	HierarchicalAllocator planned_memory(
	{planned_spans.data(), planned_spans.size()});

	// Assemble all of the allocators into the MemoryManager that the Executor
	// will use.
	MemoryManager memory_manager(&method_allocator, &planned_memory);

	//
	// Load the method from the program, using the provided allocators. Running
	// the method can mutate the memory-planned buffers, so the method should only
	// be used by a single thread at at time, but it can be reused.
	//
	ETDumpGen etdump_gen;
	Result<Method> method =
	program->load_method(method_name, &memory_manager, &etdump_gen);
	ET_CHECK_MSG(
	method.ok(),
	"Loading of method %s failed with status 0x%" PRIx32,
	method_name,
	method.error());
	ET_LOG(Info, "Method loaded.");

	void* debug_buffer = malloc(FLAGS_debug_buffer_size);
	if (FLAGS_dump_intermediate_outputs) {
	Span<uint8_t> buffer((uint8_t*)debug_buffer, FLAGS_debug_buffer_size);
	etdump_gen.set_debug_buffer(buffer);
	etdump_gen.set_event_tracer_debug_level(
	EventTracerDebugLogLevel::kIntermediateOutputs);
	} else if (FLAGS_dump_outputs) {
	Span<uint8_t> buffer((uint8_t*)debug_buffer, FLAGS_debug_buffer_size);
	etdump_gen.set_debug_buffer(buffer);
	etdump_gen.set_event_tracer_debug_level(
	EventTracerDebugLogLevel::kProgramOutputs);
	}
	// Use the inputs embedded in the bundled program.
	status = executorch::bundled_program::load_bundled_input(
	*method, file_data.data(), FLAGS_testset_idx);
	ET_CHECK_MSG(
	status == Error::Ok,
	"LoadBundledInput failed with status 0x%" PRIx32,
	status);

	ET_LOG(Info, "Inputs prepared.");

	// Run the model.
	status = method->execute();
	ET_CHECK_MSG(
	status == Error::Ok,
	"Execution of method %s failed with status 0x%" PRIx32,
	method_name,
	status);
	ET_LOG(Info, "Model executed successfully.");

	// Print the outputs.
	if (FLAGS_print_output) {
	std::vector<EValue> outputs(method->outputs_size());
	status = method->get_outputs(outputs.data(), outputs.size());
	ET_CHECK(status == Error::Ok);
	for (EValue& output : outputs) {
	// TODO(T159700776): This assumes that all outputs are fp32 tensors. Add
	// support for other EValues and Tensor dtypes, and print tensors in a
	// more readable way.
	auto output_tensor = output.toTensor();
	auto data_output = output_tensor.const_data_ptr<float>();
	for (size_t j = 0; j < output_tensor.numel(); ++j) {
	ET_LOG(Info, "%f", data_output[j]);
	}
	}
	}

	// Dump the etdump data containing profiling/debugging data to the specified
	// file.
	ETDumpResult result = etdump_gen.get_etdump_data();
	if (result.buf != nullptr && result.size > 0) {
	FILE* f = fopen(FLAGS_etdump_path.c_str(), "w+");
	fwrite((uint8_t*)result.buf, 1, result.size, f);
	fclose(f);
	free(result.buf);
	}

	if (FLAGS_output_verification) {
	// Verify the outputs.
	status = executorch::bundled_program::verify_method_outputs(
	*method,
	file_data.data(),
	FLAGS_testset_idx,
	1e-3, // rtol
	1e-5 // atol
	);
	ET_CHECK_MSG(
	status == Error::Ok,
	"Bundle verification failed with status 0x%" PRIx32,
	status);
	ET_LOG(Info, "Model verified successfully.");
	}

	if (FLAGS_dump_outputs \|\| FLAGS_dump_intermediate_outputs) {
	FILE* f = fopen(FLAGS_debug_output_path.c_str(), "w+");
	fwrite((uint8_t*)debug_buffer, 1, FLAGS_debug_buffer_size, f);
	fclose(f);
	}
	free(debug_buffer);

	return 0;
	}