| /* |
| * Copyright (c) Meta Platforms, Inc. and affiliates. |
| * All rights reserved. |
| * |
| * This source code is licensed under the BSD-style license found in the |
| * LICENSE file in the root directory of this source tree. |
| */ |
| |
| #include <gflags/gflags.h> |
| |
| #include <executorch/extension/data_loader/buffer_data_loader.h> |
| #include <executorch/extension/data_loader/file_data_loader.h> |
| #include <executorch/runtime/executor/method.h> |
| #include <executorch/runtime/executor/program.h> |
| #include <executorch/runtime/platform/log.h> |
| #include <executorch/runtime/platform/profiler.h> |
| #include <executorch/runtime/platform/runtime.h> |
| #include <executorch/sdk/etdump/etdump.h> |
| #include <executorch/util/bundled_program_verification.h> |
| #include <executorch/util/util.h> |
| #ifdef USE_ATEN_LIB |
| #include <c10/core/impl/LocalDispatchKeySet.h> |
| #endif |
| |
| #if !defined(USE_ATEN_LIB) |
| #include <executorch/backends/xnnpack/threadpool/fb/threadpool_use_n_threads.h> |
| #include <executorch/backends/xnnpack/threadpool/threadpool.h> |
| #endif |
| |
| // This tool includes all of the headers necessary to execute a model. |
| // Demonstrate that those headers do not expose the internal flatbuffers |
| // headers. |
| #ifdef FLATBUFFERS_VERSION_MAJOR |
| // FLATBUFFERS_VERSION_MAJOR is defined by flatbuffers/base.h, which is included |
| // by all other flatbuffers library headers and by any generated headers. If |
| // it's present, it means that this file is including a flatbuffers header |
| // somewhere. |
| #error "The executorch headers must not expose flatbuffers.h" |
| #endif |
| |
| using namespace torch::executor; |
| |
// Statically allocated backing storage for the allocators that main() hands
// to the runtime. Kept file-local via an anonymous namespace.
namespace {

// Pool that backs the runtime allocator.
constexpr size_t kRuntimeMemorySize = 4U * 1024U * 1024U; // 4 MB
uint8_t runtime_pool[kRuntimeMemorySize];

// Pool that backs the allocator used while loading/verifying bundled inputs.
constexpr size_t kBundledAllocatorPoolSize = 16U * 1024U; // 16 KB
uint8_t bundled_allocator_pool[kBundledAllocatorPoolSize];

} // namespace
| |
| DEFINE_bool( |
| bundled_program, |
| false, |
| "True for running bundled program, false for executorch_flatbuffer::program"); |
| |
| DEFINE_bool( |
| generate_etdump, |
| false, |
| "If enabled etdump containing profiling data will be generated"); |
| |
| DEFINE_string( |
| etdump_path, |
| "etdump.etdp", |
| "If etdump generation is enabled an etdump will be written out to this path"); |
| |
| DEFINE_string( |
| prof_result_path, |
| "prof_result.bin", |
| "Executorch profiler output path."); |
| |
| DEFINE_bool(print_output, false, "Prints output of the model."); |
| |
| DEFINE_int32(num_iters, 1, "Number of inference iterations to run."); |
| |
| DEFINE_string( |
| model_path, |
| "model.pte", |
| "Model serialized in flatbuffer format."); |
| |
| DEFINE_int32(num_threads, 1, "Number of threads to use."); |
| |
| DEFINE_int32( |
| testset_idx, |
| 0, |
| "Index of bundled verification set to be run " |
| "by bundled model for verification"); |
| |
| DEFINE_double( |
| rtol, |
| 1e-5, |
| "The relative tolerance used for bundled program verification."); |
| |
| DEFINE_double( |
| atol, |
| 1e-8, |
| "The absolute tolerance used for bundled program verification."); |
| |
| /** |
| * Helps handle bundled and non-bundled program inputs. |
| */ |
| class ProgramData { |
| public: |
| /** |
| * Tries loading the named file as a plain Program or a bundled program, |
| * failing with an ET_CHECK on any failure. |
| */ |
| static ProgramData load_or_die(std::string& filename) { |
| // Create a DataLoader that wraps the input file. It may be a plain Program, |
| // or it may be a BundledProgram that contains a Program. |
| Result<util::FileDataLoader> loader = |
| util::FileDataLoader::From(filename.c_str()); |
| ET_CHECK_MSG( |
| loader.ok(), |
| "Could not create loader for file '%s': 0x%x", |
| filename.c_str(), |
| (unsigned int)loader.error()); |
| |
| // Figure out the file type. Create a scope to destroy the header after the |
| // check. |
| { |
| Result<FreeableBuffer> header = |
| loader->Load(/*offset=*/0, Program::kMinHeadBytes); |
| ET_CHECK_MSG( |
| header.ok(), |
| "Could not load header of file '%s': 0x%x", |
| filename.c_str(), |
| (unsigned int)loader.error()); |
| Program::HeaderStatus hs = |
| Program::check_header(header->data(), header->size()); |
| if (hs == Program::HeaderStatus::CompatibleVersion) { |
| // It's a plain Program. We can use the existing loader, and there is no |
| // bundled program data. |
| return ProgramData( |
| new util::FileDataLoader(std::move(*loader)), |
| /*bundled_program_data=*/FreeableBuffer()); |
| } |
| } |
| |
| // Try treating it as a bundled program. |
| |
| // Read in the entire file. |
| Result<FreeableBuffer> file_data = loader->Load(0, loader->size().get()); |
| ET_CHECK_MSG( |
| file_data.ok(), |
| "Could not load contents of file '%s': 0x%x", |
| filename.c_str(), |
| (unsigned int)file_data.error()); |
| |
| // Find the offset to the embedded Program. |
| const void* program_data; |
| size_t program_data_len; |
| Error status = torch::executor::util::GetProgramData( |
| const_cast<void*>(file_data->data()), |
| file_data->size(), |
| &program_data, |
| &program_data_len); |
| ET_CHECK_MSG( |
| status == Error::Ok, |
| "GetProgramData() failed on file '%s': 0x%x", |
| filename.c_str(), |
| (unsigned int)status); |
| |
| // Wrap the Program in a loader, and pass on the FreeableBuffer that |
| // contains the full bundled program data. |
| return ProgramData( |
| new util::BufferDataLoader(program_data, program_data_len), |
| std::move(*file_data)); |
| } |
| |
| /** |
| * Returns the loader for the plain Program. May or may not be inside a |
| * bundled program wrapper. |
| */ |
| DataLoader* program_loader() { |
| return loader_.get(); |
| } |
| |
| /** |
| * If the file was a bundled program, returns a pointer to the file data. |
| * Otherwise returns nullptr. |
| */ |
| const void* bundled_program_data() const { |
| if (bundled_program_data_.size() > 0) { |
| return bundled_program_data_.data(); |
| } else { |
| return nullptr; |
| } |
| } |
| |
| private: |
| /// Takes ownership of both params. |
| ProgramData(DataLoader* loader, FreeableBuffer&& bundled_program_data) |
| : loader_(loader), |
| bundled_program_data_(std::move(bundled_program_data)) {} |
| |
| std::unique_ptr<DataLoader> loader_; |
| FreeableBuffer bundled_program_data_; |
| }; |
| |
| int main(int argc, char** argv) { |
| torch::executor::runtime_init(); |
| |
| gflags::ParseCommandLineFlags(&argc, &argv, true); |
| if (argc != 1) { |
| std::string msg = "Extra commandline args: "; |
| for (int i = 1 /* skip argv[0] (program name) */; i < argc; i++) { |
| msg += argv[i]; |
| } |
| ET_LOG(Error, "%s", msg.c_str()); |
| return 1; |
| } |
| |
| ET_CHECK_MSG( |
| FLAGS_num_threads >= 1, |
| "Please specifiy valid number of threads to use."); |
| |
| // Load the file. |
| auto program_data = ProgramData::load_or_die(FLAGS_model_path); |
| |
| // Parse the program file. This is immutable, and can also be reused between |
| // multiple execution invocations across multiple threads. |
| uint32_t prof_tok = EXECUTORCH_BEGIN_PROF("de-serialize model"); |
| Result<Program> program = |
| torch::executor::Program::Load(program_data.program_loader()); |
| EXECUTORCH_END_PROF(prof_tok); |
| if (!program.ok()) { |
| ET_LOG(Error, "Failed to parse model file %s", FLAGS_model_path.c_str()); |
| return 1; |
| } |
| ET_LOG(Info, "Model file %s is loaded.", FLAGS_model_path.c_str()); |
| |
| // Use the first method in the program. |
| const size_t method_index = 0; |
| const char* method_name = nullptr; |
| { |
| const auto method_name_result = program->get_method_name(method_index); |
| ET_CHECK_MSG(method_name_result.ok(), "Program has no methods"); |
| method_name = *method_name_result; |
| } |
| ET_LOG(Info, "Running method %s", method_name); |
| |
| // |
| // The runtime does not use malloc/new; it allocates all memory using the |
| // MemoryManger provided by the client. Clients are responsible for allocating |
| // the memory ahead of time, or providing MemoryAllocator subclasses that can |
| // do it dynamically. |
| // |
| |
| // The runtime allocator is used to allocate all dynamic C++ metadata/objects |
| // used to represent the loaded program. This allocator is only used during |
| // Program::load_method(), which will return an error if there was not enough |
| // memory. |
| // |
| // The amount of memory required depends on the loaded program and the runtime |
| // code itself. The amount of memory here is usually determined by running the |
| // program and seeing how much memory is actually used, though it's possible |
| // to subclass MemoryAllocator so that it calls malloc() under the hood. |
| MemoryAllocator runtime_allocator{ |
| MemoryAllocator(kRuntimeMemorySize, runtime_pool)}; |
| runtime_allocator.enable_profiling("runtime allocator"); |
| |
| // The non-const allocator is used to provide the memory-planned buffers that |
| // back mutable tensors. Since it was planned ahead of time, the Program knows |
| // how big each of the allocators needs to be. |
| // |
| // These buffers correspond to different hardware memory banks. Most mobile |
| // environments will only have a single buffer. Some embedded environments may |
| // have more than one for, e.g., slow/large DRAM and fast/small SRAM. |
| std::vector<std::unique_ptr<uint8_t[]>> non_const_buffers; |
| std::vector<MemoryAllocator> non_const_allocators; |
| size_t num_non_const_buffers = 0; |
| { |
| auto result = program->num_non_const_buffers(method_name); |
| ET_CHECK_MSG( |
| result.ok(), |
| "Failed to get number of non-const buffers for method %s: 0x%x", |
| method_name, |
| (unsigned int)result.error()); |
| num_non_const_buffers = *result; |
| } |
| // Note that this loop starts at ID 1, because ID 0 is reserved. But, the |
| // HierarchicalAllocator indices are zero-based, so it's later adjusted by -1. |
| // TODO(T142455629): Make HierarchicalAllocator ID-based to avoid this |
| // memory_id-1. |
| for (size_t id = 1; id < num_non_const_buffers; ++id) { |
| auto buffer_size = program->get_non_const_buffer_size(id, method_name); |
| ET_CHECK_MSG( |
| buffer_size.ok(), |
| "Failed to get size of non-const buffer %zu for method %s: 0x%x", |
| id, |
| method_name, |
| (unsigned int)buffer_size.error()); |
| ET_LOG( |
| Info, "Setting up non-const buffer %zu, size %zu.", id, *buffer_size); |
| non_const_buffers.push_back(std::make_unique<uint8_t[]>(*buffer_size)); |
| // Since the list of allocators began empty, buffer ID N will live at index |
| // N-1. |
| non_const_allocators.push_back( |
| MemoryAllocator(*buffer_size, non_const_buffers.back().get())); |
| non_const_allocators.back().enable_profiling("non_const_allocators"); |
| } |
| HierarchicalAllocator non_const_allocator( |
| non_const_allocators.size(), non_const_allocators.data()); |
| |
| // The constant allocator is not currently used. Please initialize with a |
| // zero-sized allocator. |
| MemoryAllocator const_allocator{MemoryAllocator(0, nullptr)}; |
| const_allocator.enable_profiling("const allocator"); |
| |
| // The kernel temporary allocator is not currently used. Please initialize |
| // with a zero-sized allocator. |
| MemoryAllocator temp_allocator{MemoryAllocator(0, nullptr)}; |
| temp_allocator.enable_profiling("temp allocator"); |
| |
| // Allocator for bundled input. |
| MemoryAllocator bundled_input_allocator{ |
| MemoryAllocator(kBundledAllocatorPoolSize, bundled_allocator_pool)}; |
| |
| // Assemble all of the allocators into the MemoryManager that the Executor |
| // will use. |
| MemoryManager memory_manager( |
| &const_allocator, |
| &non_const_allocator, |
| &runtime_allocator, |
| &temp_allocator); |
| |
| // |
| // Load the named method from the Program, using the provided allocators. The |
| // Method is what actually performs the inference. It is mutable and not |
| // locked, so should only used by a single thread at a time. But, it can be |
| // reused for multiple inferences. |
| // |
| |
| prof_tok = EXECUTORCH_BEGIN_PROF("load model"); |
| Result<Method> method = program->load_method(method_name, &memory_manager); |
| EXECUTORCH_END_PROF(prof_tok); |
| ET_CHECK_MSG( |
| method.ok(), |
| "load_method() failed with status 0x%" PRIx32, |
| method.error()); |
| |
| ET_LOG(Info, "Model initialized."); |
| |
| #ifdef USE_ATEN_LIB |
| // [TLS handling] This is to workaround an assertion failure |
| // (https://fburl.com/code/302jyn8d) running `gelu` in ATen mode in fbcode |
| // (such as bento). The problem is Executorch ATen mode doesn't have Thread |
| // Local State, but `torch-cpp` is assuming tls init is done. There are two |
| // more checks: MKLDNN disabled and C10_MOBILE, if any of them is true we |
| // won't be hitting this assertion error. However in `torch-cpp` lib both |
| // checks are false. Production impact: this should not make any impact in |
| // production environment, given that in xplat we are depending on a library |
| // that enables C10_MOBILE (`torch_mobile_core`). |
| c10::impl::ExcludeDispatchKeyGuard no_autograd(c10::autograd_dispatch_keyset); |
| #endif |
| |
| #if !defined(USE_ATEN_LIB) |
| // To enable intra-op parallelism |
| // This sets the # of threads to use for running executorch model |
| // to num_threads. Applicable to lean mode. |
| torch::executorch::threadpool::UseNThreadsThreadPoolGuard thread_pool_guard( |
| FLAGS_num_threads); |
| ET_CHECK_MSG( |
| thread_pool_guard.guard_armed(), |
| "Could not set # of threads to use. " |
| "Num threads requested is %d, Threadpool size is: %ld", |
| FLAGS_num_threads, |
| torch::executorch::threadpool::get_threadpool()->get_thread_count()); |
| #endif |
| // Run the model multiple times if requested. |
| Error status; |
| for (size_t i = 0; i < FLAGS_num_iters; i++) { |
| // Prepare the inputs. |
| exec_aten::ArrayRef<void*> inputs; |
| if (FLAGS_bundled_program) { |
| // Use the inputs embedded in the bundled program. |
| status = torch::executor::util::LoadBundledInput( |
| *method, |
| program_data.bundled_program_data(), |
| &bundled_input_allocator, |
| method_index, |
| FLAGS_testset_idx); |
| ET_CHECK_MSG( |
| status == Error::Ok, |
| "LoadBundledInput failed with status 0x%" PRIx32, |
| status); |
| } else { |
| // Use ones-initialized inputs. |
| inputs = torch::executor::util::PrepareInputTensors(*method); |
| } |
| ET_LOG(Info, "Inputs prepared."); |
| |
| // Run the model. |
| EXECUTORCH_PROFILE_CREATE_BLOCK("inference loop"); |
| prof_tok = EXECUTORCH_BEGIN_PROF("run model"); |
| status = method->execute(); |
| EXECUTORCH_END_PROF(prof_tok); |
| ET_CHECK_MSG( |
| status == Error::Ok, |
| "method->execute() failed with status 0x%" PRIx32, |
| status); |
| ET_LOG(Info, "Model executed successfully."); |
| |
| // Handle the outputs. |
| if (FLAGS_bundled_program) { |
| status = torch::executor::util::VerifyResultWithBundledExpectedOutput( |
| *method, |
| program_data.bundled_program_data(), |
| &bundled_input_allocator, |
| method_index, |
| FLAGS_testset_idx, |
| FLAGS_rtol, |
| FLAGS_atol); |
| ET_CHECK_MSG( |
| status == Error::Ok, |
| "Bundle verification failed with status 0x%" PRIx32, |
| status); |
| ET_LOG(Info, "Model verified successfully."); |
| } else { |
| torch::executor::util::FreeInputs(inputs); |
| } |
| } |
| |
| // Print the outputs if requested. |
| if (FLAGS_print_output) { |
| auto output_list = std::make_unique<EValue[]>(method->outputs_size()); |
| status = method->get_outputs(output_list.get(), method->outputs_size()); |
| ET_CHECK_MSG( |
| status == Error::Ok, |
| "get_outputs failed with status 0x%" PRIx32, |
| status); |
| |
| // TODO(T139071931): Don't assume that all outputs are tensors. |
| for (size_t i = 0; i < method->outputs_size(); i++) { |
| auto output_tensor = output_list[i].toTensor(); |
| const float* data_output = output_tensor.const_data_ptr<float>(); |
| for (size_t j = 0; j < output_list[i].toTensor().numel(); ++j) { |
| ET_LOG(Info, "%f", data_output[j]); |
| } |
| } |
| } |
| |
| // Dump the profiling data to the specified file. |
| torch::executor::prof_result_t prof_result; |
| EXECUTORCH_DUMP_PROFILE_RESULTS(&prof_result); |
| if (prof_result.num_bytes != 0) { |
| FILE* ptr = fopen(FLAGS_prof_result_path.c_str(), "w+"); |
| fwrite(prof_result.prof_data, 1, prof_result.num_bytes, ptr); |
| fclose(ptr); |
| } |
| |
| if (FLAGS_generate_etdump) { |
| ETDump et_dump(runtime_allocator); |
| auto ret = |
| et_dump.serialize_prof_results_to_etdump(FLAGS_etdump_path.c_str()); |
| if (ret != torch::executor::Error::Ok) { |
| ET_LOG(Error, "Failed to serialize and write out etdump data."); |
| return -1; |
| } |
| } |
| |
| return 0; |
| } |