| // Code in this file is a heavily modified version of the dynamic loader |
| // from android's bionic library. Here is the license for that project: |
| |
| /* |
| * Copyright (C) 2016 The Android Open Source Project |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS |
| * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
| * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| */ |
| |
| #include <dlfcn.h> |
| #include <elf.h> |
| #include <fcntl.h> |
| #include <libgen.h> |
| #include <link.h> |
| #include <sys/mman.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <unistd.h> |
| #include <atomic> |
| #include <cerrno> |
| #include <cinttypes> |
| #include <climits> |
| #include <cstdint> |
| #include <cstring> |
| #include <functional> |
| #include <iostream> |
| #include <sstream> |
| #include <stdexcept> |
| #include <thread> |
| #include <vector> |
| // Get PAGE_SIZE and PAGE_MASK. |
| #include <sys/user.h> |
| |
| #include <c10/util/Optional.h> |
| #include <c10/util/irange.h> |
| |
| #include <fmt/format.h> |
| #include <torch/csrc/deploy/loader.h> |
| #include <torch/csrc/deploy/mem_file.h> |
| |
| namespace torch { |
| namespace deploy { |
| |
// Throws a DeployLoaderError whose message is produced by fmt::format from
// `msg_fmt` (fmt-style "{}" placeholders, NOT printf "%s") and the remaining
// arguments.
#define DEPLOY_ERROR(msg_fmt, ...) \
  throw DeployLoaderError(fmt::format(msg_fmt, ##__VA_ARGS__))

// Raises DEPLOY_ERROR(msg_fmt, ...) when `cond` evaluates to false.
// The body is wrapped in do { } while (0) so the macro always expands to a
// single statement and cannot swallow an `else` that follows the call site.
// The message arguments are only evaluated when the check fails.
#define DEPLOY_CHECK(cond, msg_fmt, ...)    \
  do {                                      \
    if (!(cond)) {                          \
      DEPLOY_ERROR(msg_fmt, ##__VA_ARGS__); \
    }                                       \
  } while (0)
| |
// Splits `s` at every occurrence of `delim` and returns the pieces in order.
// An empty input yields an empty vector; otherwise empty pieces (for leading,
// trailing or doubled delimiters) are kept.
std::vector<std::string> split_path(const std::string& s, char delim) {
  std::vector<std::string> pieces;
  if (s.empty()) {
    return pieces;
  }
  const char* const last = s.c_str() + s.size();
  const char* token = s.c_str();
  // Emit one piece per delimiter found, then the remainder after the last one.
  for (const char* sep = strchr(token, delim); sep != nullptr;
       sep = strchr(token, delim)) {
    pieces.emplace_back(token, sep);
    token = sep + 1;
  }
  pieces.emplace_back(token, last);
  return pieces;
}
| |
| // https://stackoverflow.com/questions/23006930/the-shared-library-rpath-and-the-binary-rpath-priority/52647116#52647116 |
// Replaces, in place, every non-overlapping occurrence of `from` inside `str`
// with `to`. Does nothing when `from` is empty.
void replace_all(
    std::string& str,
    const std::string& from,
    const std::string& to) {
  if (from.empty()) {
    return;
  }
  // After each substitution resume searching just past the inserted text, so
  // a replacement that itself contains `from` (e.g. 'x' -> 'yx') is not
  // matched again.
  for (size_t hit = str.find(from, 0); hit != std::string::npos;
       hit = str.find(from, hit + to.length())) {
    str.replace(hit, from.length(), to);
  }
}
| |
| std::string resolve_path(const std::string& origin, const std::string& t) { |
| std::string result = t; |
| replace_all(result, "$ORIGIN", origin); |
| // NOLINTNEXTLINE |
| char buf[PATH_MAX]; |
| char* resolved = realpath(result.c_str(), buf); |
| if (!resolved) { |
| return result; |
| } |
| return resolved; |
| } |
| |
// Returns the directory containing `so_name`.
//
// The path is canonicalized with realpath() first. If that fails (e.g. the
// file does not exist) the directory of the unresolved name is returned
// instead of reading an uninitialized buffer.
std::string resolve_origin(const std::string& so_name) {
  // NOLINTNEXTLINE
  char origin[PATH_MAX];
  if (realpath(so_name.c_str(), origin) == nullptr) {
    // dirname() may modify its argument, so hand it a private copy.
    std::string unresolved = so_name;
    return dirname(&unresolved[0]);
  }
  // dirname() is allowed to return a pointer to static storage rather than
  // editing `origin` in place, so always use its return value.
  return dirname(origin);
}
| |
// printf-style formatting into a std::string.
//
// Returns the formatted text; the returned string's size() is exactly the
// number of formatted characters (no embedded trailing NUL). Returns an empty
// string if snprintf reports an encoding error or produces no output.
template <typename... Args>
std::string stringf(const char* format, Args... args) {
  // First pass only measures the required length (excluding the NUL).
  const int size_s = snprintf(nullptr, 0, format, args...);
  if (size_s <= 0) {
    return std::string();
  }
  std::string result(static_cast<size_t>(size_s), '\0');
  // snprintf writes size_s characters plus a terminating NUL, which lands on
  // the string's own terminator slot.
  snprintf(&result[0], static_cast<size_t>(size_s) + 1, format, args...);
  return result;
}
// Page arithmetic helpers, built on PAGE_SIZE / PAGE_MASK from <sys/user.h>.
// Each macro evaluates to the same integer type as its argument expression.

// Returns the address of the page containing address 'x'.
#define PAGE_START(x) ((x)&PAGE_MASK)

// Returns the offset of address 'x' in its page.
#define PAGE_OFFSET(x) ((x) & ~PAGE_MASK)

// Returns the address of the next page after address 'x', unless 'x' is
// itself at the start of a page (in which case 'x' is returned unchanged).
#define PAGE_END(x) PAGE_START((x) + (PAGE_SIZE - 1))
| |
| // from bionic |
| // returns the size a shared library will take in memory |
| size_t phdr_table_get_load_size( |
| const Elf64_Phdr* phdr_table, |
| size_t phdr_count, |
| Elf64_Addr* out_min_vaddr, |
| Elf64_Addr* out_max_vaddr) { |
| Elf64_Addr min_vaddr = UINTPTR_MAX; |
| Elf64_Addr max_vaddr = 0; |
| |
| bool found_pt_load = false; |
| for (const auto i : c10::irange(phdr_count)) { |
| const Elf64_Phdr* phdr = &phdr_table[i]; |
| |
| if (phdr->p_type != PT_LOAD) { |
| continue; |
| } |
| found_pt_load = true; |
| |
| if (phdr->p_vaddr < min_vaddr) { |
| min_vaddr = phdr->p_vaddr; |
| } |
| |
| if (phdr->p_vaddr + phdr->p_memsz > max_vaddr) { |
| max_vaddr = phdr->p_vaddr + phdr->p_memsz; |
| } |
| } |
| if (!found_pt_load) { |
| min_vaddr = 0; |
| } |
| |
| min_vaddr = PAGE_START(min_vaddr); |
| max_vaddr = PAGE_END(max_vaddr); |
| |
| if (out_min_vaddr != nullptr) { |
| *out_min_vaddr = min_vaddr; |
| } |
| if (out_max_vaddr != nullptr) { |
| *out_max_vaddr = max_vaddr; |
| } |
| return max_vaddr - min_vaddr; |
| } |
| |
// Evaluates to `to` when any bit of `from` is set in `x`, otherwise to 0.
#define MAYBE_MAP_FLAG(x, from, to) (((x) & (from)) ? (to) : 0)
// Translates ELF segment permission bits (PF_X / PF_R / PF_W) into the
// corresponding mmap/mprotect protection bits (PROT_EXEC / PROT_READ /
// PROT_WRITE).
#define PFLAGS_TO_PROT(x) \
(MAYBE_MAP_FLAG((x), PF_X, PROT_EXEC) | \
MAYBE_MAP_FLAG((x), PF_R, PROT_READ) | \
MAYBE_MAP_FLAG((x), PF_W, PROT_WRITE))
| |
// Pre-computed hash of a NUL-terminated symbol name, as used by GNU-style
// hash tables, bundled with the name's length so both are produced by a
// single pass over the string.
struct GnuHash {
  GnuHash(const char* name) {
    uint32_t acc = 5381;
    const auto* cursor = reinterpret_cast<const uint8_t*>(name);
#pragma unroll 8
    for (; *cursor != 0; ++cursor) {
      // acc * 33 + c, with 32-bit wrap-around
      acc = acc * 33 + *cursor;
    }
    hash = acc;
    // cursor stopped on the terminating NUL, so the distance is the length
    name_len = reinterpret_cast<const char*>(cursor) - name;
  }
  uint32_t hash;
  uint32_t name_len;
};
| |
// this is a special builtin in the libc++ API used for telling C++ exception
// frame unwinding about functions loaded from a pathway other than the libc
// loader. it is passed a pointer to where the EH_FRAME section was loaded,
// which appears to include frame information relative to that address.
| extern "C" void __register_frame(void*); |
| extern "C" void __deregister_frame(void*); |
| |
| typedef void (*linker_dtor_function_t)(); |
| typedef void (*linker_ctor_function_t)(int, const char**, char**); |
| |
// Leading fields of the .eh_frame_hdr section; see
// https://refspecs.linuxfoundation.org/LSB_2.1.0/LSB-Core-generic/LSB-Core-generic/ehframehdr.html
// Note that eh_frame_ptr can be different types based on eh_frame_ptr_enc, but
// we only support one specific encoding that is stored in an int32_t and is an
// offset relative to the start of this struct.
// The member order and types mirror the on-disk layout; do not reorder.
struct EH_Frame_HDR {
char version;
char eh_frame_ptr_enc; // encoding used for eh_frame_ptr
char fde_count_enc;
char table_enc;
int32_t eh_frame_ptr; // encoded pointer to the .eh_frame data
};
| |
| // this is the libc++ function called to lookup thread local state. |
| // It is passed a pointer to an object of the same shape as TLSEntry |
| // with the module_id and offset. |
| extern "C" void* __tls_get_addr(void*); |
| |
| extern "C" int __cxa_thread_atexit_impl( |
| void (*dtor)(void*), |
| void* obj, |
| void* dso_symbol); |
| |
struct CustomLibraryImpl;

// Pairs a malloc'ed buffer of `size` bytes with a shared_ptr to the
// CustomLibraryImpl it was created for; the buffer is freed on destruction.
//
// mem_ is a raw owning pointer, so the type is non-copyable: a copy would
// free the same buffer twice.
struct TLSMemory {
  TLSMemory(std::shared_ptr<CustomLibraryImpl> file, size_t size)
      // NOLINTNEXTLINE
      : file_(std::move(file)), mem_(malloc(size)) {}
  TLSMemory(const TLSMemory&) = delete;
  TLSMemory& operator=(const TLSMemory&) = delete;
  ~TLSMemory() {
    // NOLINTNEXTLINE
    free(mem_);
  }
  std::shared_ptr<CustomLibraryImpl> file_; // holds a reference to the library
  void* mem_; // owned; released with free()
};
| |
| static void delete_TLSMemory(void* obj) { |
| delete ((TLSMemory*)obj); |
| } |
| |
// This object performs TLS emulation for modules not loaded by dlopen.
// Normally modules have a module_id that is used as a key in libc for the
// thread local data for that module. However, there is no public API for
// assigning this module id. Instead, for modules that we load, we set module_id
// to a pointer to a TLSSegment object, and replace __tls_get_addr with a
// function that calls `addr`.

// libc module_id's are sequential, so we use the top bit as a flag to see
// if we have a local TLSSegment object instead. This will break if
// someone creates 2^63 sequential objects, but it is hard to imagine
// a system with enough RAM to do that.
| constexpr size_t TLS_LOCAL_FLAG = (1ULL << 63); |
| |
| static void* local__tls_get_addr(TLSIndex* idx); |
| |
/* LLDB puts a breakpoint in this function, and reads __deploy_module_info to
 * get debug info from library. The function is deliberately noinline so the
 * symbol keeps a distinct address for that breakpoint. */
__attribute__((noinline)) void __deploy_register_code() {
  std::cout << ""; // otherwise the breakpoint doesn't get hit, not sure if
                   // there is a more stable way of doing this.
}
| |
// Description of one loaded module, published through the
// __deploy_module_info global below.
// NOTE(review): field meanings are inferred from their names; the consumer is
// presumably the debugger hook around __deploy_register_code -- confirm.
struct DeployModuleInfo {
const char* name; // module name
Elf64_Addr file_addr; // presumably the address of the file image -- confirm
size_t file_size; // presumably the size of that image in bytes -- confirm
Elf64_Addr load_bias; // presumably the module's load offset -- confirm
};

// Given C linkage so the symbol name is not mangled.
extern "C" {
// NOLINTNEXTLINE
DeployModuleInfo __deploy_module_info;
}
| |
| // RAII wrapper around dlopen |
| struct __attribute__((visibility("hidden"))) SystemLibraryImpl |
| : public SystemLibrary { |
| SystemLibraryImpl(void* handle, bool steal) |
| : handle_(handle), own_handle_(steal && handle != RTLD_DEFAULT) {} |
| |
| at::optional<Elf64_Addr> sym(const char* name) const override { |
| void* r = dlsym(handle_, name); |
| if (!r) { |
| return at::nullopt; |
| } |
| return (Elf64_Addr)r; |
| } |
| |
| at::optional<TLSIndex> tls_sym(const char* name) const override; |
| |
| ~SystemLibraryImpl() override { |
| if (own_handle_) { |
| dlclose(handle_); |
| } |
| } |
| |
| private: |
| void* handle_; |
| bool own_handle_; |
| }; |
| |
| std::shared_ptr<SystemLibrary> SystemLibrary::create(void* handle, bool steal) { |
| return std::make_shared<SystemLibraryImpl>(handle, steal); |
| } |
| std::shared_ptr<SystemLibrary> SystemLibrary::create( |
| const char* path, |
| int flags) { |
| void* handle = dlopen(path, flags); |
| return SystemLibrary::create(handle, handle != nullptr); |
| } |
| |
| // reads DT_NEEDED and DT_RUNPATH from an unloaded elf file so we can sort out |
| // dependencies before calling dlopen |
| std::pair<const char*, std::vector<const char*>> load_needed_from_elf_file( |
| const char* filename, |
| const char* data) { |
| auto header_ = (Elf64_Ehdr*)data; |
| auto program_headers = (Elf64_Phdr*)(data + header_->e_phoff); |
| auto n_program_headers = header_->e_phnum; |
| const Elf64_Dyn* dynamic = nullptr; |
| for (const auto i : c10::irange(n_program_headers)) { |
| const Elf64_Phdr* phdr = &program_headers[i]; |
| if (phdr->p_type == PT_DYNAMIC) { |
| dynamic = reinterpret_cast<const Elf64_Dyn*>(data + phdr->p_offset); |
| break; |
| } |
| } |
| DEPLOY_CHECK( |
| dynamic, |
| "{}: could not load dynamic section for looking up DT_NEEDED", |
| filename); |
| |
| const char* runpath = ""; |
| std::vector<const char*> needed; |
| |
| auto segment_headers = (Elf64_Shdr*)(data + header_->e_shoff); |
| size_t n_segments = header_->e_shnum; |
| const char* strtab = nullptr; |
| |
| const char* segment_string_table = |
| data + segment_headers[header_->e_shstrndx].sh_offset; |
| |
| for (const auto i : c10::irange(n_segments)) { |
| const Elf64_Shdr* shdr = &segment_headers[i]; |
| if (shdr->sh_type == SHT_STRTAB && |
| strcmp(".dynstr", segment_string_table + shdr->sh_name) == 0) { |
| strtab = data + shdr->sh_offset; |
| break; |
| } |
| } |
| |
| DEPLOY_CHECK(strtab, "{}: could not load dynstr for DT_NEEDED", filename); |
| |
| for (const Elf64_Dyn* d = dynamic; d->d_tag != DT_NULL; ++d) { |
| switch (d->d_tag) { |
| case DT_NEEDED: |
| // std::cout << "NEEDED: '" << strtab + d->d_un.d_val << "'\n"; |
| needed.push_back(strtab + d->d_un.d_val); |
| break; |
| case DT_RPATH: /* not quite correct, because this is a different order |
| than runpath, |
| but better than not processing it at all */ |
| case DT_RUNPATH: |
| // std::cout << "RUNPATH: '" << strtab + d->d_un.d_val << "'\n"; |
| runpath = strtab + d->d_un.d_val; |
| break; |
| } |
| } |
| return std::make_pair(runpath, std::move(needed)); |
| } |
| |
| // common mechanism for reading the elf symbol table, |
| // and other information in the PT_DYNAMIC segment. |
| struct ElfDynamicInfo { |
| std::string name_; |
| const Elf64_Dyn* dynamic_ = nullptr; |
| Elf64_Addr load_bias_ = 0; |
| const Elf64_Sym* symtab_ = nullptr; |
| const char* strtab_ = nullptr; |
| size_t strtab_size_ = 0; |
| Elf64_Rela* plt_rela_ = nullptr; |
| size_t n_plt_rela_ = 0; |
| Elf64_Rela* rela_ = nullptr; |
| size_t n_rela_ = 0; |
| linker_ctor_function_t init_func_ = nullptr; |
| linker_ctor_function_t* init_array_ = nullptr; |
| linker_dtor_function_t fini_func_ = nullptr; |
| linker_dtor_function_t* fini_array_ = nullptr; |
| size_t n_init_array_ = 0; |
| size_t n_fini_array_ = 0; |
| size_t gnu_nbucket_ = 0; |
| uint32_t* gnu_bucket_ = nullptr; |
| uint32_t* gnu_chain_ = nullptr; |
| uint32_t gnu_maskwords_ = 0; |
| uint32_t gnu_shift2_ = 0; |
| Elf64_Addr* gnu_bloom_filter_ = nullptr; |
| std::string runpath_; |
| std::vector<const char*> needed_; |
| |
| const char* get_string(int idx) { |
| return strtab_ + idx; |
| } |
| |
| void initialize_from_dynamic_section( |
| std::string name, |
| Elf64_Dyn* dynamic, |
| Elf64_Addr load_bias, |
| bool check_absolute) { |
| name_ = std::move(name); |
| load_bias_ = load_bias; |
| dynamic_ = dynamic; |
| for (const Elf64_Dyn* d = dynamic_; d->d_tag != DT_NULL; ++d) { |
| void* addr = (check_absolute && d->d_un.d_ptr > load_bias_) |
| ? reinterpret_cast<void*>(d->d_un.d_ptr) |
| : reinterpret_cast<void*>(load_bias_ + d->d_un.d_ptr); |
| auto value = d->d_un.d_val; |
| |
| switch (d->d_tag) { |
| case DT_SYMTAB: |
| symtab_ = (Elf64_Sym*)addr; |
| break; |
| case DT_STRTAB: |
| strtab_ = (const char*)addr; |
| break; |
| |
| case DT_STRSZ: |
| strtab_size_ = value; |
| break; |
| |
| case DT_JMPREL: |
| plt_rela_ = (Elf64_Rela*)addr; |
| break; |
| case DT_PLTRELSZ: |
| n_plt_rela_ = value / sizeof(Elf64_Rela); |
| break; |
| case DT_RELA: |
| rela_ = (Elf64_Rela*)addr; |
| break; |
| case DT_RELASZ: |
| n_rela_ = value / sizeof(Elf64_Rela); |
| break; |
| |
| case DT_INIT: |
| init_func_ = reinterpret_cast<linker_ctor_function_t>( |
| load_bias_ + d->d_un.d_ptr); |
| break; |
| |
| case DT_FINI: |
| fini_func_ = reinterpret_cast<linker_dtor_function_t>( |
| load_bias_ + d->d_un.d_ptr); |
| break; |
| |
| case DT_INIT_ARRAY: |
| init_array_ = reinterpret_cast<linker_ctor_function_t*>( |
| load_bias_ + d->d_un.d_ptr); |
| break; |
| |
| case DT_INIT_ARRAYSZ: |
| n_init_array_ = |
| static_cast<uint32_t>(d->d_un.d_val) / sizeof(Elf64_Addr); |
| break; |
| |
| case DT_FINI_ARRAY: |
| fini_array_ = reinterpret_cast<linker_dtor_function_t*>( |
| load_bias_ + d->d_un.d_ptr); |
| break; |
| |
| case DT_FINI_ARRAYSZ: |
| n_fini_array_ = |
| static_cast<uint32_t>(d->d_un.d_val) / sizeof(Elf64_Addr); |
| break; |
| |
| case DT_HASH: |
| break; |
| |
| case DT_GNU_HASH: { |
| gnu_nbucket_ = reinterpret_cast<uint32_t*>(addr)[0]; |
| // skip symndx |
| gnu_maskwords_ = reinterpret_cast<uint32_t*>(addr)[2]; |
| gnu_shift2_ = reinterpret_cast<uint32_t*>(addr)[3]; |
| gnu_bloom_filter_ = |
| reinterpret_cast<Elf64_Addr*>((Elf64_Addr)addr + 16); |
| gnu_bucket_ = |
| reinterpret_cast<uint32_t*>(gnu_bloom_filter_ + gnu_maskwords_); |
| // amend chain for symndx = header[1] |
| gnu_chain_ = |
| gnu_bucket_ + gnu_nbucket_ - reinterpret_cast<uint32_t*>(addr)[1]; |
| --gnu_maskwords_; |
| } break; |
| } |
| } |
| |
| if (!gnu_bucket_) { |
| std::cout << fmt::format( |
| "{}: warning, no DT_GNU_HASH found, symbol lookups on this module will not find anything.\n", |
| name_); |
| } |
| |
| // pass 2 for things that require the strtab_ to be loaded |
| for (const Elf64_Dyn* d = dynamic_; d->d_tag != DT_NULL; ++d) { |
| switch (d->d_tag) { |
| case DT_NEEDED: |
| needed_.push_back(get_string(d->d_un.d_val)); |
| break; |
| case DT_RPATH: /* not quite correct, because this is a different order |
| than runpath, |
| but better than not processing it at all */ |
| case DT_RUNPATH: |
| runpath_ = get_string(d->d_un.d_val); |
| break; |
| } |
| } |
| } |
| |
| at::optional<Elf64_Addr> sym( |
| const char* name, |
| GnuHash* precomputed_hash = nullptr) const { |
| if (!gnu_bucket_) { |
| return at::nullopt; // no hashtable was loaded |
| } |
| GnuHash hash_obj = precomputed_hash ? *precomputed_hash : GnuHash(name); |
| auto hash = hash_obj.hash; |
| auto name_len = hash_obj.name_len; |
| constexpr uint32_t kBloomMaskBits = sizeof(Elf64_Addr) * 8; |
| |
| const uint32_t word_num = (hash / kBloomMaskBits) & gnu_maskwords_; |
| const Elf64_Addr bloom_word = gnu_bloom_filter_[word_num]; |
| const uint32_t h1 = hash % kBloomMaskBits; |
| const uint32_t h2 = (hash >> gnu_shift2_) % kBloomMaskBits; |
| |
| if ((1 & (bloom_word >> h1) & (bloom_word >> h2)) != 1) { |
| return at::nullopt; |
| } |
| |
| uint32_t sym_idx = gnu_bucket_[hash % gnu_nbucket_]; |
| if (sym_idx == 0) { |
| return at::nullopt; |
| } |
| |
| uint32_t chain_value = 0; |
| const Elf64_Sym* sym = nullptr; |
| |
| do { |
| sym = symtab_ + sym_idx; |
| chain_value = gnu_chain_[sym_idx]; |
| if ((chain_value >> 1) == (hash >> 1)) { |
| if (static_cast<size_t>(sym->st_name) + name_len + 1 <= strtab_size_ && |
| memcmp(strtab_ + sym->st_name, name, name_len + 1) == 0) { |
| // found the matching entry, is it defined? |
| if (sym->st_shndx != 0) { |
| return sym->st_value + |
| ((ELF64_ST_TYPE(sym->st_info) == STT_TLS) ? 0 : load_bias_); |
| } |
| // symbol isn't defined |
| return at::nullopt; |
| } |
| } |
| ++sym_idx; |
| } while ((chain_value & 1) == 0); |
| return at::nullopt; |
| } |
| }; |
| |
| // for resolving TLS offsets we need to look through |
| // libc's already loaded libraries. We do not have the whole |
| // ELF file mapped in this case just a pointer to the program headers and |
| // the load_bias (offset in memory) where the library was loaded. |
| struct AlreadyLoadedSymTable { |
| private: |
| ElfDynamicInfo dyninfo_; |
| |
| public: |
| AlreadyLoadedSymTable( |
| const char* name, |
| Elf64_Addr load_bias, |
| const Elf64_Phdr* program_headers, |
| size_t n_program_headers) { |
| Elf64_Dyn* dynamic = nullptr; |
| for (const auto i : c10::irange(n_program_headers)) { |
| const Elf64_Phdr* phdr = &program_headers[i]; |
| |
| // Segment addresses in memory. |
| Elf64_Addr seg_start = phdr->p_vaddr + load_bias; |
| if (phdr->p_type == PT_DYNAMIC) { |
| dynamic = reinterpret_cast<Elf64_Dyn*>(seg_start); |
| break; |
| } |
| } |
| DEPLOY_CHECK( |
| dynamic, "%s: couldn't find PT_DYNAMIC in already loaded table.", name); |
| dyninfo_.initialize_from_dynamic_section(name, dynamic, load_bias, true); |
| } |
| |
| at::optional<Elf64_Addr> sym(const char* name) { |
| return dyninfo_.sym(name); |
| } |
| }; |
// C-compatible trampoline for dl_iterate_phdr: `data` must point to a
// std::function<int(dl_phdr_info*, size_t)>, which is invoked with the other
// two arguments and whose result is returned.
static int iterate_cb(struct dl_phdr_info* info, size_t size, void* data) {
  using PhdrCallback = std::function<int(struct dl_phdr_info*, size_t)>;
  PhdrCallback& callback = *static_cast<PhdrCallback*>(data);
  return callback(info, size);
}
| |
| // we need to find a TLS offset / module_id pair for a symbol which we cannot do |
| // with a normal dlsym call. Instead we iterate through all loaded libraries and |
| // check their symbol tables for the symbol. The value of the symbol is the TLS |
| // offset. When we find the library we also get the module id. |
| at::optional<TLSIndex> slow_find_tls_symbol_offset(const char* sym_name) { |
| at::optional<TLSIndex> result = at::nullopt; |
| std::function<int(struct dl_phdr_info*, size_t)> cb = |
| [&](struct dl_phdr_info* info, size_t size) { |
| // std::cout << "SEARCHING .. " << info->dlpi_name << "\n"; |
| AlreadyLoadedSymTable symtable( |
| info->dlpi_name, |
| info->dlpi_addr, |
| info->dlpi_phdr, |
| info->dlpi_phnum); |
| auto sym_addr = symtable.sym(sym_name); |
| if (sym_addr) { |
| // std::cout << "FOUND IT IN: " << info->dlpi_name << " it has modid: |
| // " << info->dlpi_tls_modid << "\n"; |
| result = TLSIndex{info->dlpi_tls_modid, *sym_addr}; |
| return 1; |
| } |
| return 0; |
| }; |
| |
| dl_iterate_phdr(iterate_cb, (void*)&cb); |
| return result; |
| } |
| |
| at::optional<TLSIndex> SystemLibraryImpl::tls_sym(const char* name) const { |
| if (!sym(name)) { |
| return at::nullopt; // before we do a bunch of slow lookups to find the |
| // module_id, check that this even defines the symbol |
| } |
| if (handle_ == RTLD_DEFAULT) { |
| return slow_find_tls_symbol_offset(name); |
| } |
| |
| struct link_map* lm = nullptr; |
| DEPLOY_CHECK( |
| 0 == dlinfo(handle_, RTLD_DI_LINKMAP, &lm), "failed to query dlinfo"); |
| std::cout << "TLS dlinfo LOOKUP " << lm->l_name << " " << name << " " |
| << "\n"; |
| |
| ElfDynamicInfo info; |
| info.initialize_from_dynamic_section(lm->l_name, lm->l_ld, lm->l_addr, true); |
| auto r = info.sym(name); |
| if (r) { |
| size_t module_id = 0; |
| DEPLOY_CHECK( |
| 0 == dlinfo(handle_, RTLD_DI_TLS_MODID, &module_id), |
| "failed to query dlinfo for module_id"); |
| return TLSIndex{module_id, *r}; |
| } |
| return at::nullopt; |
| } |
| |
| // dlopen does not accept additional search paths as an argument. |
| // however, normal DT_NEEDED library load inherits the runpath of parents. |
| // So we need to pre-find all the libraries and call dlopen on them directly to |
| // get the same behavior. We can find the dependencies by reading the libraries |
| // dynamic section for recursive DT_NEEED entries. |
| void resolve_needed_libraries( |
| std::vector<std::shared_ptr<SymbolProvider>>& libraries, |
| const std::string& origin_relative, |
| std::vector<std::string>& search_path, |
| const std::string& runpath_template, |
| const std::vector<const char*>& needed) { |
| size_t search_path_start_size = search_path.size(); |
| |
| std::string origin = resolve_origin(origin_relative); |
| std::vector<std::string> paths = split_path(runpath_template, ':'); |
| // backwards because we want paths to be search in order but we search |
| // search_path backward |
| for (size_t i = paths.size(); i > 0; --i) { |
| search_path.emplace_back(resolve_path(origin, paths[i - 1])); |
| } |
| |
| for (const char* name : needed) { |
| // std::cout << "ATTEMPTING FIND " << name << "\n"; |
| if (strcmp(name, "libtorch_python.so") == 0) { |
| // torchvision expects it... |
| continue; |
| } |
| // find the library, either (1) it is already loaded, |
| // (2) it is an absolute path that exists, |
| // (3) we find it in the search path |
| // (4) we can dlopen it |
| |
| // (1) the library is already loaded |
| const int base_flags = RTLD_LAZY | RTLD_LOCAL; |
| void* handle = dlopen(name, base_flags | RTLD_NOLOAD); |
| if (handle) { |
| // std::cout << "ALREADY LOADED " << name << "\n"; |
| libraries.emplace_back(SystemLibrary::create(handle, true)); |
| continue; |
| } |
| |
| std::string library_path = ""; |
| // (2) it is an absolute path |
| if (strchr(name, '/') != nullptr) { |
| library_path = name; |
| } else { |
| // (3) find it in the search path |
| for (size_t i = search_path.size(); i > 0; --i) { |
| std::stringstream ss; |
| ss << search_path[i - 1] << "/" << name; |
| if (access(ss.str().c_str(), F_OK) == 0) { |
| library_path = ss.str(); |
| break; |
| } |
| } |
| } |
| |
| std::vector<std::shared_ptr<SymbolProvider>> |
| sublibraries; // these need to say loaded until we open library_path |
| // otherwise we might dlclose a sublibrary |
| |
| if (library_path != "") { |
| // std::cout << "LOOKING FOR SUBLIBRARIES FOR FILE AT PATH " << |
| // library_path << "\n"; we found the actual file, recursively load its |
| // deps before opening it so we resolve their paths correctly |
| MemFile image(library_path.c_str()); |
| auto search = |
| load_needed_from_elf_file(library_path.c_str(), image.data()); |
| resolve_needed_libraries( |
| sublibraries, library_path, search_path, search.first, search.second); |
| } else { |
| library_path = name; |
| } |
| |
| // either we didn't find the file, or we have already loaded its deps |
| // in both cases, we now try to call dlopen. In the case where we didn't |
| // find the file, we hope that something like LD_LIBRARY_PATH knows where it |
| // is. In the case where we found it, we know its deps are loaded and |
| // resolved. |
| |
| // std::cout << "OPENING " << library_path << "\n"; |
| handle = dlopen(library_path.c_str(), base_flags); |
| DEPLOY_CHECK( |
| handle, "{}: could not load library, dlopen says: {}", name, dlerror()); |
| libraries.emplace_back(SystemLibrary::create(handle, true)); |
| } |
| |
| // unwind search_path stack |
| search_path.erase( |
| search_path.begin() + search_path_start_size, search_path.end()); |
| } |
| |
| // NOLINTNEXTLINE |
| extern "C" void* __dso_handle; |
| |
| struct __attribute__((visibility("hidden"))) CustomLibraryImpl |
| : public std::enable_shared_from_this<CustomLibraryImpl>, |
| public CustomLibrary { |
| CustomLibraryImpl(const char* filename, int argc, const char** argv) |
| : contents_(filename), |
| mapped_library_(nullptr), |
| name_(filename), |
| argc_(argc), |
| argv_(argv) { |
| pthread_key_create(&tls_key_, nullptr); |
| data_ = contents_.data(); |
| header_ = (Elf64_Ehdr*)data_; |
| program_headers_ = (Elf64_Phdr*)(data_ + header_->e_phoff); |
| n_program_headers_ = header_->e_phnum; |
| } |
| void add_search_library(std::shared_ptr<SymbolProvider> lib) override { |
| symbol_search_path_.emplace_back(std::move(lib)); |
| } |
| |
| void check_library_format() { |
| DEPLOY_CHECK( |
| 0 == memcmp(header_->e_ident, ELFMAG, SELFMAG), |
| "{}: not an ELF file", |
| this->name_); |
| DEPLOY_CHECK( |
| header_->e_type == ET_DYN, |
| "{}: is not shared object file", |
| this->name_); |
| DEPLOY_CHECK( |
| header_->e_ident[EI_CLASS] == ELFCLASS64, |
| "{}: is not ELF64 format", |
| this->name_); |
| DEPLOY_CHECK( |
| header_->e_ident[EI_DATA] == ELFDATA2LSB, |
| "{}: is not 2's complement, little endian", |
| this->name_); |
| DEPLOY_CHECK( |
| header_->e_machine == EM_X86_64, |
| "{}: is not in x86_64 format", |
| this->name_); |
| } |
| |
| void reserve_address_space() { |
| Elf64_Addr min_vaddr = 0; |
| Elf64_Addr max_vaddr = 0; |
| mapped_size_ = phdr_table_get_load_size( |
| program_headers_, n_program_headers_, &min_vaddr, &max_vaddr); |
| mapped_library_ = mmap( |
| nullptr, mapped_size_, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
| load_bias_ = |
| (const char*)mapped_library_ - reinterpret_cast<const char*>(min_vaddr); |
| } |
| |
| void load_segments() { |
| // from bionic |
| for (const auto i : c10::irange(n_program_headers_)) { |
| const Elf64_Phdr* phdr = &program_headers_[i]; |
| |
| // Segment addresses in memory. |
| Elf64_Addr seg_start = phdr->p_vaddr + load_bias_; |
| Elf64_Addr seg_end = seg_start + phdr->p_memsz; |
| |
| switch (phdr->p_type) { |
| case PT_DYNAMIC: |
| dynamic_ = reinterpret_cast<Elf64_Dyn*>(seg_start); |
| break; |
| case PT_GNU_EH_FRAME: |
| eh_frame_hdr_ = reinterpret_cast<EH_Frame_HDR*>(seg_start); |
| DEPLOY_CHECK( |
| eh_frame_hdr_->eh_frame_ptr_enc == 0x1b, |
| "unsupported eh_frame_pointer_enc {}", |
| eh_frame_hdr_->eh_frame_ptr_enc); |
| eh_frame_ = |
| (void*)((int64_t)&eh_frame_hdr_->eh_frame_ptr + eh_frame_hdr_->eh_frame_ptr); |
| break; |
| case PT_TLS: |
| tls_file_size_ = phdr->p_filesz; |
| tls_mem_size_ = phdr->p_memsz; |
| tls_initalization_image_ = (void*)seg_start; |
| break; |
| }; |
| |
| if (phdr->p_type != PT_LOAD) { |
| continue; |
| } |
| |
| Elf64_Addr seg_page_start = PAGE_START(seg_start); |
| Elf64_Addr seg_page_end = PAGE_END(seg_end); |
| |
| Elf64_Addr seg_file_end = seg_start + phdr->p_filesz; |
| |
| // File offsets. |
| Elf64_Addr file_start = phdr->p_offset; |
| Elf64_Addr file_end = file_start + phdr->p_filesz; |
| |
| Elf64_Addr file_page_start = PAGE_START(file_start); |
| Elf64_Addr file_length = file_end - file_page_start; |
| |
| if (contents_.size() <= 0) { |
| DEPLOY_ERROR( |
| "\"{}\" invalid file size: {}", name_.c_str(), contents_.size()); |
| } |
| |
| if (file_end > contents_.size()) { |
| DEPLOY_ERROR( |
| "invalid ELF file \"{}\" load segment[{}]:" |
| " p_offset ({}) + p_filesz ({}) ( = {}) past end of file " |
| "({})", |
| name_.c_str(), |
| i, |
| reinterpret_cast<void*>(phdr->p_offset), |
| reinterpret_cast<void*>(phdr->p_filesz), |
| reinterpret_cast<void*>(file_end), |
| contents_.size()); |
| } |
| |
| if (file_length != 0) { |
| int prot = PFLAGS_TO_PROT(phdr->p_flags); |
| |
| void* seg_addr = mmap64( |
| reinterpret_cast<void*>(seg_page_start), |
| file_length, |
| prot | PROT_WRITE, // initially everything is writable to do |
| // relocations |
| MAP_FIXED | MAP_PRIVATE, |
| contents_.fd(), |
| file_page_start); |
| fixup_prot_.emplace_back([=]() { |
| mprotect(reinterpret_cast<void*>(seg_page_start), file_length, prot); |
| }); |
| if (seg_addr == MAP_FAILED) { |
| DEPLOY_ERROR( |
| "couldn't map \"{}\" segment {}: {}", |
| name_.c_str(), |
| i, |
| strerror(errno)); |
| } |
| } |
| |
| // if the segment is writable, and does not end on a page boundary, |
| // zero-fill it until the page limit. |
| if ((phdr->p_flags & PF_W) != 0 && PAGE_OFFSET(seg_file_end) > 0) { |
| memset( |
| reinterpret_cast<void*>(seg_file_end), |
| 0, |
| PAGE_SIZE - PAGE_OFFSET(seg_file_end)); |
| } |
| |
| seg_file_end = PAGE_END(seg_file_end); |
| |
| // seg_file_end is now the first page address after the file |
| // content. If seg_end is larger, we need to zero anything |
| // between them. This is done by using a private anonymous |
| // map for all extra pages. |
| if (seg_page_end > seg_file_end) { |
| size_t zeromap_size = seg_page_end - seg_file_end; |
| int prot = PFLAGS_TO_PROT(phdr->p_flags); |
| void* zeromap = mmap( |
| reinterpret_cast<void*>(seg_file_end), |
| zeromap_size, |
| prot | PROT_WRITE, |
| MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, |
| -1, |
| 0); |
| fixup_prot_.emplace_back([=]() { |
| mprotect(reinterpret_cast<void*>(seg_file_end), zeromap_size, prot); |
| }); |
| if (zeromap == MAP_FAILED) { |
| DEPLOY_ERROR( |
| "couldn't zero fill \"{}\" gap: {}", |
| name_.c_str(), |
| strerror(errno)); |
| } |
| } |
| } |
| } |
| size_t module_id() const { |
| size_t this_as_number = (size_t)this; |
| return this_as_number | TLS_LOCAL_FLAG; |
| } |
| |
| void read_dynamic_section() { |
| dyninfo_.initialize_from_dynamic_section( |
| name_, dynamic_, load_bias_, false); |
| std::vector<std::string> empty_search_path; |
| resolve_needed_libraries( |
| symbol_search_path_, |
| name_, |
| empty_search_path, |
| dyninfo_.runpath_, |
| dyninfo_.needed_); |
| } |
| |
| at::optional<Elf64_Addr> lookup_symbol(Elf64_Xword r_info) { |
| const uint32_t r_type = ELF64_R_TYPE(r_info); |
| const uint32_t r_sym = ELF64_R_SYM(r_info); |
| |
| if (r_sym == 0) { |
| return (Elf64_Addr)0; |
| } |
| auto sym_st = dyninfo_.symtab_[r_sym]; |
| const char* sym_name = dyninfo_.get_string(sym_st.st_name); |
| if (r_type == R_X86_64_JUMP_SLOT) { |
| if (strcmp(sym_name, "__tls_get_addr") == 0) { |
| return (Elf64_Addr)local__tls_get_addr; |
| } |
| if (strcmp(sym_name, "__cxa_thread_atexit") == 0) { |
| return (Elf64_Addr)__cxa_thread_atexit_impl; |
| } |
| } |
| for (const auto& sys_lib : symbol_search_path_) { |
| auto r = sys_lib->sym(sym_name); |
| if (r) { |
| return r; |
| } |
| } |
| auto r = sym(sym_name); |
| if (r) { |
| return r; |
| } |
| if (ELF64_ST_BIND(sym_st.st_info) != STB_WEAK) { |
| DEPLOY_ERROR( |
| "{}: '{}' symbol not found in ElfFile lookup", |
| name_.c_str(), |
| sym_name); |
| } |
| return at::nullopt; |
| } |
| |
| at::optional<TLSIndex> tls_lookup_symbol(Elf64_Xword r_info) { |
| const uint32_t r_sym = ELF64_R_SYM(r_info); |
| |
| if (r_sym == 0) { |
| return TLSIndex{ |
| module_id(), |
| 0}; // note: offset is not queried when the symbol is blank |
| } |
| |
| auto sym_st = dyninfo_.symtab_[r_sym]; |
| const char* sym_name = dyninfo_.get_string(sym_st.st_name); |
| for (const auto& sys_lib : symbol_search_path_) { |
| auto r = sys_lib->tls_sym(sym_name); |
| if (r) { |
| return r; |
| } |
| } |
| auto r = tls_sym(sym_name); |
| if (r) { |
| return r; |
| } |
| |
| if (ELF64_ST_BIND(sym_st.st_info) != STB_WEAK) { |
| DEPLOY_ERROR( |
| "{}: '{}' symbol not found in ElfFile lookup", |
| name_.c_str(), |
| sym_name); |
| } |
| return at::nullopt; |
| } |
| |
| void relocate_one(const Elf64_Rela& reloc) { |
| const uint32_t r_type = ELF64_R_TYPE(reloc.r_info); |
| |
| if (r_type == 0) { |
| return; |
| } |
| |
| void* const rel_target = |
| reinterpret_cast<void*>(reloc.r_offset + load_bias_); |
| |
| // TLS relocations need to lookup symbols differently so we can get the |
| // module_id |
| if (r_type == R_X86_64_DTPMOD64 || r_type == R_X86_64_DTPOFF64) { |
| auto tls_index = tls_lookup_symbol(reloc.r_info); |
| if (!tls_index) { |
| return; // skip weak relocation that wasn't found |
| } |
| switch (r_type) { |
| case R_X86_64_DTPMOD64: |
| *static_cast<size_t*>(rel_target) = tls_index->module_id; |
| break; |
| case R_X86_64_DTPOFF64: |
| *static_cast<Elf64_Addr*>(rel_target) = |
| tls_index->offset + reloc.r_addend; |
| break; |
| } |
| return; |
| } |
| |
| auto sym_addr = lookup_symbol(reloc.r_info); |
| if (!sym_addr) { |
| return; // skip weak relocation that wasn't found |
| } |
| |
| switch (r_type) { |
| case R_X86_64_JUMP_SLOT: |
| case R_X86_64_64: |
| case R_X86_64_GLOB_DAT: { |
| const Elf64_Addr result = *sym_addr + reloc.r_addend; |
| *static_cast<Elf64_Addr*>(rel_target) = result; |
| } break; |
| case R_X86_64_RELATIVE: { |
| // In practice, r_sym is always zero, but if it weren't, the linker |
| // would still look up the referenced symbol (and abort if the symbol |
| // isn't found), even though it isn't used. |
| const Elf64_Addr result = load_bias_ + reloc.r_addend; |
| *static_cast<Elf64_Addr*>(rel_target) = result; |
| } break; |
| case R_X86_64_32: { |
| const Elf32_Addr result = *sym_addr + reloc.r_addend; |
| *static_cast<Elf32_Addr*>(rel_target) = result; |
| } break; |
| case R_X86_64_PC32: { |
| const Elf64_Addr target = *sym_addr + reloc.r_addend; |
| const Elf64_Addr base = reinterpret_cast<Elf64_Addr>(rel_target); |
| const Elf32_Addr result = target - base; |
| *static_cast<Elf32_Addr*>(rel_target) = result; |
| } break; |
| default: |
| DEPLOY_ERROR("unknown reloc type {} in \"{}\"", r_type, name_.c_str()); |
| break; |
| } |
| } |
| |
| void relocate() { |
| for (const auto i : c10::irange(dyninfo_.n_rela_)) { |
| relocate_one(dyninfo_.rela_[i]); |
| } |
| for (const auto i : c10::irange(dyninfo_.n_plt_rela_)) { |
| relocate_one(dyninfo_.plt_rela_[i]); |
| } |
| } |
| |
| void initialize() { |
| call_function(dyninfo_.init_func_); |
| for (const auto i : c10::irange(dyninfo_.n_init_array_)) { |
| call_function(dyninfo_.init_array_[i]); |
| } |
| initialized_ = true; |
| } |
| |
| void finalize() { |
| for (size_t i = dyninfo_.n_fini_array_; i > 0; --i) { |
| call_function(dyninfo_.fini_array_[i - 1]); |
| } |
| call_function(dyninfo_.fini_func_); |
| } |
| |
| void register_debug_info() { |
| // std::cout << "target modules add " << name_.c_str() << "\n"; |
| // std::cout << "target modules load -f " << name_.c_str() << " -s " |
| // << std::hex << "0x" << load_bias_ << "\n"; |
| __deploy_module_info.name = name_.c_str(); |
| __deploy_module_info.file_addr = (Elf64_Addr)contents_.data(); |
| __deploy_module_info.file_size = contents_.size(); |
| __deploy_module_info.load_bias = load_bias_; |
| // debugger script sets a breakpoint on this function, |
| // then reads __deploy_module_info to issue the target module commands. |
| __deploy_register_code(); |
| } |
| |
| // remove the extra write flags from read-only sections |
| void protect() { |
| for (const auto& fixup : fixup_prot_) { |
| fixup(); |
| } |
| } |
| |
| void load() override { |
| check_library_format(); |
| reserve_address_space(); |
| load_segments(); |
| read_dynamic_section(); |
| relocate(); |
| protect(); |
| __register_frame(eh_frame_); |
| eh_frame_registered_ = true; |
| register_debug_info(); |
| initialize(); |
| } |
| |
| ~CustomLibraryImpl() override { |
| // std::cout << "LINKER IS UNLOADING: " << name_ << "\n"; |
| if (initialized_) { |
| finalize(); |
| } |
| if (eh_frame_registered_) { |
| __deregister_frame(eh_frame_); |
| } |
| if (mapped_library_) { |
| munmap(mapped_library_, mapped_size_); |
| } |
| } |
| void call_function(linker_dtor_function_t f) { |
| if (f == nullptr || (int64_t)f == -1) |
| return; |
| f(); |
| } |
| void call_function(linker_ctor_function_t f) { |
| if (f == nullptr || (int64_t)f == -1) |
| return; |
| f(argc_, argv_, environ); |
| } |
| |
| at::optional<Elf64_Addr> sym(const char* name) const override { |
| return dyninfo_.sym(name); |
| } |
| |
| at::optional<TLSIndex> tls_sym(const char* name) const override { |
| auto r = dyninfo_.sym(name); |
| if (r) { |
| return TLSIndex{module_id(), *r}; |
| } |
| return at::nullopt; |
| } |
| |
| void* tls_addr(size_t offset) { |
| // this was a TLS entry for one of our modules, so we use pthreads to |
| // emulate thread local state. |
| void* start = pthread_getspecific(tls_key_); |
| if (!start) { |
| auto tls_mem = new TLSMemory(shared_from_this(), tls_mem_size_); |
| __cxa_thread_atexit_impl(delete_TLSMemory, tls_mem, &__dso_handle); |
| start = tls_mem->mem_; |
| memcpy(start, tls_initalization_image_, tls_file_size_); |
| memset( |
| (void*)((const char*)start + tls_file_size_), |
| 0, |
| tls_mem_size_ - tls_file_size_); |
| pthread_setspecific(tls_key_, start); |
| } |
| return (void*)((const char*)start + offset); |
| } |
| |
private:
  // The library file's contents; data()/size() are reported to the debugger
  // by register_debug_info().
  MemFile contents_;
  // NOTE(review): data_, header_, program_headers_, eh_frame_hdr_ and
  // n_program_headers_ are not used by the methods visible in this part of
  // the file; presumably they are filled in while the ELF headers are
  // parsed -- confirm.
  const char* data_ = nullptr;
  const Elf64_Ehdr* header_ = nullptr;
  const Elf64_Phdr* program_headers_ = nullptr;
  const EH_Frame_HDR* eh_frame_hdr_ = nullptr;
  // Pointer handed to __register_frame / __deregister_frame.
  void* eh_frame_ = nullptr;
  size_t n_program_headers_ = 0;
  // Address range released with munmap() in the destructor.
  void* mapped_library_ = nullptr;
  size_t mapped_size_ = 0;
  // Added to relocation offsets/addends to get run-time addresses.
  Elf64_Addr load_bias_ = 0;
  // Raw dynamic section, parsed into dyninfo_ by read_dynamic_section().
  Elf64_Dyn* dynamic_ = nullptr;
  ElfDynamicInfo dyninfo_;
  // Library name; used in error messages and debugger registration.
  std::string name_;
  // Arguments passed (along with environ) to the library's initializers.
  int argc_ = 0;
  const char** argv_ = nullptr;
  // Set once initialize() has run; gates finalize() in the destructor.
  bool initialized_ = false;
  // Set once __register_frame was called; gates __deregister_frame.
  bool eh_frame_registered_ = false;

  // pthread key under which each thread's TLS block is stored (see
  // tls_addr()).
  pthread_key_t tls_key_ = 0;
  // Source image copied into every new per-thread TLS block.
  void* tls_initalization_image_ = nullptr;
  // Number of bytes copied from the image.
  size_t tls_file_size_ = 0;
  // Total size of a TLS block; bytes beyond tls_file_size_ are zero filled.
  size_t tls_mem_size_ = 0;

  // Providers consulted, in order, before this library's own symbols.
  std::vector<std::shared_ptr<SymbolProvider>> symbol_search_path_;
  // Deferred callbacks run by protect() to restore segment protections.
  std::vector<std::function<void(void)>> fixup_prot_;
| }; |
| |
| std::shared_ptr<CustomLibrary> CustomLibrary::create( |
| const char* filename, |
| int argc, |
| const char** argv) { |
| return std::make_shared<CustomLibraryImpl>(filename, argc, argv); |
| } |
| |
| static void* local__tls_get_addr(TLSIndex* idx) { |
| if ((idx->module_id & TLS_LOCAL_FLAG) != 0) { |
| return ((CustomLibraryImpl*)(idx->module_id & ~TLS_LOCAL_FLAG)) |
| ->tls_addr(idx->offset); |
| } |
| return __tls_get_addr(idx); |
| } |
| |
| } // namespace deploy |
| } // namespace torch |