| /* |
| * Copyright (c) 2016 Facebook, Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
#include <fcntl.h>
#include <linux/elf.h>
#include <linux/perf_event.h>
#include <sys/epoll.h>
#include <unistd.h>
#include <cerrno>
#include <cinttypes>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include <vector>
| |
| #include "BPFTable.h" |
| |
| #include "bcc_exception.h" |
| #include "bcc_syms.h" |
| #include "common.h" |
| #include "file_desc.h" |
| #include "libbpf.h" |
| #include "perf_reader.h" |
| |
| namespace ebpf { |
| |
| BPFTable::BPFTable(const TableDesc& desc) : BPFTableBase<void, void>(desc) {} |
| |
| StatusTuple BPFTable::get_value(const std::string& key_str, |
| std::string& value_str) { |
| char key[desc.key_size]; |
| char value[desc.leaf_size]; |
| |
| StatusTuple r(0); |
| |
| r = string_to_key(key_str, key); |
| if (r.code() != 0) |
| return r; |
| |
| if (!lookup(key, value)) |
| return StatusTuple(-1, "error getting value"); |
| |
| return leaf_to_string(value, value_str); |
| } |
| |
| StatusTuple BPFTable::get_value(const std::string& key_str, |
| std::vector<std::string>& value_str) { |
| size_t ncpus = get_possible_cpus().size(); |
| char key[desc.key_size]; |
| char value[desc.leaf_size * ncpus]; |
| |
| StatusTuple r(0); |
| |
| r = string_to_key(key_str, key); |
| if (r.code() != 0) |
| return r; |
| |
| if (!lookup(key, value)) |
| return StatusTuple(-1, "error getting value"); |
| |
| value_str.resize(ncpus); |
| |
| for (size_t i = 0; i < ncpus; i++) { |
| r = leaf_to_string(value + i * desc.leaf_size, value_str.at(i)); |
| if (r.code() != 0) |
| return r; |
| } |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFTable::update_value(const std::string& key_str, |
| const std::string& value_str) { |
| char key[desc.key_size]; |
| char value[desc.leaf_size]; |
| |
| StatusTuple r(0); |
| |
| r = string_to_key(key_str, key); |
| if (r.code() != 0) |
| return r; |
| |
| r = string_to_leaf(value_str, value); |
| if (r.code() != 0) |
| return r; |
| |
| if (!update(key, value)) |
| return StatusTuple(-1, "error updating element"); |
| |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFTable::update_value(const std::string& key_str, |
| const std::vector<std::string>& value_str) { |
| size_t ncpus = get_possible_cpus().size(); |
| char key[desc.key_size]; |
| char value[desc.leaf_size * ncpus]; |
| |
| StatusTuple r(0); |
| |
| r = string_to_key(key_str, key); |
| if (r.code() != 0) |
| return r; |
| |
| if (value_str.size() != ncpus) |
| return StatusTuple(-1, "bad value size"); |
| |
| for (size_t i = 0; i < ncpus; i++) { |
| r = string_to_leaf(value_str.at(i), value + i * desc.leaf_size); |
| if (r.code() != 0) |
| return r; |
| } |
| |
| if (!update(key, value)) |
| return StatusTuple(-1, "error updating element"); |
| |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFTable::remove_value(const std::string& key_str) { |
| char key[desc.key_size]; |
| |
| StatusTuple r(0); |
| |
| r = string_to_key(key_str, key); |
| if (r.code() != 0) |
| return r; |
| |
| if (!remove(key)) |
| return StatusTuple(-1, "error removing element"); |
| |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFTable::clear_table_non_atomic() { |
| if (desc.type == BPF_MAP_TYPE_HASH || desc.type == BPF_MAP_TYPE_PERCPU_HASH || |
| desc.type == BPF_MAP_TYPE_LRU_HASH || |
| desc.type == BPF_MAP_TYPE_PERCPU_HASH || |
| desc.type == BPF_MAP_TYPE_HASH_OF_MAPS) { |
| // For hash maps, use the first() interface (which uses get_next_key) to |
| // iterate through the map and clear elements |
| auto key = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.key_size), |
| ::free); |
| |
| while (this->first(key.get())) |
| if (!this->remove(key.get())) { |
| return StatusTuple(-1, |
| "Failed to delete element when clearing table %s", |
| desc.name.c_str()); |
| } |
| } else if (desc.type == BPF_MAP_TYPE_ARRAY || |
| desc.type == BPF_MAP_TYPE_PERCPU_ARRAY) { |
| return StatusTuple(-1, "Array map %s do not support clearing elements", |
| desc.name.c_str()); |
| } else if (desc.type == BPF_MAP_TYPE_PROG_ARRAY || |
| desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || |
| desc.type == BPF_MAP_TYPE_STACK_TRACE || |
| desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) { |
| // For Stack-trace and FD arrays, just iterate over all indices |
| for (size_t i = 0; i < desc.max_entries; i++) { |
| this->remove(&i); |
| } |
| } else { |
| return StatusTuple(-1, "Clearing for map type of %s not supported yet", |
| desc.name.c_str()); |
| } |
| |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFTable::get_table_offline( |
| std::vector<std::pair<std::string, std::string>> &res) { |
| StatusTuple r(0); |
| int err; |
| |
| auto key = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.key_size), |
| ::free); |
| auto value = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.leaf_size), |
| ::free); |
| std::string key_str; |
| std::string value_str; |
| |
| if (desc.type == BPF_MAP_TYPE_ARRAY || |
| desc.type == BPF_MAP_TYPE_PROG_ARRAY || |
| desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || |
| desc.type == BPF_MAP_TYPE_PERCPU_ARRAY || |
| desc.type == BPF_MAP_TYPE_CGROUP_ARRAY || |
| desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS || |
| desc.type == BPF_MAP_TYPE_DEVMAP || |
| desc.type == BPF_MAP_TYPE_CPUMAP || |
| desc.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { |
| // For arrays, just iterate over all indices |
| for (size_t i = 0; i < desc.max_entries; i++) { |
| err = bpf_lookup_elem(desc.fd, &i, value.get()); |
| if (err < 0 && errno == ENOENT) { |
| // Element is not present, skip it |
| continue; |
| } else if (err < 0) { |
| // Other error, abort |
| return StatusTuple(-1, "Error looking up value: %s", std::strerror(errno)); |
| } |
| |
| r = key_to_string(&i, key_str); |
| if (r.code() != 0) |
| return r; |
| |
| r = leaf_to_string(value.get(), value_str); |
| if (r.code() != 0) |
| return r; |
| res.emplace_back(key_str, value_str); |
| } |
| } else { |
| res.clear(); |
| // For other maps, try to use the first() and next() interfaces |
| if (!this->first(key.get())) |
| return StatusTuple(0); |
| |
| while (true) { |
| if (!this->lookup(key.get(), value.get())) |
| break; |
| r = key_to_string(key.get(), key_str); |
| if (r.code() != 0) |
| return r; |
| |
| r = leaf_to_string(value.get(), value_str); |
| if (r.code() != 0) |
| return r; |
| res.emplace_back(key_str, value_str); |
| if (!this->next(key.get(), key.get())) |
| break; |
| } |
| } |
| |
| return StatusTuple(0); |
| } |
| |
| size_t BPFTable::get_possible_cpu_count() { return get_possible_cpus().size(); } |
| |
| BPFStackTable::BPFStackTable(const TableDesc& desc, bool use_debug_file, |
| bool check_debug_file_crc) |
| : BPFTableBase<int, stacktrace_t>(desc) { |
| if (desc.type != BPF_MAP_TYPE_STACK_TRACE) |
| throw std::invalid_argument("Table '" + desc.name + |
| "' is not a stack table"); |
| |
| symbol_option_ = {.use_debug_file = use_debug_file, |
| .check_debug_file_crc = check_debug_file_crc, |
| .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)}; |
| } |
| |
| BPFStackTable::BPFStackTable(BPFStackTable&& that) |
| : BPFTableBase<int, stacktrace_t>(that.desc), |
| symbol_option_(std::move(that.symbol_option_)), |
| pid_sym_(std::move(that.pid_sym_)) { |
| that.pid_sym_.clear(); |
| } |
| |
| BPFStackTable::~BPFStackTable() { |
| for (auto it : pid_sym_) |
| bcc_free_symcache(it.second, it.first); |
| } |
| |
| void BPFStackTable::clear_table_non_atomic() { |
| for (int i = 0; size_t(i) < capacity(); i++) { |
| remove(&i); |
| } |
| } |
| |
| std::vector<uintptr_t> BPFStackTable::get_stack_addr(int stack_id) { |
| std::vector<uintptr_t> res; |
| stacktrace_t stack; |
| if (stack_id < 0) |
| return res; |
| if (!lookup(&stack_id, &stack)) |
| return res; |
| for (int i = 0; (i < BPF_MAX_STACK_DEPTH) && (stack.ip[i] != 0); i++) |
| res.push_back(stack.ip[i]); |
| return res; |
| } |
| |
| std::vector<std::string> BPFStackTable::get_stack_symbol(int stack_id, |
| int pid) { |
| auto addresses = get_stack_addr(stack_id); |
| std::vector<std::string> res; |
| if (addresses.empty()) |
| return res; |
| res.reserve(addresses.size()); |
| |
| if (pid < 0) |
| pid = -1; |
| if (pid_sym_.find(pid) == pid_sym_.end()) |
| pid_sym_[pid] = bcc_symcache_new(pid, &symbol_option_); |
| void* cache = pid_sym_[pid]; |
| |
| bcc_symbol symbol; |
| for (auto addr : addresses) |
| if (bcc_symcache_resolve(cache, addr, &symbol) != 0) |
| res.emplace_back("[UNKNOWN]"); |
| else { |
| res.push_back(symbol.demangle_name); |
| bcc_symbol_free_demangle_name(&symbol); |
| } |
| |
| return res; |
| } |
| |
| BPFPerfBuffer::BPFPerfBuffer(const TableDesc& desc) |
| : BPFTableBase<int, int>(desc), epfd_(-1) { |
| if (desc.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) |
| throw std::invalid_argument("Table '" + desc.name + |
| "' is not a perf buffer"); |
| } |
| |
| StatusTuple BPFPerfBuffer::open_on_cpu(perf_reader_raw_cb cb, |
| perf_reader_lost_cb lost_cb, int cpu, |
| void* cb_cookie, int page_cnt) { |
| if (cpu_readers_.find(cpu) != cpu_readers_.end()) |
| return StatusTuple(-1, "Perf buffer already open on CPU %d", cpu); |
| |
| auto reader = static_cast<perf_reader*>( |
| bpf_open_perf_buffer(cb, lost_cb, cb_cookie, -1, cpu, page_cnt)); |
| if (reader == nullptr) |
| return StatusTuple(-1, "Unable to construct perf reader"); |
| |
| int reader_fd = perf_reader_fd(reader); |
| if (!update(&cpu, &reader_fd)) { |
| perf_reader_free(static_cast<void*>(reader)); |
| return StatusTuple(-1, "Unable to open perf buffer on CPU %d: %s", cpu, |
| std::strerror(errno)); |
| } |
| |
| struct epoll_event event = {}; |
| event.events = EPOLLIN; |
| event.data.ptr = static_cast<void*>(reader); |
| if (epoll_ctl(epfd_, EPOLL_CTL_ADD, reader_fd, &event) != 0) { |
| perf_reader_free(static_cast<void*>(reader)); |
| return StatusTuple(-1, "Unable to add perf_reader FD to epoll: %s", |
| std::strerror(errno)); |
| } |
| |
| cpu_readers_[cpu] = reader; |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFPerfBuffer::open_all_cpu(perf_reader_raw_cb cb, |
| perf_reader_lost_cb lost_cb, |
| void* cb_cookie, int page_cnt) { |
| if (cpu_readers_.size() != 0 || epfd_ != -1) |
| return StatusTuple(-1, "Previously opened perf buffer not cleaned"); |
| |
| std::vector<int> cpus = get_online_cpus(); |
| ep_events_.reset(new epoll_event[cpus.size()]); |
| epfd_ = epoll_create1(EPOLL_CLOEXEC); |
| |
| for (int i : cpus) { |
| auto res = open_on_cpu(cb, lost_cb, i, cb_cookie, page_cnt); |
| if (res.code() != 0) { |
| TRY2(close_all_cpu()); |
| return res; |
| } |
| } |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFPerfBuffer::close_on_cpu(int cpu) { |
| auto it = cpu_readers_.find(cpu); |
| if (it == cpu_readers_.end()) |
| return StatusTuple(0); |
| perf_reader_free(static_cast<void*>(it->second)); |
| if (!remove(const_cast<int*>(&(it->first)))) |
| return StatusTuple(-1, "Unable to close perf buffer on CPU %d", it->first); |
| cpu_readers_.erase(it); |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFPerfBuffer::close_all_cpu() { |
| std::string errors; |
| bool has_error = false; |
| |
| if (epfd_ >= 0) { |
| int close_res = close(epfd_); |
| epfd_ = -1; |
| ep_events_.reset(); |
| if (close_res != 0) { |
| has_error = true; |
| errors += std::string(std::strerror(errno)) + "\n"; |
| } |
| } |
| |
| std::vector<int> opened_cpus; |
| for (auto it : cpu_readers_) |
| opened_cpus.push_back(it.first); |
| for (int i : opened_cpus) { |
| auto res = close_on_cpu(i); |
| if (res.code() != 0) { |
| errors += "Failed to close CPU" + std::to_string(i) + " perf buffer: "; |
| errors += res.msg() + "\n"; |
| has_error = true; |
| } |
| } |
| |
| if (has_error) |
| return StatusTuple(-1, errors); |
| return StatusTuple(0); |
| } |
| |
| int BPFPerfBuffer::poll(int timeout_ms) { |
| if (epfd_ < 0) |
| return -1; |
| int cnt = |
| epoll_wait(epfd_, ep_events_.get(), cpu_readers_.size(), timeout_ms); |
| for (int i = 0; i < cnt; i++) |
| perf_reader_event_read(static_cast<perf_reader*>(ep_events_[i].data.ptr)); |
| return cnt; |
| } |
| |
| BPFPerfBuffer::~BPFPerfBuffer() { |
| auto res = close_all_cpu(); |
| if (res.code() != 0) |
| std::cerr << "Failed to close all perf buffer on destruction: " << res.msg() |
| << std::endl; |
| } |
| |
| BPFPerfEventArray::BPFPerfEventArray(const TableDesc& desc) |
| : BPFTableBase<int, int>(desc) { |
| if (desc.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) |
| throw std::invalid_argument("Table '" + desc.name + |
| "' is not a perf event array"); |
| } |
| |
| StatusTuple BPFPerfEventArray::open_all_cpu(uint32_t type, uint64_t config) { |
| if (cpu_fds_.size() != 0) |
| return StatusTuple(-1, "Previously opened perf event not cleaned"); |
| |
| std::vector<int> cpus = get_online_cpus(); |
| |
| for (int i : cpus) { |
| auto res = open_on_cpu(i, type, config); |
| if (res.code() != 0) { |
| TRY2(close_all_cpu()); |
| return res; |
| } |
| } |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFPerfEventArray::close_all_cpu() { |
| std::string errors; |
| bool has_error = false; |
| |
| std::vector<int> opened_cpus; |
| for (auto it : cpu_fds_) |
| opened_cpus.push_back(it.first); |
| for (int i : opened_cpus) { |
| auto res = close_on_cpu(i); |
| if (res.code() != 0) { |
| errors += "Failed to close CPU" + std::to_string(i) + " perf event: "; |
| errors += res.msg() + "\n"; |
| has_error = true; |
| } |
| } |
| |
| if (has_error) |
| return StatusTuple(-1, errors); |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFPerfEventArray::open_on_cpu(int cpu, uint32_t type, |
| uint64_t config) { |
| if (cpu_fds_.find(cpu) != cpu_fds_.end()) |
| return StatusTuple(-1, "Perf event already open on CPU %d", cpu); |
| int fd = bpf_open_perf_event(type, config, -1, cpu); |
| if (fd < 0) { |
| return StatusTuple(-1, "Error constructing perf event %" PRIu32 ":%" PRIu64, |
| type, config); |
| } |
| if (!update(&cpu, &fd)) { |
| bpf_close_perf_event_fd(fd); |
| return StatusTuple(-1, "Unable to open perf event on CPU %d: %s", cpu, |
| std::strerror(errno)); |
| } |
| cpu_fds_[cpu] = fd; |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFPerfEventArray::close_on_cpu(int cpu) { |
| auto it = cpu_fds_.find(cpu); |
| if (it == cpu_fds_.end()) { |
| return StatusTuple(0); |
| } |
| bpf_close_perf_event_fd(it->second); |
| cpu_fds_.erase(it); |
| return StatusTuple(0); |
| } |
| |
| BPFPerfEventArray::~BPFPerfEventArray() { |
| auto res = close_all_cpu(); |
| if (res.code() != 0) { |
| std::cerr << "Failed to close all perf buffer on destruction: " << res.msg() |
| << std::endl; |
| } |
| } |
| |
| BPFProgTable::BPFProgTable(const TableDesc& desc) |
| : BPFTableBase<int, int>(desc) { |
| if (desc.type != BPF_MAP_TYPE_PROG_ARRAY) |
| throw std::invalid_argument("Table '" + desc.name + |
| "' is not a prog table"); |
| } |
| |
| StatusTuple BPFProgTable::update_value(const int& index, const int& prog_fd) { |
| if (!this->update(const_cast<int*>(&index), const_cast<int*>(&prog_fd))) |
| return StatusTuple(-1, "Error updating value: %s", std::strerror(errno)); |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFProgTable::remove_value(const int& index) { |
| if (!this->remove(const_cast<int*>(&index))) |
| return StatusTuple(-1, "Error removing value: %s", std::strerror(errno)); |
| return StatusTuple(0); |
| } |
| |
| BPFCgroupArray::BPFCgroupArray(const TableDesc& desc) |
| : BPFTableBase<int, int>(desc) { |
| if (desc.type != BPF_MAP_TYPE_CGROUP_ARRAY) |
| throw std::invalid_argument("Table '" + desc.name + |
| "' is not a cgroup array"); |
| } |
| |
| StatusTuple BPFCgroupArray::update_value(const int& index, |
| const int& cgroup2_fd) { |
| if (!this->update(const_cast<int*>(&index), const_cast<int*>(&cgroup2_fd))) |
| return StatusTuple(-1, "Error updating value: %s", std::strerror(errno)); |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFCgroupArray::update_value(const int& index, |
| const std::string& cgroup2_path) { |
| FileDesc f(::open(cgroup2_path.c_str(), O_RDONLY | O_CLOEXEC)); |
| if ((int)f < 0) |
| return StatusTuple(-1, "Unable to open %s", cgroup2_path.c_str()); |
| TRY2(update_value(index, (int)f)); |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFCgroupArray::remove_value(const int& index) { |
| if (!this->remove(const_cast<int*>(&index))) |
| return StatusTuple(-1, "Error removing value: %s", std::strerror(errno)); |
| return StatusTuple(0); |
| } |
| |
| BPFDevmapTable::BPFDevmapTable(const TableDesc& desc) |
| : BPFTableBase<int, int>(desc) { |
| if(desc.type != BPF_MAP_TYPE_DEVMAP) |
| throw std::invalid_argument("Table '" + desc.name + |
| "' is not a devmap table"); |
| } |
| |
| StatusTuple BPFDevmapTable::update_value(const int& index, |
| const int& value) { |
| if (!this->update(const_cast<int*>(&index), const_cast<int*>(&value))) |
| return StatusTuple(-1, "Error updating value: %s", std::strerror(errno)); |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFDevmapTable::get_value(const int& index, |
| int& value) { |
| if (!this->lookup(const_cast<int*>(&index), &value)) |
| return StatusTuple(-1, "Error getting value: %s", std::strerror(errno)); |
| return StatusTuple(0); |
| } |
| |
| StatusTuple BPFDevmapTable::remove_value(const int& index) { |
| if (!this->remove(const_cast<int*>(&index))) |
| return StatusTuple(-1, "Error removing value: %s", std::strerror(errno)); |
| return StatusTuple(0); |
| } |
| |
| } // namespace ebpf |