blob: 64fe77ca0b60738de7e6f7aedff044ff207c568e [file] [log] [blame]
/*
* Copyright (c) 2016 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fcntl.h>
#include <linux/elf.h>
#include <linux/perf_event.h>
#include <sys/epoll.h>
#include <unistd.h>
#include <cerrno>
#include <cinttypes>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include "BPFTable.h"
#include "bcc_exception.h"
#include "bcc_syms.h"
#include "common.h"
#include "file_desc.h"
#include "libbpf.h"
#include "perf_reader.h"
namespace ebpf {
// Builds a table whose key and leaf types are both `void`; the descriptor is
// simply forwarded to the base class.
BPFTable::BPFTable(const TableDesc& desc)
    : BPFTableBase<void, void>(desc) {
}
// Looks up the element stored under `key_str` and renders it into
// `value_str`.
//
// Returns the string_to_key() status when the key cannot be converted,
// code -1 when the lookup fails, and otherwise the status reported by
// leaf_to_string().
StatusTuple BPFTable::get_value(const std::string& key_str,
                                std::string& value_str) {
  char key_buf[desc.key_size];
  char leaf_buf[desc.leaf_size];

  StatusTuple status = string_to_key(key_str, key_buf);
  if (status.code() != 0)
    return status;

  if (lookup(key_buf, leaf_buf))
    return leaf_to_string(leaf_buf, value_str);

  return StatusTuple(-1, "error getting value");
}
// Per-CPU variant of get_value(): looks up `key_str` and renders one string
// per possible CPU into `value_str` (resized to the possible-CPU count).
//
// Returns the string_to_key() status when the key cannot be converted,
// code -1 when the lookup fails, the first leaf_to_string() failure if any,
// and code 0 otherwise.
StatusTuple BPFTable::get_value(const std::string& key_str,
                                std::vector<std::string>& value_str) {
  const size_t ncpus = get_possible_cpus().size();
  // For per-CPU maps the kernel copies one slot per possible CPU, each slot
  // being the value size rounded up to a multiple of 8 bytes.
  const size_t stride = (desc.leaf_size + 7) / 8 * 8;

  // Heap buffers instead of variable-length arrays: the per-CPU buffer grows
  // with the CPU count and VLAs are not standard C++.
  std::vector<char> key(desc.key_size);
  std::vector<char> value(stride * ncpus);

  StatusTuple r(0);
  r = string_to_key(key_str, key.data());
  if (r.code() != 0)
    return r;

  if (!lookup(key.data(), value.data()))
    return StatusTuple(-1, "error getting value");

  value_str.resize(ncpus);
  for (size_t i = 0; i < ncpus; i++) {
    r = leaf_to_string(value.data() + i * stride, value_str.at(i));
    if (r.code() != 0)
      return r;
  }
  return StatusTuple(0);
}
// Converts `key_str` and `value_str` to their binary forms and stores the
// pair in the table.
//
// Returns the failing conversion status if either string cannot be parsed,
// code -1 if update() fails, and code 0 on success.
StatusTuple BPFTable::update_value(const std::string& key_str,
                                   const std::string& value_str) {
  char key_buf[desc.key_size];
  char leaf_buf[desc.leaf_size];

  StatusTuple status = string_to_key(key_str, key_buf);
  if (status.code() != 0)
    return status;

  status = string_to_leaf(value_str, leaf_buf);
  if (status.code() != 0)
    return status;

  if (update(key_buf, leaf_buf))
    return StatusTuple(0);

  return StatusTuple(-1, "error updating element");
}
// Per-CPU variant of update_value(): `value_str` must hold exactly one
// string per possible CPU; all of them are converted and written in one
// update() call.
//
// Returns the string_to_key() status when the key cannot be converted,
// code -1 when `value_str` has the wrong number of entries, the first
// string_to_leaf() failure if any, code -1 when update() fails, and code 0
// on success.
StatusTuple BPFTable::update_value(const std::string& key_str,
                                   const std::vector<std::string>& value_str) {
  const size_t ncpus = get_possible_cpus().size();
  // For per-CPU maps the kernel reads one slot per possible CPU, each slot
  // being the value size rounded up to a multiple of 8 bytes.
  const size_t stride = (desc.leaf_size + 7) / 8 * 8;

  // Heap buffers instead of variable-length arrays: the per-CPU buffer grows
  // with the CPU count and VLAs are not standard C++. std::vector also
  // zero-fills, so padding bytes between slots are well defined.
  std::vector<char> key(desc.key_size);
  std::vector<char> value(stride * ncpus);

  StatusTuple r(0);
  r = string_to_key(key_str, key.data());
  if (r.code() != 0)
    return r;

  if (value_str.size() != ncpus)
    return StatusTuple(-1, "bad value size");

  for (size_t i = 0; i < ncpus; i++) {
    r = string_to_leaf(value_str.at(i), value.data() + i * stride);
    if (r.code() != 0)
      return r;
  }

  if (!update(key.data(), value.data()))
    return StatusTuple(-1, "error updating element");
  return StatusTuple(0);
}
// Deletes the element stored under `key_str`.
//
// Returns the string_to_key() status when the key cannot be converted,
// code -1 when remove() fails, and code 0 on success.
StatusTuple BPFTable::remove_value(const std::string& key_str) {
  char key_buf[desc.key_size];

  StatusTuple status = string_to_key(key_str, key_buf);
  if (status.code() != 0)
    return status;

  if (remove(key_buf))
    return StatusTuple(0);

  return StatusTuple(-1, "error removing element");
}
// Empties the table one element at a time. Each removal is a separate
// operation, so the table as a whole is not cleared atomically.
//
// Returns code 0 on success and code -1 when an element of a hash-type map
// cannot be deleted, when the map is a plain or per-CPU array, or when the
// map type is not handled here.
StatusTuple BPFTable::clear_table_non_atomic() {
  if (desc.type == BPF_MAP_TYPE_HASH || desc.type == BPF_MAP_TYPE_PERCPU_HASH ||
      desc.type == BPF_MAP_TYPE_LRU_HASH ||
      desc.type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
      desc.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
    // For hash maps, use the first() interface (which uses get_next_key) to
    // iterate through the map and clear elements
    auto key = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.key_size),
                                                        ::free);
    while (this->first(key.get()))
      if (!this->remove(key.get())) {
        return StatusTuple(-1,
                           "Failed to delete element when clearing table %s",
                           desc.name.c_str());
      }
  } else if (desc.type == BPF_MAP_TYPE_ARRAY ||
             desc.type == BPF_MAP_TYPE_PERCPU_ARRAY) {
    return StatusTuple(-1, "Array map %s do not support clearing elements",
                       desc.name.c_str());
  } else if (desc.type == BPF_MAP_TYPE_PROG_ARRAY ||
             desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
             desc.type == BPF_MAP_TYPE_STACK_TRACE ||
             desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
    // For Stack-trace and FD arrays, just iterate over all indices. These
    // maps use a 4-byte key, so hand the kernel a pointer to a 32-bit index
    // rather than to a size_t (whose low half is not first on big-endian).
    // Removal failures (e.g. empty slots) are deliberately ignored.
    for (size_t i = 0; i < desc.max_entries; i++) {
      uint32_t index = static_cast<uint32_t>(i);
      this->remove(&index);
    }
  } else {
    return StatusTuple(-1, "Clearing for map type of %s not supported yet",
                       desc.name.c_str());
  }
  return StatusTuple(0);
}
// Dumps the table as (key string, value string) pairs into `res`.
//
// Array-like maps are walked by index from 0 to max_entries, skipping
// indices whose lookup reports ENOENT; entries are appended to `res`.
// All other map types are walked with first()/next(), and `res` is cleared
// before the walk starts.
//
// Returns code 0 on success, code -1 when an array lookup fails with an
// error other than ENOENT, or the status of the failing key_to_string() /
// leaf_to_string() conversion.
StatusTuple BPFTable::get_table_offline(
    std::vector<std::pair<std::string, std::string>>& res) {
  StatusTuple r(0);
  int err;

  // Per-CPU maps write one slot per possible CPU on lookup, each slot being
  // the value size rounded up to a multiple of 8 bytes. Size the buffer for
  // that, otherwise the kernel writes past a leaf_size-byte allocation.
  size_t value_size = desc.leaf_size;
  if (desc.type == BPF_MAP_TYPE_PERCPU_HASH ||
      desc.type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
      desc.type == BPF_MAP_TYPE_PERCPU_ARRAY) {
    value_size = (desc.leaf_size + 7) / 8 * 8 * get_possible_cpus().size();
  }

  auto key = std::unique_ptr<void, decltype(::free)*>(::malloc(desc.key_size),
                                                      ::free);
  auto value = std::unique_ptr<void, decltype(::free)*>(::malloc(value_size),
                                                        ::free);
  std::string key_str;
  std::string value_str;

  if (desc.type == BPF_MAP_TYPE_ARRAY ||
      desc.type == BPF_MAP_TYPE_PROG_ARRAY ||
      desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
      desc.type == BPF_MAP_TYPE_PERCPU_ARRAY ||
      desc.type == BPF_MAP_TYPE_CGROUP_ARRAY ||
      desc.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
      desc.type == BPF_MAP_TYPE_DEVMAP ||
      desc.type == BPF_MAP_TYPE_CPUMAP ||
      desc.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
    // For arrays, just iterate over all indices. These maps use a 4-byte
    // key, so pass a pointer to a 32-bit index rather than to a size_t
    // (whose low half is not first on big-endian).
    for (size_t i = 0; i < desc.max_entries; i++) {
      uint32_t index = static_cast<uint32_t>(i);
      err = bpf_lookup_elem(desc.fd, &index, value.get());
      if (err < 0 && errno == ENOENT) {
        // Element is not present, skip it
        continue;
      } else if (err < 0) {
        // Other error, abort
        return StatusTuple(-1, "Error looking up value: %s",
                           std::strerror(errno));
      }
      r = key_to_string(&index, key_str);
      if (r.code() != 0)
        return r;
      // NOTE(review): for per-CPU maps only the start of the buffer is
      // rendered (presumably the first CPU's slot) -- confirm intent.
      r = leaf_to_string(value.get(), value_str);
      if (r.code() != 0)
        return r;
      res.emplace_back(key_str, value_str);
    }
  } else {
    // Only this branch resets `res`; the array branch above appends.
    res.clear();
    // For other maps, try to use the first() and next() interfaces
    if (!this->first(key.get()))
      return StatusTuple(0);

    while (true) {
      if (!this->lookup(key.get(), value.get()))
        break;
      r = key_to_string(key.get(), key_str);
      if (r.code() != 0)
        return r;
      r = leaf_to_string(value.get(), value_str);
      if (r.code() != 0)
        return r;
      res.emplace_back(key_str, value_str);
      if (!this->next(key.get(), key.get()))
        break;
    }
  }
  return StatusTuple(0);
}
size_t BPFTable::get_possible_cpu_count() { return get_possible_cpus().size(); }
// Wraps a stack-trace map and records the symbol-resolution options used
// later by get_stack_symbol().
//
// Throws std::invalid_argument when `desc` does not describe a
// BPF_MAP_TYPE_STACK_TRACE map.
BPFStackTable::BPFStackTable(const TableDesc& desc, bool use_debug_file,
                             bool check_debug_file_crc)
    : BPFTableBase<int, stacktrace_t>(desc) {
  const bool is_stack_map = (desc.type == BPF_MAP_TYPE_STACK_TRACE);
  if (!is_stack_map) {
    throw std::invalid_argument("Table '" + desc.name +
                                "' is not a stack table");
  }

  // Resolve both regular functions and GNU indirect functions.
  symbol_option_ = {.use_debug_file = use_debug_file,
                    .check_debug_file_crc = check_debug_file_crc,
                    .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)};
}
// Move constructor: takes over `that`'s symbol options and per-pid symbol
// caches. `that.pid_sym_` is explicitly emptied afterwards so the caches
// handed over here are not also released by `that`'s destructor.
BPFStackTable::BPFStackTable(BPFStackTable&& that)
    : BPFTableBase<int, stacktrace_t>(that.desc),
      symbol_option_(std::move(that.symbol_option_)),
      pid_sym_(std::move(that.pid_sym_)) {
  that.pid_sym_.clear();
}
// Releases every symbol cache created by get_stack_symbol().
BPFStackTable::~BPFStackTable() {
  for (const auto& entry : pid_sym_) {
    // entry.first is the pid, entry.second the cache created for it.
    bcc_free_symcache(entry.second, entry.first);
  }
}
// Attempts to remove every stack id in [0, capacity()). Each removal is a
// separate call and its result is ignored.
void BPFStackTable::clear_table_non_atomic() {
  int stack_id = 0;
  while (size_t(stack_id) < capacity()) {
    remove(&stack_id);
    stack_id++;
  }
}
std::vector<uintptr_t> BPFStackTable::get_stack_addr(int stack_id) {
std::vector<uintptr_t> res;
stacktrace_t stack;
if (stack_id < 0)
return res;
if (!lookup(&stack_id, &stack))
return res;
for (int i = 0; (i < BPF_MAX_STACK_DEPTH) && (stack.ip[i] != 0); i++)
res.push_back(stack.ip[i]);
return res;
}
std::vector<std::string> BPFStackTable::get_stack_symbol(int stack_id,
int pid) {
auto addresses = get_stack_addr(stack_id);
std::vector<std::string> res;
if (addresses.empty())
return res;
res.reserve(addresses.size());
if (pid < 0)
pid = -1;
if (pid_sym_.find(pid) == pid_sym_.end())
pid_sym_[pid] = bcc_symcache_new(pid, &symbol_option_);
void* cache = pid_sym_[pid];
bcc_symbol symbol;
for (auto addr : addresses)
if (bcc_symcache_resolve(cache, addr, &symbol) != 0)
res.emplace_back("[UNKNOWN]");
else {
res.push_back(symbol.demangle_name);
bcc_symbol_free_demangle_name(&symbol);
}
return res;
}
// Wraps a perf-event-array map for use as a perf buffer; the epoll
// descriptor starts out as -1 (not created).
//
// Throws std::invalid_argument when `desc` does not describe a
// BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
BPFPerfBuffer::BPFPerfBuffer(const TableDesc& desc)
    : BPFTableBase<int, int>(desc), epfd_(-1) {
  const bool is_perf_array = (desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY);
  if (!is_perf_array) {
    throw std::invalid_argument("Table '" + desc.name +
                                "' is not a perf buffer");
  }
}
// Opens a perf reader for `cpu`, stores its fd in the map under key `cpu`,
// registers the fd with the epoll instance `epfd_`, and records the reader
// in cpu_readers_.
//
// `cb` / `lost_cb` / `cb_cookie` / `page_cnt` are forwarded unchanged to
// bpf_open_perf_buffer().
//
// Returns code 0 on success and code -1 when a reader is already open on
// `cpu`, the reader cannot be created, the map update fails, or the epoll
// registration fails. On failure nothing is left behind for `cpu`.
StatusTuple BPFPerfBuffer::open_on_cpu(perf_reader_raw_cb cb,
                                       perf_reader_lost_cb lost_cb, int cpu,
                                       void* cb_cookie, int page_cnt) {
  if (cpu_readers_.find(cpu) != cpu_readers_.end())
    return StatusTuple(-1, "Perf buffer already open on CPU %d", cpu);

  auto reader = static_cast<perf_reader*>(
      bpf_open_perf_buffer(cb, lost_cb, cb_cookie, -1, cpu, page_cnt));
  if (reader == nullptr)
    return StatusTuple(-1, "Unable to construct perf reader");

  int reader_fd = perf_reader_fd(reader);
  if (!update(&cpu, &reader_fd)) {
    // Capture errno before cleanup: freeing the reader may overwrite it.
    const int saved_errno = errno;
    perf_reader_free(static_cast<void*>(reader));
    return StatusTuple(-1, "Unable to open perf buffer on CPU %d: %s", cpu,
                       std::strerror(saved_errno));
  }

  struct epoll_event event = {};
  event.events = EPOLLIN;
  event.data.ptr = static_cast<void*>(reader);
  if (epoll_ctl(epfd_, EPOLL_CTL_ADD, reader_fd, &event) != 0) {
    // Capture errno before cleanup: the calls below may overwrite it.
    const int saved_errno = errno;
    // Undo the map update above. The CPU is never recorded in cpu_readers_
    // on this path, so close_on_cpu() would not remove the entry later.
    // Best effort: the epoll failure is the error that gets reported.
    remove(&cpu);
    perf_reader_free(static_cast<void*>(reader));
    return StatusTuple(-1, "Unable to add perf_reader FD to epoll: %s",
                       std::strerror(saved_errno));
  }

  cpu_readers_[cpu] = reader;
  return StatusTuple(0);
}
// Creates the epoll instance and opens a perf reader on every online CPU.
//
// `cb` / `lost_cb` / `cb_cookie` / `page_cnt` are forwarded unchanged to
// open_on_cpu() for each CPU.
//
// Returns code 0 on success and code -1 when readers or an epoll instance
// from a previous open still exist, when the epoll instance cannot be
// created, or when opening any CPU fails. In the last case everything
// opened so far is closed again; if that cleanup itself fails, the cleanup
// error is returned instead of the open error.
StatusTuple BPFPerfBuffer::open_all_cpu(perf_reader_raw_cb cb,
                                        perf_reader_lost_cb lost_cb,
                                        void* cb_cookie, int page_cnt) {
  if (cpu_readers_.size() != 0 || epfd_ != -1)
    return StatusTuple(-1, "Previously opened perf buffer not cleaned");

  std::vector<int> cpus = get_online_cpus();

  // Create the epoll instance first and stop on failure, so no perf readers
  // are opened against an invalid epoll descriptor.
  epfd_ = epoll_create1(EPOLL_CLOEXEC);
  if (epfd_ < 0) {
    const int saved_errno = errno;
    epfd_ = -1;  // keep the "not opened" sentinel checked above
    return StatusTuple(-1, "Unable to create epoll instance: %s",
                       std::strerror(saved_errno));
  }
  ep_events_.reset(new epoll_event[cpus.size()]);

  for (int i : cpus) {
    auto res = open_on_cpu(cb, lost_cb, i, cb_cookie, page_cnt);
    if (res.code() != 0) {
      TRY2(close_all_cpu());
      return res;
    }
  }
  return StatusTuple(0);
}
// Frees the perf reader opened on `cpu` and removes the map entry for that
// CPU. Does nothing (code 0) when no reader is recorded for `cpu`.
//
// Returns code -1 when the map entry cannot be removed; the reader is
// freed and forgotten in either case.
StatusTuple BPFPerfBuffer::close_on_cpu(int cpu) {
  auto it = cpu_readers_.find(cpu);
  if (it == cpu_readers_.end())
    return StatusTuple(0);

  perf_reader_free(static_cast<void*>(it->second));
  // Drop the bookkeeping entry right away: the reader is already freed, so
  // leaving it in cpu_readers_ when remove() fails would make a later close
  // (e.g. from the destructor) free the same reader a second time.
  cpu_readers_.erase(it);

  if (!remove(&cpu))
    return StatusTuple(-1, "Unable to close perf buffer on CPU %d", cpu);
  return StatusTuple(0);
}
// Closes the epoll instance (if one exists) and then every per-CPU reader.
// All failures are collected, one per line, and reported together with
// code -1; code 0 is returned when nothing failed.
StatusTuple BPFPerfBuffer::close_all_cpu() {
  // Every failure path appends at least a newline, so a non-empty string
  // means at least one step failed.
  std::string errors;

  if (epfd_ >= 0) {
    int rc = close(epfd_);
    epfd_ = -1;
    ep_events_.reset();
    if (rc != 0)
      errors += std::string(std::strerror(errno)) + "\n";
  }

  // Snapshot the CPU ids first: close_on_cpu() erases from cpu_readers_,
  // which must not happen while iterating over it.
  std::vector<int> opened_cpus;
  for (const auto& entry : cpu_readers_)
    opened_cpus.push_back(entry.first);

  for (int cpu : opened_cpus) {
    auto status = close_on_cpu(cpu);
    if (status.code() == 0)
      continue;
    errors += "Failed to close CPU" + std::to_string(cpu) + " perf buffer: ";
    errors += status.msg() + "\n";
  }

  if (!errors.empty())
    return StatusTuple(-1, errors);
  return StatusTuple(0);
}
int BPFPerfBuffer::poll(int timeout_ms) {
if (epfd_ < 0)
return -1;
int cnt =
epoll_wait(epfd_, ep_events_.get(), cpu_readers_.size(), timeout_ms);
for (int i = 0; i < cnt; i++)
perf_reader_event_read(static_cast<perf_reader*>(ep_events_[i].data.ptr));
return cnt;
}
// Closes everything that is still open; a failure is only logged to stderr.
BPFPerfBuffer::~BPFPerfBuffer() {
  auto status = close_all_cpu();
  if (status.code() == 0)
    return;
  std::cerr << "Failed to close all perf buffer on destruction: "
            << status.msg() << std::endl;
}
// Wraps a perf-event-array map.
//
// Throws std::invalid_argument when `desc` does not describe a
// BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
BPFPerfEventArray::BPFPerfEventArray(const TableDesc& desc)
    : BPFTableBase<int, int>(desc) {
  const bool is_perf_array = (desc.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY);
  if (!is_perf_array) {
    throw std::invalid_argument("Table '" + desc.name +
                                "' is not a perf event array");
  }
}
// Opens the perf event (`type`, `config`) on every online CPU.
//
// Returns code -1 when events from a previous open are still recorded or
// when opening any CPU fails. In the latter case everything opened so far
// is closed again; if that cleanup fails, the cleanup error is returned
// instead of the open error.
StatusTuple BPFPerfEventArray::open_all_cpu(uint32_t type, uint64_t config) {
  if (cpu_fds_.size() != 0)
    return StatusTuple(-1, "Previously opened perf event not cleaned");

  std::vector<int> online = get_online_cpus();
  for (int cpu : online) {
    auto status = open_on_cpu(cpu, type, config);
    if (status.code() == 0)
      continue;
    TRY2(close_all_cpu());
    return status;
  }
  return StatusTuple(0);
}
// Closes the perf event on every CPU recorded in cpu_fds_. Failures are
// collected, one per line, and reported together with code -1; code 0 is
// returned when nothing failed.
StatusTuple BPFPerfEventArray::close_all_cpu() {
  // Every failure appends a non-empty line, so a non-empty string means at
  // least one close failed.
  std::string errors;

  // Snapshot the CPU ids first: close_on_cpu() erases from cpu_fds_, which
  // must not happen while iterating over it.
  std::vector<int> opened_cpus;
  for (const auto& entry : cpu_fds_)
    opened_cpus.push_back(entry.first);

  for (int cpu : opened_cpus) {
    auto status = close_on_cpu(cpu);
    if (status.code() == 0)
      continue;
    errors += "Failed to close CPU" + std::to_string(cpu) + " perf event: ";
    errors += status.msg() + "\n";
  }

  if (!errors.empty())
    return StatusTuple(-1, errors);
  return StatusTuple(0);
}
// Opens the perf event (`type`, `config`) on `cpu`, stores its fd in the
// map under key `cpu`, and records the fd in cpu_fds_.
//
// Returns code 0 on success and code -1 when an event is already open on
// `cpu`, the event cannot be created, or the map update fails (the fd is
// closed again in that case).
StatusTuple BPFPerfEventArray::open_on_cpu(int cpu, uint32_t type,
                                           uint64_t config) {
  if (cpu_fds_.find(cpu) != cpu_fds_.end())
    return StatusTuple(-1, "Perf event already open on CPU %d", cpu);

  int fd = bpf_open_perf_event(type, config, -1, cpu);
  if (fd < 0) {
    return StatusTuple(-1, "Error constructing perf event %" PRIu32 ":%" PRIu64,
                       type, config);
  }

  if (!update(&cpu, &fd)) {
    // Capture errno before cleanup: closing the fd may overwrite it.
    const int saved_errno = errno;
    bpf_close_perf_event_fd(fd);
    return StatusTuple(-1, "Unable to open perf event on CPU %d: %s", cpu,
                       std::strerror(saved_errno));
  }

  cpu_fds_[cpu] = fd;
  return StatusTuple(0);
}
// Closes the perf event fd recorded for `cpu` and forgets it. Always
// returns code 0, including when nothing is recorded for `cpu`.
StatusTuple BPFPerfEventArray::close_on_cpu(int cpu) {
  auto entry = cpu_fds_.find(cpu);
  if (entry != cpu_fds_.end()) {
    bpf_close_perf_event_fd(entry->second);
    cpu_fds_.erase(entry);
  }
  return StatusTuple(0);
}
// Closes every perf event that is still open; a failure is only logged to
// stderr.
BPFPerfEventArray::~BPFPerfEventArray() {
  auto status = close_all_cpu();
  if (status.code() == 0)
    return;
  std::cerr << "Failed to close all perf buffer on destruction: "
            << status.msg() << std::endl;
}
// Wraps a program-array map.
//
// Throws std::invalid_argument when `desc` does not describe a
// BPF_MAP_TYPE_PROG_ARRAY map.
BPFProgTable::BPFProgTable(const TableDesc& desc)
    : BPFTableBase<int, int>(desc) {
  const bool is_prog_array = (desc.type == BPF_MAP_TYPE_PROG_ARRAY);
  if (!is_prog_array) {
    throw std::invalid_argument("Table '" + desc.name +
                                "' is not a prog table");
  }
}
// Stores `prog_fd` at slot `index`. Returns code 0 on success, or code -1
// with the errno description when update() fails.
StatusTuple BPFProgTable::update_value(const int& index, const int& prog_fd) {
  // update() takes non-const pointers, hence the casts.
  int* key = const_cast<int*>(&index);
  int* leaf = const_cast<int*>(&prog_fd);

  if (this->update(key, leaf))
    return StatusTuple(0);
  return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
}
// Removes the entry at slot `index`. Returns code 0 on success, or code -1
// with the errno description when remove() fails.
StatusTuple BPFProgTable::remove_value(const int& index) {
  // remove() takes a non-const pointer, hence the cast.
  int* key = const_cast<int*>(&index);

  if (this->remove(key))
    return StatusTuple(0);
  return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
}
// Wraps a cgroup-array map.
//
// Throws std::invalid_argument when `desc` does not describe a
// BPF_MAP_TYPE_CGROUP_ARRAY map.
BPFCgroupArray::BPFCgroupArray(const TableDesc& desc)
    : BPFTableBase<int, int>(desc) {
  const bool is_cgroup_array = (desc.type == BPF_MAP_TYPE_CGROUP_ARRAY);
  if (!is_cgroup_array) {
    throw std::invalid_argument("Table '" + desc.name +
                                "' is not a cgroup array");
  }
}
// Stores the cgroup fd `cgroup2_fd` at slot `index`. Returns code 0 on
// success, or code -1 with the errno description when update() fails.
StatusTuple BPFCgroupArray::update_value(const int& index,
                                         const int& cgroup2_fd) {
  // update() takes non-const pointers, hence the casts.
  int* key = const_cast<int*>(&index);
  int* leaf = const_cast<int*>(&cgroup2_fd);

  if (this->update(key, leaf))
    return StatusTuple(0);
  return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
}
// Opens `cgroup2_path` read-only (close-on-exec) and stores the resulting
// fd at slot `index` via the fd overload of update_value().
//
// Returns code -1 when the path cannot be opened, the overload's error when
// the update fails, and code 0 on success.
StatusTuple BPFCgroupArray::update_value(const int& index,
                                         const std::string& cgroup2_path) {
  const int open_flags = O_RDONLY | O_CLOEXEC;
  FileDesc cgroup_fd(::open(cgroup2_path.c_str(), open_flags));

  if ((int)cgroup_fd < 0)
    return StatusTuple(-1, "Unable to open %s", cgroup2_path.c_str());

  TRY2(update_value(index, (int)cgroup_fd));
  return StatusTuple(0);
}
// Removes the entry at slot `index`. Returns code 0 on success, or code -1
// with the errno description when remove() fails.
StatusTuple BPFCgroupArray::remove_value(const int& index) {
  // remove() takes a non-const pointer, hence the cast.
  int* key = const_cast<int*>(&index);

  if (this->remove(key))
    return StatusTuple(0);
  return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
}
// Wraps a devmap.
//
// Throws std::invalid_argument when `desc` does not describe a
// BPF_MAP_TYPE_DEVMAP map.
BPFDevmapTable::BPFDevmapTable(const TableDesc& desc)
    : BPFTableBase<int, int>(desc) {
  const bool is_devmap = (desc.type == BPF_MAP_TYPE_DEVMAP);
  if (!is_devmap) {
    throw std::invalid_argument("Table '" + desc.name +
                                "' is not a devmap table");
  }
}
// Stores `value` at slot `index`. Returns code 0 on success, or code -1
// with the errno description when update() fails.
StatusTuple BPFDevmapTable::update_value(const int& index,
                                         const int& value) {
  // update() takes non-const pointers, hence the casts.
  int* key = const_cast<int*>(&index);
  int* leaf = const_cast<int*>(&value);

  if (this->update(key, leaf))
    return StatusTuple(0);
  return StatusTuple(-1, "Error updating value: %s", std::strerror(errno));
}
// Reads the entry at slot `index` into `value`. Returns code 0 on success,
// or code -1 with the errno description when lookup() fails.
StatusTuple BPFDevmapTable::get_value(const int& index,
                                      int& value) {
  // lookup() takes a non-const key pointer, hence the cast.
  int* key = const_cast<int*>(&index);

  if (this->lookup(key, &value))
    return StatusTuple(0);
  return StatusTuple(-1, "Error getting value: %s", std::strerror(errno));
}
// Removes the entry at slot `index`. Returns code 0 on success, or code -1
// with the errno description when remove() fails.
StatusTuple BPFDevmapTable::remove_value(const int& index) {
  // remove() takes a non-const pointer, hence the cast.
  int* key = const_cast<int*>(&index);

  if (this->remove(key))
    return StatusTuple(0);
  return StatusTuple(-1, "Error removing value: %s", std::strerror(errno));
}
} // namespace ebpf