/*
* Copyright 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#undef LOG_TAG
#define LOG_TAG "GpuWork"
#define ATRACE_TAG ATRACE_TAG_GRAPHICS
#include "gpuwork/GpuWork.h"
#include <android-base/stringprintf.h>
#include <binder/PermissionCache.h>
#include <bpf/WaitForProgsLoaded.h>
#include <libbpf.h>
#include <log/log.h>
#include <random>
#include <stats_event.h>
#include <statslog.h>
#include <unistd.h>
#include <utils/Timers.h>
#include <utils/Trace.h>
#include <bit>
#include <cerrno>
#include <chrono>
#include <cinttypes>
#include <cstdint>
#include <cstring>
#include <limits>
#include <map>
#include <mutex>
#include <set>
#include <thread>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "gpuwork/gpuWork.h"
#define ONE_MS_IN_NS (1000000)
namespace android {
namespace gpuwork {
namespace {
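// Strict weak ordering on (gpu_id, uid) pairs; used as the comparator for the
// sorted map built in dump().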
bool lessThanGpuIdUid(const android::gpuwork::GpuIdUid& l, const android::gpuwork::GpuIdUid& r) {
return std::tie(l.gpu_id, l.uid) < std::tie(r.gpu_id, r.uid);
}
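// Simple shift-and-add hash over (gpu_id, uid); used as the hasher for the
// unordered map built in pullWorkAtoms().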
size_t hashGpuIdUid(const android::gpuwork::GpuIdUid& gpuIdUid) {
return static_cast<size_t>((gpuIdUid.gpu_id << 5U) + gpuIdUid.uid);
}
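// Equality over (gpu_id, uid); used as the key-equality predicate for the
// unordered map built in pullWorkAtoms().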
bool equalGpuIdUid(const android::gpuwork::GpuIdUid& l, const android::gpuwork::GpuIdUid& r) {
return std::tie(l.gpu_id, l.uid) == std::tie(r.gpu_id, r.uid);
}
// Gets a BPF map from |mapPath|.
template <class Key, class Value>
bool getBpfMap(const char* mapPath, bpf::BpfMap<Key, Value>* out) {
errno = 0;
auto map = bpf::BpfMap<Key, Value>(mapPath);
if (!map.isValid()) {
ALOGW("Failed to create bpf map from %s [%d(%s)]", mapPath, errno, strerror(errno));
return false;
}
*out = std::move(map);
return true;
}
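// Helpers for converting values to the int32_t fields of a statsd atom. The
// unspecialized templates are deleted so that only the explicitly written
// specializations below can be used.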
template <typename SourceType>
inline int32_t cast_int32(SourceType) = delete;
template <typename SourceType>
inline int32_t bitcast_int32(SourceType) = delete;
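// Reinterprets the bits of a uint32_t as an int32_t, preserving the bit pattern.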
template <>
inline int32_t bitcast_int32<uint32_t>(uint32_t source) {
int32_t result;
memcpy(&result, &source, sizeof(result));
return result;
}
} // namespace
using base::StringAppendF;
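// Stops the map clearer thread (if it was started), unregisters the statsd pull
// callback, and detaches the gpu_work_period tracepoint.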
GpuWork::~GpuWork() {
// If we created our clearer thread, then we must stop it and join it.
if (mMapClearerThread.joinable()) {
// Tell the thread to terminate.
{
std::scoped_lock<std::mutex> lock(mMutex);
mIsTerminating = true;
mIsTerminatingConditionVariable.notify_all();
}
// Now, we can join it.
mMapClearerThread.join();
}
{
std::scoped_lock<std::mutex> lock(mMutex);
if (mStatsdRegistered) {
AStatsManager_clearPullAtomCallback(android::util::GPU_WORK_PER_UID);
}
}
bpf_detach_tracepoint("power", "gpu_work_period");
}
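// Opens the BPF maps, attaches the gpu_work_period tracepoint program, starts
// the periodic map clearer thread, and registers the statsd pull callback.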
void GpuWork::initialize() {
// Make sure BPF programs are loaded.
bpf::waitForProgsLoaded();
waitForPermissions();
// Get the BPF maps before trying to attach the BPF program; if we can't get
// the maps then there is no point in attaching the BPF program.
{
std::lock_guard<std::mutex> lock(mMutex);
if (!getBpfMap("/sys/fs/bpf/map_gpuWork_gpu_work_map", &mGpuWorkMap)) {
return;
}
if (!getBpfMap("/sys/fs/bpf/map_gpuWork_gpu_work_global_data", &mGpuWorkGlobalDataMap)) {
return;
}
mPreviousMapClearTimePoint = std::chrono::steady_clock::now();
}
// Attach the tracepoint.
if (!attachTracepoint("/sys/fs/bpf/prog_gpuWork_tracepoint_power_gpu_work_period", "power",
"gpu_work_period")) {
return;
}
// Create the map clearer thread, and store it to |mMapClearerThread|.
std::thread thread([this]() { periodicallyClearMap(); });
mMapClearerThread.swap(thread);
{
std::lock_guard<std::mutex> lock(mMutex);
AStatsManager_setPullAtomCallback(int32_t{android::util::GPU_WORK_PER_UID}, nullptr,
GpuWork::pullAtomCallback, this);
mStatsdRegistered = true;
}
ALOGI("Initialized!");
mInitialized.store(true);
}
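// Appends a human-readable table of per-(gpu_id, uid) GPU work durations to
// |result|.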
void GpuWork::dump(const Vector<String16>& /* args */, std::string* result) {
if (!mInitialized.load()) {
result->append("GPU work information is not available.\n");
return;
}
// Ordered map ensures output data is sorted.
std::map<GpuIdUid, UidTrackingInfo, decltype(lessThanGpuIdUid)*> dumpMap(&lessThanGpuIdUid);
{
std::lock_guard<std::mutex> lock(mMutex);
if (!mGpuWorkMap.isValid()) {
result->append("GPU work map is not available.\n");
return;
}
// Iteration of BPF hash maps can be unreliable (no data races, but elements
// may be repeated), as the map is typically being modified by other
// threads. The buckets are all preallocated. Our eBPF program only updates
// entries (in-place) or adds entries. |GpuWork| only iterates or clears the
// map while holding |mMutex|. Given this, we should be able to iterate over
// all elements reliably. Nevertheless, we copy into a map to avoid
// duplicates.
// Note that userspace reads of BPF maps make a copy of the value, and
// thus the returned value is not being concurrently accessed by the BPF
// program (no atomic reads needed below).
mGpuWorkMap.iterateWithValue(
[&dumpMap](const GpuIdUid& key, const UidTrackingInfo& value,
const android::bpf::BpfMap<GpuIdUid, UidTrackingInfo>&)
-> base::Result<void> {
dumpMap[key] = value;
return {};
});
}
// Dump work information.
// E.g.
// GPU work information.
// gpu_id uid total_active_duration_ns total_inactive_duration_ns
// 0 1000 0 0
// 0 1003 1234 123
// [errors:3]0 1006 4567 456
// Header.
result->append("GPU work information.\ngpu_id uid total_active_duration_ns "
"total_inactive_duration_ns\n");
for (const auto& idToUidInfo : dumpMap) {
if (idToUidInfo.second.error_count) {
StringAppendF(result, "[errors:%" PRIu32 "]", idToUidInfo.second.error_count);
}
StringAppendF(result, "%" PRIu32 " %" PRIu32 " %" PRIu64 " %" PRIu64 "\n",
idToUidInfo.first.gpu_id, idToUidInfo.first.uid,
idToUidInfo.second.total_active_duration_ns,
idToUidInfo.second.total_inactive_duration_ns);
}
}
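// Attaches the pinned BPF program at |programPath| to the given tracepoint,
// retrying for up to |kGpuWaitTimeoutSeconds| in case the GPU driver has not
// loaded yet.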
bool GpuWork::attachTracepoint(const char* programPath, const char* tracepointGroup,
const char* tracepointName) {
errno = 0;
base::unique_fd fd(bpf::retrieveProgram(programPath));
if (fd < 0) {
ALOGW("Failed to retrieve pinned program from %s [%d(%s)]", programPath, errno,
strerror(errno));
return false;
}
// Attach the program to the tracepoint. The tracepoint is automatically enabled.
errno = 0;
int count = 0;
while (bpf_attach_tracepoint(fd.get(), tracepointGroup, tracepointName) < 0) {
if (++count > kGpuWaitTimeoutSeconds) {
ALOGW("Failed to attach bpf program to %s/%s tracepoint [%d(%s)]", tracepointGroup,
tracepointName, errno, strerror(errno));
return false;
}
// Retry until GPU driver loaded or timeout.
sleep(1);
errno = 0;
}
return true;
}
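// statsd pull callback entry point; |cookie| is the owning GpuWork instance.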
AStatsManager_PullAtomCallbackReturn GpuWork::pullAtomCallback(int32_t atomTag,
AStatsEventList* data,
void* cookie) {
ATRACE_CALL();
GpuWork* gpuWork = reinterpret_cast<GpuWork*>(cookie);
if (atomTag == android::util::GPU_WORK_PER_UID) {
return gpuWork->pullWorkAtoms(data);
}
return AStatsManager_PULL_SKIP;
}
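// Copies the BPF map contents, drops UIDs with negligible GPU time, randomly
// samples a bounded number of the remaining UIDs, and logs one GPU_WORK_PER_UID
// atom per (gpu_id, uid) pair before clearing the map.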
AStatsManager_PullAtomCallbackReturn GpuWork::pullWorkAtoms(AStatsEventList* data) {
ATRACE_CALL();
if (!data || !mInitialized.load()) {
return AStatsManager_PULL_SKIP;
}
std::lock_guard<std::mutex> lock(mMutex);
if (!mGpuWorkMap.isValid()) {
return AStatsManager_PULL_SKIP;
}
std::unordered_map<GpuIdUid, UidTrackingInfo, decltype(hashGpuIdUid)*, decltype(equalGpuIdUid)*>
workMap(32, &hashGpuIdUid, &equalGpuIdUid);
// Iteration of BPF hash maps can be unreliable (no data races, but elements
// may be repeated), as the map is typically being modified by other
// threads. The buckets are all preallocated. Our eBPF program only updates
// entries (in-place) or adds entries. |GpuWork| only iterates or clears the
// map while holding |mMutex|. Given this, we should be able to iterate over
// all elements reliably. Nevertheless, we copy into a map to avoid
// duplicates.
// Note that userspace reads of BPF maps make a copy of the value, and thus
// the returned value is not being concurrently accessed by the BPF program
// (no atomic reads needed below).
mGpuWorkMap.iterateWithValue([&workMap](const GpuIdUid& key, const UidTrackingInfo& value,
const android::bpf::BpfMap<GpuIdUid, UidTrackingInfo>&)
-> base::Result<void> {
workMap[key] = value;
return {};
});
// Get a list of just the UIDs; the order does not matter.
std::vector<Uid> uids;
// Get a list of the GPU IDs, in order.
std::set<uint32_t> gpuIds;
{
// To avoid adding duplicate UIDs.
std::unordered_set<Uid> addedUids;
for (const auto& workInfo : workMap) {
if (addedUids.insert(workInfo.first.uid).second) {
// Insertion was successful.
uids.push_back(workInfo.first.uid);
}
gpuIds.insert(workInfo.first.gpu_id);
}
}
ALOGI("pullWorkAtoms: uids.size() == %zu", uids.size());
ALOGI("pullWorkAtoms: gpuIds.size() == %zu", gpuIds.size());
if (gpuIds.size() > kNumGpusHardLimit) {
// If we observe a very high number of GPUs then something has probably
// gone wrong, so don't log any atoms.
return AStatsManager_PULL_SKIP;
}
size_t numSampledUids = kNumSampledUids;
if (gpuIds.size() > kNumGpusSoftLimit) {
// If we observe a high number of GPUs then we just sample 1 UID.
numSampledUids = 1;
}
// Remove all UIDs that do not have at least |kMinGpuTimeNanoseconds| on at
// least one GPU.
{
auto uidIt = uids.begin();
while (uidIt != uids.end()) {
bool hasEnoughGpuTime = false;
for (uint32_t gpuId : gpuIds) {
auto infoIt = workMap.find(GpuIdUid{gpuId, *uidIt});
if (infoIt == workMap.end()) {
continue;
}
if (infoIt->second.total_active_duration_ns +
infoIt->second.total_inactive_duration_ns >=
kMinGpuTimeNanoseconds) {
hasEnoughGpuTime = true;
break;
}
}
if (hasEnoughGpuTime) {
++uidIt;
} else {
uidIt = uids.erase(uidIt);
}
}
}
ALOGI("pullWorkAtoms: after removing uids with very low GPU time: uids.size() == %zu",
uids.size());
std::random_device device;
std::default_random_engine random_engine(device());
// If we have more than |numSampledUids| UIDs, choose |numSampledUids|
// random UIDs. We swap them to the front of the list. Given the list
// indices 0..i..n-1, we have the following inclusive-inclusive ranges:
// - [0, i-1] == the randomly chosen elements.
// - [i, n-1] == the remaining unchosen elements.
if (uids.size() > numSampledUids) {
for (size_t i = 0; i < numSampledUids; ++i) {
std::uniform_int_distribution<size_t> uniform_dist(i, uids.size() - 1);
size_t random_index = uniform_dist(random_engine);
std::swap(uids[i], uids[random_index]);
}
// Only keep the front |numSampledUids| elements.
uids.resize(numSampledUids);
}
ALOGI("pullWorkAtoms: after random selection: uids.size() == %zu", uids.size());
auto now = std::chrono::steady_clock::now();
long long duration =
std::chrono::duration_cast<std::chrono::seconds>(now - mPreviousMapClearTimePoint)
.count();
if (duration > std::numeric_limits<int32_t>::max() || duration < 0) {
// This is essentially impossible. If it does somehow happen, give up,
// but still clear the map.
clearMap();
return AStatsManager_PULL_SKIP;
}
// Log an atom for each (gpu id, uid) pair for which we have data.
for (uint32_t gpuId : gpuIds) {
for (Uid uid : uids) {
auto it = workMap.find(GpuIdUid{gpuId, uid});
if (it == workMap.end()) {
continue;
}
const UidTrackingInfo& info = it->second;
uint64_t total_active_duration_ms = info.total_active_duration_ns / ONE_MS_IN_NS;
uint64_t total_inactive_duration_ms = info.total_inactive_duration_ns / ONE_MS_IN_NS;
// Skip this atom if any numbers are out of range. |duration| is
// already checked above.
if (total_active_duration_ms > std::numeric_limits<int32_t>::max() ||
total_inactive_duration_ms > std::numeric_limits<int32_t>::max()) {
continue;
}
ALOGI("pullWorkAtoms: adding stats for GPU ID %" PRIu32 "; UID %" PRIu32, gpuId, uid);
android::util::addAStatsEvent(data, int32_t{android::util::GPU_WORK_PER_UID},
// uid
bitcast_int32(uid),
// gpu_id
bitcast_int32(gpuId),
// time_duration_seconds
static_cast<int32_t>(duration),
// total_active_duration_millis
static_cast<int32_t>(total_active_duration_ms),
// total_inactive_duration_millis
static_cast<int32_t>(total_inactive_duration_ms));
}
}
clearMap();
return AStatsManager_PULL_SUCCESS;
}
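// Body of |mMapClearerThread|: wakes roughly every
// |kMapClearerWaitDurationSeconds| and clears the BPF map if it is getting
// full, until |mIsTerminating| is set.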
void GpuWork::periodicallyClearMap() {
std::unique_lock<std::mutex> lock(mMutex);
auto previousTime = std::chrono::steady_clock::now();
while (true) {
if (mIsTerminating) {
break;
}
auto nextTime = std::chrono::steady_clock::now();
auto differenceSeconds =
std::chrono::duration_cast<std::chrono::seconds>(nextTime - previousTime);
if (differenceSeconds.count() > kMapClearerWaitDurationSeconds) {
// It has been >1 hour, so clear the map, if needed.
clearMapIfNeeded();
// We only update |previousTime| if we actually checked the map.
previousTime = nextTime;
}
// Sleep for ~1 hour. It does not matter if we don't check the map for 2
// hours.
mIsTerminatingConditionVariable.wait_for(lock,
std::chrono::seconds{
kMapClearerWaitDurationSeconds});
}
}
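// Clears the BPF map if it is more than 75% full, according to the entry count
// kept in the global data map.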
void GpuWork::clearMapIfNeeded() {
if (!mInitialized.load() || !mGpuWorkMap.isValid() || !mGpuWorkGlobalDataMap.isValid()) {
ALOGW("Map clearing could not occur because we are not initialized properly");
return;
}
base::Result<GlobalData> globalData = mGpuWorkGlobalDataMap.readValue(0);
if (!globalData.ok()) {
ALOGW("Could not read BPF global data map entry");
return;
}
// Note that userspace reads of BPF maps make a copy of the value, and thus
// the return value is not being concurrently accessed by the BPF program
// (no atomic reads needed below).
uint64_t numEntries = globalData.value().num_map_entries;
// If the map is <=75% full, we do nothing.
if (numEntries <= (kMaxTrackedGpuIdUids / 4) * 3) {
return;
}
clearMap();
}
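// Deletes all entries from the BPF map, resets the entry counter in the global
// data map, and records the clear time in |mPreviousMapClearTimePoint|.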
void GpuWork::clearMap() {
if (!mInitialized.load() || !mGpuWorkMap.isValid() || !mGpuWorkGlobalDataMap.isValid()) {
ALOGW("Map clearing could not occur because we are not initialized properly");
return;
}
base::Result<GlobalData> globalData = mGpuWorkGlobalDataMap.readValue(0);
if (!globalData.ok()) {
ALOGW("Could not read BPF global data map entry");
return;
}
// Iterating BPF maps to delete keys is tricky. If we just repeatedly call
// |getFirstKey()| and delete that, we may loop forever (or for a long time)
// because our BPF program might be repeatedly re-adding keys. Also, even if
// we limit the number of elements we try to delete, we might only delete
// new entries, leaving old entries in the map. If we delete a key A and
// then call |getNextKey(A)|, the first key in the map is returned, so we
// have the same issue.
//
// Thus, we instead get the next key and then delete the previous key. We
// also limit the number of deletions we try, just in case.
base::Result<GpuIdUid> key = mGpuWorkMap.getFirstKey();
for (size_t i = 0; i < kMaxTrackedGpuIdUids; ++i) {
if (!key.ok()) {
break;
}
base::Result<GpuIdUid> previousKey = key;
key = mGpuWorkMap.getNextKey(previousKey.value());
mGpuWorkMap.deleteValue(previousKey.value());
}
// Reset our counter; |globalData| is a copy of the data, so we have to use
// |writeValue|.
globalData.value().num_map_entries = 0;
mGpuWorkGlobalDataMap.writeValue(0, globalData.value(), BPF_ANY);
// Update |mPreviousMapClearTimePoint| so we know when we started collecting
// the stats.
mPreviousMapClearTimePoint = std::chrono::steady_clock::now();
}
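// Waits up to |kPermissionsWaitTimeoutSeconds| for this process to hold
// android.permission.REGISTER_STATS_PULL_ATOM, which is needed before
// registering the statsd pull callback.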
void GpuWork::waitForPermissions() {
const String16 permissionRegisterStatsPullAtom(kPermissionRegisterStatsPullAtom);
int count = 0;
while (!PermissionCache::checkPermission(permissionRegisterStatsPullAtom, getpid(), getuid())) {
if (++count > kPermissionsWaitTimeoutSeconds) {
ALOGW("Timed out waiting for android.permission.REGISTER_STATS_PULL_ATOM");
return;
}
// Retry.
sleep(1);
}
}
} // namespace gpuwork
} // namespace android