/*
* Copyright 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#undef LOG_TAG
#define LOG_TAG "GpuWork"
#define ATRACE_TAG ATRACE_TAG_GRAPHICS
#include "gpuwork/GpuWork.h"
#include <android-base/stringprintf.h>
#include <binder/PermissionCache.h>
#include <bpf/WaitForProgsLoaded.h>
#include <libbpf.h>
#include <log/log.h>
#include <random>
#include <stats_event.h>
#include <statslog.h>
#include <unistd.h>
#include <utils/Timers.h>
#include <utils/Trace.h>
#include <bit>
#include <cerrno>
#include <chrono>
#include <cinttypes>
#include <cstdint>
#include <cstring>
#include <limits>
#include <map>
#include <mutex>
#include <set>
#include <thread>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "gpuwork/gpuWork.h"
#define ONE_MS_IN_NS (1000000)
namespace android {
namespace gpuwork {
namespace {
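// Strict weak ordering on (gpu_id, uid) pairs; used as the comparator for the
// sorted map built in dump().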
bool lessThanGpuIdUid(const android::gpuwork::GpuIdUid& l, const android::gpuwork::GpuIdUid& r) {
return std::tie(l.gpu_id, l.uid) < std::tie(r.gpu_id, r.uid);
}
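// Simple shift-and-add hash over (gpu_id, uid); used as the hasher for the
// unordered map built in pullWorkAtoms().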
size_t hashGpuIdUid(const android::gpuwork::GpuIdUid& gpuIdUid) {
return static_cast<size_t>((gpuIdUid.gpu_id << 5U) + gpuIdUid.uid);
}
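// Equality over (gpu_id, uid); used as the key-equality predicate for the
// unordered map built in pullWorkAtoms().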
bool equalGpuIdUid(const android::gpuwork::GpuIdUid& l, const android::gpuwork::GpuIdUid& r) {
return std::tie(l.gpu_id, l.uid) == std::tie(r.gpu_id, r.uid);
}
// Gets a BPF map from |mapPath|.
template <class Key, class Value>
bool getBpfMap(const char* mapPath, bpf::BpfMap<Key, Value>* out) {
errno = 0;
auto map = bpf::BpfMap<Key, Value>(mapPath);
if (!map.isValid()) {
ALOGW("Failed to create bpf map from %s [%d(%s)]", mapPath, errno, strerror(errno));
return false;
}
*out = std::move(map);
return true;
}
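// Helpers for converting values to the int32_t fields of a statsd atom. The
// unspecialized templates are deleted so that only the explicitly written
// specializations below can be used.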
template <typename SourceType>
inline int32_t cast_int32(SourceType) = delete;
template <typename SourceType>
inline int32_t bitcast_int32(SourceType) = delete;
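// Reinterprets the bits of a uint32_t as an int32_t, preserving the bit pattern.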
template <>
inline int32_t bitcast_int32<uint32_t>(uint32_t source) {
int32_t result;
memcpy(&result, &source, sizeof(result));
return result;
}
} // namespace
using base::StringAppendF;
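// Stops the map clearer thread (if it was started), unregisters the statsd pull
// callback, and detaches the gpu_work_period tracepoint.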
GpuWork::~GpuWork() {
// If we created our clearer thread, then we must stop it and join it.
if (mMapClearerThread.joinable()) {
// Tell the thread to terminate.
{
std::scoped_lock<std::mutex> lock(mMutex);
mIsTerminating = true;
mIsTerminatingConditionVariable.notify_all();
}
// Now, we can join it.
mMapClearerThread.join();
}
{
std::scoped_lock<std::mutex> lock(mMutex);
if (mStatsdRegistered) {
AStatsManager_clearPullAtomCallback(android::util::GPU_WORK_PER_UID);
}
}
bpf_detach_tracepoint("power", "gpu_work_period");
}
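// Opens the BPF maps, attaches the gpu_work_period tracepoint program, starts
// the periodic map clearer thread, and registers the statsd pull callback.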
void GpuWork::initialize() {
// Make sure BPF programs are loaded.
bpf::waitForProgsLoaded();
waitForPermissions();
// Get the BPF maps before trying to attach the BPF program; if we can't get
// the maps then there is no point in attaching the BPF program.
{
std::lock_guard<std::mutex> lock(mMutex);
if (!getBpfMap("/sys/fs/bpf/map_gpuWork_gpu_work_map", &mGpuWorkMap)) {
return;
}
if (!getBpfMap("/sys/fs/bpf/map_gpuWork_gpu_work_global_data", &mGpuWorkGlobalDataMap)) {
return;
}
mPreviousMapClearTimePoint = std::chrono::steady_clock::now();
}
// Attach the tracepoint.
if (!attachTracepoint("/sys/fs/bpf/prog_gpuWork_tracepoint_power_gpu_work_period", "power",
"gpu_work_period")) {
return;
}
// Create the map clearer thread, and store it to |mMapClearerThread|.
std::thread thread([this]() { periodicallyClearMap(); });
mMapClearerThread.swap(thread);
{
std::lock_guard<std::mutex> lock(mMutex);
AStatsManager_setPullAtomCallback(int32_t{android::util::GPU_WORK_PER_UID}, nullptr,
GpuWork::pullAtomCallback, this);
mStatsdRegistered = true;
}
ALOGI("Initialized!");
mInitialized.store(true);
}
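// Appends a human-readable table of per-(gpu_id, uid) GPU work durations to
// |result|.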
void GpuWork::dump(const Vector<String16>& /* args */, std::string* result) {
if (!mInitialized.load()) {
result->append("GPU work information is not available.\n");
return;
}
// Ordered map ensures output data is sorted.
std::map<GpuIdUid, UidTrackingInfo, decltype(lessThanGpuIdUid)*> dumpMap(&lessThanGpuIdUid);
{
std::lock_guard<std::mutex> lock(mMutex);
if (!mGpuWorkMap.isValid()) {
result->append("GPU work map is not available.\n");
return;
}
// Iteration of BPF hash maps can be unreliable (no data races, but elements
// may be repeated), as the map is typically being modified by other
// threads. The buckets are all preallocated. Our eBPF program only updates
// entries (in-place) or adds entries. |GpuWork| only iterates or clears the
// map while holding |mMutex|. Given this, we should be able to iterate over
// all elements reliably. Nevertheless, we copy into a map to avoid
// duplicates.
// Note that userspace reads of BPF maps make a copy of the value, and
// thus the returned value is not being concurrently accessed by the BPF
// program (no atomic reads needed below).
mGpuWorkMap.iterateWithValue(
[&dumpMap](const GpuIdUid& key, const UidTrackingInfo& value,
const android::bpf::BpfMap<GpuIdUid, UidTrackingInfo>&)
-> base::Result<void> {
dumpMap[key] = value;
return {};
});
}
// Dump work information.
// E.g.
// GPU work information.
// gpu_id uid total_active_duration_ns total_inactive_duration_ns
// 0 1000 0 0
// 0 1003 1234 123
// [errors:3]0 1006 4567 456
// Header.
result->append("GPU work information.\ngpu_id uid total_active_duration_ns "
"total_inactive_duration_ns\n");
for (const auto& idToUidInfo : dumpMap) {
if (idToUidInfo.second.error_count) {
StringAppendF(result, "[errors:%" PRIu32 "]", idToUidInfo.second.error_count);
}
StringAppendF(result, "%" PRIu32 " %" PRIu32 " %" PRIu64 " %" PRIu64 "\n",
idToUidInfo.first.gpu_id, idToUidInfo.first.uid,
idToUidInfo.second.total_active_duration_ns,
idToUidInfo.second.total_inactive_duration_ns);
}
}
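// Attaches the pinned BPF program at |programPath| to the given tracepoint,
// retrying for up to |kGpuWaitTimeoutSeconds| in case the GPU driver has not
// loaded yet.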
bool GpuWork::attachTracepoint(const char* programPath, const char* tracepointGroup,
const char* tracepointName) {
errno = 0;
base::unique_fd fd(bpf::retrieveProgram(programPath));
if (fd < 0) {
ALOGW("Failed to retrieve pinned program from %s [%d(%s)]", programPath, errno,
strerror(errno));
return false;
}
// Attach the program to the tracepoint. The tracepoint is automatically enabled.
errno = 0;
int count = 0;
while (bpf_attach_tracepoint(fd.get(), tracepointGroup, tracepointName) < 0) {
if (++count > kGpuWaitTimeoutSeconds) {
ALOGW("Failed to attach bpf program to %s/%s tracepoint [%d(%s)]", tracepointGroup,
tracepointName, errno, strerror(errno));
return false;
}
// Retry until GPU driver loaded or timeout.
sleep(1);
errno = 0;
}
return true;
}
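// statsd pull callback entry point; |cookie| is the owning GpuWork instance.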
AStatsManager_PullAtomCallbackReturn GpuWork::pullAtomCallback(int32_t atomTag,
AStatsEventList* data,
void* cookie) {
ATRACE_CALL();
GpuWork* gpuWork = reinterpret_cast<GpuWork*>(cookie);
if (atomTag == android::util::GPU_WORK_PER_UID) {
return gpuWork->pullWorkAtoms(data);
}
return AStatsManager_PULL_SKIP;
}
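// Copies the BPF map contents, drops UIDs with negligible GPU time, randomly
// samples a bounded number of the remaining UIDs, and logs one GPU_WORK_PER_UID
// atom per (gpu_id, uid) pair before clearing the map.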
AStatsManager_PullAtomCallbackReturn GpuWork::pullWorkAtoms(AStatsEventList* data) {
ATRACE_CALL();
if (!data || !mInitialized.load()) {
return AStatsManager_PULL_SKIP;
}
std::lock_guard<std::mutex> lock(mMutex);
if (!mGpuWorkMap.isValid()) {
return AStatsManager_PULL_SKIP;
}
std::unordered_map<GpuIdUid, UidTrackingInfo, decltype(hashGpuIdUid)*, decltype(equalGpuIdUid)*>
workMap(32, &hashGpuIdUid, &equalGpuIdUid);
// Iteration of BPF hash maps can be unreliable (no data races, but elements
// may be repeated), as the map is typically being modified by other
// threads. The buckets are all preallocated. Our eBPF program only updates
// entries (in-place) or adds entries. |GpuWork| only iterates or clears the
// map while holding |mMutex|. Given this, we should be able to iterate over
// all elements reliably. Nevertheless, we copy into a map to avoid
// duplicates.
// Note that userspace reads of BPF maps make a copy of the value, and thus
// the returned value is not being concurrently accessed by the BPF program
// (no atomic reads needed below).
mGpuWorkMap.iterateWithValue([&workMap](const GpuIdUid& key, const UidTrackingInfo& value,
const android::bpf::BpfMap<GpuIdUid, UidTrackingInfo>&)
-> base::Result<void> {
workMap[key] = value;
return {};
});
// Get a list of just the UIDs; the order does not matter.
std::vector<Uid> uids;
// Get a list of the GPU IDs, in order.
std::set<uint32_t> gpuIds;
{
// To avoid adding duplicate UIDs.
std::unordered_set<Uid> addedUids;
for (const auto& workInfo : workMap) {
if (addedUids.insert(workInfo.first.uid).second) {
// Insertion was successful.
uids.push_back(workInfo.first.uid);
}
gpuIds.insert(workInfo.first.gpu_id);
}
}
ALOGI("pullWorkAtoms: uids.size() == %zu", uids.size());
ALOGI("pullWorkAtoms: gpuIds.size() == %zu", gpuIds.size());
if (gpuIds.size() > kNumGpusHardLimit) {
// If we observe a very high number of GPUs then something has probably
// gone wrong, so don't log any atoms.
return AStatsManager_PULL_SKIP;
}
size_t numSampledUids = kNumSampledUids;
if (gpuIds.size() > kNumGpusSoftLimit) {
// If we observe a high number of GPUs then we just sample 1 UID.
numSampledUids = 1;
}
// Remove all UIDs that do not have at least |kMinGpuTimeNanoseconds| on at
// least one GPU.
{
auto uidIt = uids.begin();
while (uidIt != uids.end()) {
bool hasEnoughGpuTime = false;
for (uint32_t gpuId : gpuIds) {
auto infoIt = workMap.find(GpuIdUid{gpuId, *uidIt});
if (infoIt == workMap.end()) {
continue;
}
if (infoIt->second.total_active_duration_ns +
infoIt->second.total_inactive_duration_ns >=
kMinGpuTimeNanoseconds) {
hasEnoughGpuTime = true;
break;
}
}
if (hasEnoughGpuTime) {
++uidIt;
} else {
uidIt = uids.erase(uidIt);
}
}
}
ALOGI("pullWorkAtoms: after removing uids with very low GPU time: uids.size() == %zu",
uids.size());
std::random_device device;
std::default_random_engine random_engine(device());
// If we have more than |numSampledUids| UIDs, choose |numSampledUids|
// random UIDs. We swap them to the front of the list. Given the list
// indices 0..i..n-1, we have the following inclusive-inclusive ranges:
// - [0, i-1] == the randomly chosen elements.
// - [i, n-1] == the remaining unchosen elements.
if (uids.size() > numSampledUids) {
for (size_t i = 0; i < numSampledUids; ++i) {
std::uniform_int_distribution<size_t> uniform_dist(i, uids.size() - 1);
size_t random_index = uniform_dist(random_engine);
std::swap(uids[i], uids[random_index]);
}
// Only keep the front |numSampledUids| elements.
uids.resize(numSampledUids);
}
ALOGI("pullWorkAtoms: after random selection: uids.size() == %zu", uids.size());
auto now = std::chrono::steady_clock::now();
long long duration =
std::chrono::duration_cast<std::chrono::seconds>(now - mPreviousMapClearTimePoint)
.count();
if (duration > std::numeric_limits<int32_t>::max() || duration < 0) {
// This is essentially impossible. If it does somehow happen, give up,
// but still clear the map.
clearMap();
return AStatsManager_PULL_SKIP;
}
// Log an atom for each (gpu id, uid) pair for which we have data.
for (uint32_t gpuId : gpuIds) {
for (Uid uid : uids) {
auto it = workMap.find(GpuIdUid{gpuId, uid});
if (it == workMap.end()) {
continue;
}
const UidTrackingInfo& info = it->second;
uint64_t total_active_duration_ms = info.total_active_duration_ns / ONE_MS_IN_NS;
uint64_t total_inactive_duration_ms = info.total_inactive_duration_ns / ONE_MS_IN_NS;
// Skip this atom if any numbers are out of range. |duration| is
// already checked above.
if (total_active_duration_ms > std::numeric_limits<int32_t>::max() ||
total_inactive_duration_ms > std::numeric_limits<int32_t>::max()) {
continue;
}
ALOGI("pullWorkAtoms: adding stats for GPU ID %" PRIu32 "; UID %" PRIu32, gpuId, uid);
android::util::addAStatsEvent(data, int32_t{android::util::GPU_WORK_PER_UID},
// uid
bitcast_int32(uid),
// gpu_id
bitcast_int32(gpuId),
// time_duration_seconds
static_cast<int32_t>(duration),
// total_active_duration_millis
static_cast<int32_t>(total_active_duration_ms),
// total_inactive_duration_millis
static_cast<int32_t>(total_inactive_duration_ms));
}
}
clearMap();
return AStatsManager_PULL_SUCCESS;
}
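// Body of |mMapClearerThread|: wakes roughly every
// |kMapClearerWaitDurationSeconds| and clears the BPF map if it is getting
// full, until |mIsTerminating| is set.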
void GpuWork::periodicallyClearMap() {
std::unique_lock<std::mutex> lock(mMutex);
auto previousTime = std::chrono::steady_clock::now();
while (true) {
if (mIsTerminating) {
break;
}
auto nextTime = std::chrono::steady_clock::now();
auto differenceSeconds =
std::chrono::duration_cast<std::chrono::seconds>(nextTime - previousTime);
if (differenceSeconds.count() > kMapClearerWaitDurationSeconds) {
// It has been >1 hour, so clear the map, if needed.
clearMapIfNeeded();
// We only update |previousTime| if we actually checked the map.
previousTime = nextTime;
}
// Sleep for ~1 hour. It does not matter if we don't check the map for 2
// hours.
mIsTerminatingConditionVariable.wait_for(lock,
std::chrono::seconds{
kMapClearerWaitDurationSeconds});
}
}
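// Clears the BPF map if it is more than 75% full, according to the entry count
// kept in the global data map.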
void GpuWork::clearMapIfNeeded() {
if (!mInitialized.load() || !mGpuWorkMap.isValid() || !mGpuWorkGlobalDataMap.isValid()) {
ALOGW("Map clearing could not occur because we are not initialized properly");
return;
}
base::Result<GlobalData> globalData = mGpuWorkGlobalDataMap.readValue(0);
if (!globalData.ok()) {
ALOGW("Could not read BPF global data map entry");
return;
}
// Note that userspace reads of BPF maps make a copy of the value, and thus
// the return value is not being concurrently accessed by the BPF program
// (no atomic reads needed below).
uint64_t numEntries = globalData.value().num_map_entries;
// If the map is <=75% full, we do nothing.
if (numEntries <= (kMaxTrackedGpuIdUids / 4) * 3) {
return;
}
clearMap();
}
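// Deletes all entries from the BPF map, resets the entry counter in the global
// data map, and records the clear time in |mPreviousMapClearTimePoint|.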
void GpuWork::clearMap() {
if (!mInitialized.load() || !mGpuWorkMap.isValid() || !mGpuWorkGlobalDataMap.isValid()) {
ALOGW("Map clearing could not occur because we are not initialized properly");
return;
}
base::Result<GlobalData> globalData = mGpuWorkGlobalDataMap.readValue(0);
if (!globalData.ok()) {
ALOGW("Could not read BPF global data map entry");
return;
}
// Iterating BPF maps to delete keys is tricky. If we just repeatedly call
// |getFirstKey()| and delete that, we may loop forever (or for a long time)
// because our BPF program might be repeatedly re-adding keys. Also, even if
// we limit the number of elements we try to delete, we might only delete
// new entries, leaving old entries in the map. If we delete a key A and
// then call |getNextKey(A)|, the first key in the map is returned, so we
// have the same issue.
//
// Thus, we instead get the next key and then delete the previous key. We
// also limit the number of deletions we try, just in case.
base::Result<GpuIdUid> key = mGpuWorkMap.getFirstKey();
for (size_t i = 0; i < kMaxTrackedGpuIdUids; ++i) {
if (!key.ok()) {
break;
}
base::Result<GpuIdUid> previousKey = key;
key = mGpuWorkMap.getNextKey(previousKey.value());
mGpuWorkMap.deleteValue(previousKey.value());
}
// Reset our counter; |globalData| is a copy of the data, so we have to use
// |writeValue|.
globalData.value().num_map_entries = 0;
mGpuWorkGlobalDataMap.writeValue(0, globalData.value(), BPF_ANY);
// Update |mPreviousMapClearTimePoint| so we know when we started collecting
// the stats.
mPreviousMapClearTimePoint = std::chrono::steady_clock::now();
}
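// Waits up to |kPermissionsWaitTimeoutSeconds| for this process to hold
// android.permission.REGISTER_STATS_PULL_ATOM, which is needed before
// registering the statsd pull callback.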
void GpuWork::waitForPermissions() {
const String16 permissionRegisterStatsPullAtom(kPermissionRegisterStatsPullAtom);
int count = 0;
while (!PermissionCache::checkPermission(permissionRegisterStatsPullAtom, getpid(), getuid())) {
if (++count > kPermissionsWaitTimeoutSeconds) {
ALOGW("Timed out waiting for android.permission.REGISTER_STATS_PULL_ATOM");
return;
}
// Retry.
sleep(1);
}
}
} // namespace gpuwork
} // namespace android