| /* |
| * Copyright (C) 2015 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <android/log.h> |
| #include <math.h> |
| #include <stdlib.h> |
| #include <unistd.h> |
| |
| #include "Bench.h" |
| |
| |
| Bench::Bench() |
| { |
| mTimeBucket = NULL; |
| mTimeBuckets = 0; |
| mTimeBucketDivisor = 1; |
| |
| mMemLatencyLastSize = 0; |
| mMemDst = NULL; |
| mMemSrc = NULL; |
| mMemLoopCount = 0; |
| } |
| |
| |
| Bench::~Bench() |
| { |
| } |
| |
| uint64_t Bench::getTimeNanos() const |
| { |
| struct timespec t; |
| clock_gettime(CLOCK_MONOTONIC, &t); |
| return t.tv_nsec + ((uint64_t)t.tv_sec * 1000 * 1000 * 1000); |
| } |
| |
| uint64_t Bench::getTimeMillis() const |
| { |
| return getTimeNanos() / 1000000; |
| } |
| |
| |
| void Bench::testWork(void *usr, uint32_t idx) |
| { |
| Bench *b = (Bench *)usr; |
| //__android_log_print(ANDROID_LOG_INFO, "bench", "test %i %p", idx, b); |
| |
| float f1 = 0.f; |
| float f2 = 0.f; |
| float f3 = 0.f; |
| float f4 = 0.f; |
| |
| float *ipk = b->mIpKernel[idx]; |
| volatile float *src = b->mSrcBuf[idx]; |
| volatile float *out = b->mOutBuf[idx]; |
| |
| //__android_log_print(ANDROID_LOG_INFO, "bench", "test %p %p %p", ipk, src, out); |
| |
| do { |
| |
| for (int i = 0; i < 1024; i++) { |
| f1 += src[i * 4] * ipk[i]; |
| f2 += src[i * 4 + 1] * ipk[i]; |
| f3 += src[i * 4 + 2] * ipk[i]; |
| f4 += sqrtf(f1 + f2 + f3); |
| } |
| out[0] = f1; |
| out[1] = f1; |
| out[2] = f1; |
| out[3] = f1; |
| |
| } while (b->incTimeBucket()); |
| } |
| |
| bool Bench::initIP() { |
| int workers = mWorkers.getWorkerCount(); |
| |
| mIpKernel = new float *[workers]; |
| mSrcBuf = new float *[workers]; |
| mOutBuf = new float *[workers]; |
| |
| for (int i = 0; i < workers; i++) { |
| mIpKernel[i] = new float[1024]; |
| mSrcBuf[i] = new float[4096]; |
| mOutBuf[i] = new float[4]; |
| } |
| |
| |
| return true; |
| } |
| |
| bool Bench::runPowerManagementTest(uint64_t options) { |
| //__android_log_print(ANDROID_LOG_INFO, "bench", "rpmt x %i", options); |
| |
| mTimeBucketDivisor = 1000 * 1000; // use ms |
| allocateBuckets(2 * 1000); |
| |
| usleep(2 * 1000 * 1000); |
| |
| //__android_log_print(ANDROID_LOG_INFO, "bench", "rpmt 2 b %i", mTimeBuckets); |
| |
| mTimeStartNanos = getTimeNanos(); |
| mTimeEndNanos = mTimeStartNanos + mTimeBuckets * mTimeBucketDivisor; |
| memset(mTimeBucket, 0, sizeof(uint32_t) * mTimeBuckets); |
| |
| bool useMT = false; |
| |
| //__android_log_print(ANDROID_LOG_INFO, "bench", "rpmt 2.1 b %i", mTimeBuckets); |
| mTimeEndGroupNanos = mTimeStartNanos; |
| do { |
| // Advance 8ms |
| mTimeEndGroupNanos += 8 * 1000 * 1000; |
| |
| int threads = useMT ? 1 : 0; |
| useMT = !useMT; |
| if ((options & 0x1f) != 0) { |
| threads = options & 0x1f; |
| } |
| |
| //__android_log_print(ANDROID_LOG_INFO, "bench", "threads %i", threads); |
| |
| mWorkers.launchWork(testWork, this, threads); |
| } while (mTimeEndGroupNanos <= mTimeEndNanos); |
| } |
| |
| bool Bench::allocateBuckets(size_t bucketCount) { |
| if (bucketCount == mTimeBuckets) { |
| return true; |
| } |
| |
| if (mTimeBucket != NULL) { |
| delete[] mTimeBucket; |
| mTimeBucket = NULL; |
| } |
| |
| mTimeBuckets = bucketCount; |
| if (mTimeBuckets > 0) { |
| mTimeBucket = new uint32_t[mTimeBuckets]; |
| } |
| |
| return true; |
| } |
| |
| bool Bench::init() { |
| mWorkers.init(); |
| |
| initIP(); |
| |
| //ALOGV("%p Launching thread(s), CPUs %i", mRSC, mWorkers.mCount + 1); |
| |
| return true; |
| } |
| |
| bool Bench::incTimeBucket() const { |
| uint64_t time = getTimeNanos(); |
| uint64_t bucket = (time - mTimeStartNanos) / mTimeBucketDivisor; |
| |
| if (bucket >= mTimeBuckets) { |
| return false; |
| } |
| |
| __sync_fetch_and_add(&mTimeBucket[bucket], 1); |
| |
| return time < mTimeEndGroupNanos; |
| } |
| |
| void Bench::getData(float *data, size_t count) const { |
| if (count > mTimeBuckets) { |
| count = mTimeBuckets; |
| } |
| for (size_t ct = 0; ct < count; ct++) { |
| data[ct] = (float)mTimeBucket[ct]; |
| } |
| } |
| |
| |
| bool Bench::runCPUHeatSoak(uint64_t options) |
| { |
| mTimeBucketDivisor = 1000 * 1000; // use ms |
| allocateBuckets(1000); |
| |
| mTimeStartNanos = getTimeNanos(); |
| mTimeEndNanos = mTimeStartNanos + mTimeBuckets * mTimeBucketDivisor; |
| memset(mTimeBucket, 0, sizeof(uint32_t) * mTimeBuckets); |
| |
| mTimeEndGroupNanos = mTimeEndNanos; |
| mWorkers.launchWork(testWork, this, 0); |
| } |
| |
| float Bench::runMemoryBandwidthTest(uint64_t size) |
| { |
| uint64_t t1 = getTimeMillis(); |
| for (size_t ct = mMemLoopCount; ct > 0; ct--) { |
| memcpy(mMemDst, mMemSrc, size); |
| } |
| double dt = getTimeMillis() - t1; |
| dt /= 1000; |
| |
| double bw = ((double)size) * mMemLoopCount / dt; |
| bw /= 1024 * 1024 * 1024; |
| |
| float targetTime = 0.2f; |
| if (dt > targetTime) { |
| mMemLoopCount = (size_t)((double)mMemLoopCount / (dt / targetTime)); |
| } |
| |
| return (float)bw; |
| } |
| |
| float Bench::runMemoryLatencyTest(uint64_t size) |
| { |
| //__android_log_print(ANDROID_LOG_INFO, "bench", "latency %i", (int)size); |
| void ** sp = (void **)mMemSrc; |
| size_t maxIndex = size / sizeof(void *); |
| size_t loops = ((maxIndex / 2) & (~3)); |
| //loops = 10; |
| |
| if (size != mMemLatencyLastSize) { |
| __android_log_print(ANDROID_LOG_INFO, "bench", "latency build %i %i", (int)maxIndex, loops); |
| mMemLatencyLastSize = size; |
| memset((void *)mMemSrc, 0, mMemLatencyLastSize); |
| |
| size_t lastIdx = 0; |
| for (size_t ct = 0; ct < loops; ct++) { |
| size_t ni = rand() * rand(); |
| ni = ni % maxIndex; |
| while ((sp[ni] != NULL) || (ni == lastIdx)) { |
| ni++; |
| if (ni >= maxIndex) { |
| ni = 1; |
| } |
| // __android_log_print(ANDROID_LOG_INFO, "bench", "gen ni loop %i %i", lastIdx, ni); |
| } |
| // __android_log_print(ANDROID_LOG_INFO, "bench", "gen ct = %i %i %i %p %p", (int)ct, lastIdx, ni, &sp[lastIdx], &sp[ni]); |
| sp[lastIdx] = &sp[ni]; |
| lastIdx = ni; |
| } |
| sp[lastIdx] = 0; |
| } |
| //__android_log_print(ANDROID_LOG_INFO, "bench", "latency testing"); |
| |
| uint64_t t1 = getTimeNanos(); |
| for (size_t ct = mMemLoopCount; ct > 0; ct--) { |
| size_t lc = 1; |
| volatile void *p = sp[0]; |
| while (p != NULL) { |
| // Unroll once to minimize branching overhead. |
| void **pn = (void **)p; |
| p = pn[0]; |
| pn = (void **)p; |
| p = pn[0]; |
| } |
| } |
| //__android_log_print(ANDROID_LOG_INFO, "bench", "v %i %i", loops * mMemLoopCount, v); |
| |
| double dt = getTimeNanos() - t1; |
| double dts = dt / 1000000000; |
| double lat = dt / (loops * mMemLoopCount); |
| __android_log_print(ANDROID_LOG_INFO, "bench", "latency ret %f", lat); |
| |
| float targetTime = 0.2f; |
| if (dts > targetTime) { |
| mMemLoopCount = (size_t)((double)mMemLoopCount / (dts / targetTime)); |
| if (mMemLoopCount < 1) { |
| mMemLoopCount = 1; |
| } |
| } |
| |
| return (float)lat; |
| } |
| |
| bool Bench::startMemTests() |
| { |
| mMemSrc = (uint8_t *)malloc(1024*1024*64); |
| mMemDst = (uint8_t *)malloc(1024*1024*64); |
| |
| memset(mMemSrc, 0, 1024*1024*16); |
| memset(mMemDst, 0, 1024*1024*16); |
| |
| mMemLoopCount = 1; |
| uint64_t start = getTimeMillis(); |
| while((getTimeMillis() - start) < 500) { |
| memcpy(mMemDst, mMemSrc, 1024); |
| mMemLoopCount++; |
| } |
| mMemLatencyLastSize = 0; |
| } |
| |
| void Bench::endMemTests() |
| { |
| free(mMemSrc); |
| free(mMemDst); |
| mMemSrc = NULL; |
| mMemDst = NULL; |
| mMemLatencyLastSize = 0; |
| |
| |
| } |