add simple cpu benchmarking

- performance benchmarking in CTS; it does not affect the device

- matrix multiplication for floating point operations
- qsort for generic processing

Bug: 7515273
Change-Id: I963b036869de0d93f19a078a7ea322d1a2632a6c
diff --git a/suite/pts/PtsBuild.mk b/suite/pts/PtsBuild.mk
index 18306b9..23e6d58 100644
--- a/suite/pts/PtsBuild.mk
+++ b/suite/pts/PtsBuild.mk
@@ -22,7 +22,8 @@
 PTS_TEST_PACKAGES := \
     PtsDeviceFilePerf \
     PtsDeviceUi \
-    PtsDeviceDram
+    PtsDeviceDram \
+    PtsDeviceSimpleCpu
 
 
 PTS_SUPPORT_PACKAGES := \
diff --git a/suite/pts/deviceTests/simplecpu/Android.mk b/suite/pts/deviceTests/simplecpu/Android.mk
new file mode 100644
index 0000000..988ddc3
--- /dev/null
+++ b/suite/pts/deviceTests/simplecpu/Android.mk
@@ -0,0 +1,35 @@
+# Copyright (C) 2012 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+LOCAL_PATH:= $(call my-dir)
+include $(CLEAR_VARS)
+
+# don't include this package in any target
+LOCAL_MODULE_TAGS := optional
+
+LOCAL_JAVA_LIBRARIES := android.test.runner
+
+LOCAL_STATIC_JAVA_LIBRARIES := ptsutil ctsutil ctstestrunner
+
+LOCAL_JNI_SHARED_LIBRARIES := libptscpu_jni
+
+LOCAL_SRC_FILES := $(call all-java-files-under, src)
+
+LOCAL_PACKAGE_NAME := PtsDeviceSimpleCpu
+
+LOCAL_SDK_VERSION := 16
+
+include $(BUILD_PTS_PACKAGE)
+
+include $(call all-makefiles-under,$(LOCAL_PATH))
diff --git a/suite/pts/deviceTests/simplecpu/AndroidManifest.xml b/suite/pts/deviceTests/simplecpu/AndroidManifest.xml
new file mode 100644
index 0000000..930bcbd
--- /dev/null
+++ b/suite/pts/deviceTests/simplecpu/AndroidManifest.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright (C) 2012 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+        package="com.android.pts.simplecpu">
+
+    <uses-permission android:name="android.permission.DISABLE_KEYGUARD" />
+    <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
+
+    <application>
+        <uses-library android:name="android.test.runner" />
+    </application>
+    <instrumentation android:name="android.test.InstrumentationCtsTestRunner"
+            android:targetPackage="com.android.pts.simplecpu"
+            android:label="Very simple CPU benchmarking" />
+</manifest>
diff --git a/suite/pts/deviceTests/simplecpu/jni/Android.mk b/suite/pts/deviceTests/simplecpu/jni/Android.mk
new file mode 100644
index 0000000..9c34e0e
--- /dev/null
+++ b/suite/pts/deviceTests/simplecpu/jni/Android.mk
@@ -0,0 +1,31 @@
+# Copyright (C) 2012 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE    := libptscpu_jni
+
+LOCAL_MODULE_TAGS := optional
+
+LOCAL_SRC_FILES := CpuNativeJni.cpp
+
+LOCAL_C_INCLUDES := $(JNI_H_INCLUDE)
+
+LOCAL_SHARED_LIBRARIES := libnativehelper
+
+LOCAL_SDK_VERSION := 14
+
+include $(BUILD_SHARED_LIBRARY)
diff --git a/suite/pts/deviceTests/simplecpu/jni/CpuNativeJni.cpp b/suite/pts/deviceTests/simplecpu/jni/CpuNativeJni.cpp
new file mode 100644
index 0000000..ecf56e6
--- /dev/null
+++ b/suite/pts/deviceTests/simplecpu/jni/CpuNativeJni.cpp
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+
+/* Everything from here through qsort_local is copied from the bionic source.
+ * It is duplicated here to remove the dependency on an optimized bionic.
+ */
+static __inline char    *med3(char *, char *, char *, int (*)(const void *, const void *));
+static __inline void     swapfunc(char *, char *, int, int);
+
+#define min(a, b)   ((a) < (b) ? (a) : (b))  /* parenthesized: safe inside larger expressions */
+
+/*
+ * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
+ */
+#define swapcode(TYPE, parmi, parmj, n) {       \
+    long i = (n) / sizeof (TYPE);           \
+    TYPE *pi = (TYPE *) (parmi);            \
+    TYPE *pj = (TYPE *) (parmj);            \
+    do {                        \
+        TYPE    t = *pi;            \
+        *pi++ = *pj;                \
+        *pj++ = t;              \
+        } while (--i > 0);              \
+}  /* exchanges n bytes in TYPE-sized units; the do/while always swaps at least one unit */
+
+#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
+    es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;  /* 2: bytes, 0: one long, 1: several longs */
+
+static __inline void
+swapfunc(char *a, char *b, int n, int swaptype)  /* exchange the n bytes at a with those at b */
+{
+    if (swaptype <= 1)  /* 0 or 1: copy in long-sized units */
+        swapcode(long, a, b, n)
+    else  /* 2: copy byte by byte */
+        swapcode(char, a, b, n)
+}
+
+#define swap(a, b)                  \
+    if (swaptype == 0) {                \
+        long t = *(long *)(a);          \
+        *(long *)(a) = *(long *)(b);        \
+        *(long *)(b) = t;           \
+    } else                      \
+        swapfunc(a, b, es, swaptype)  /* reads es and swaptype from the scope where it is used */
+
+#define vecswap(a, b, n)    if ((n) > 0) swapfunc(a, b, n, swaptype)  /* no-op for an empty range */
+
+static __inline char *  /* returns whichever of a, b, c points at the median under cmp */
+med3(char *a, char *b, char *c, int (*cmp)(const void *, const void *))
+{
+    return cmp(a, b) < 0 ?
+           (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a ))
+              :(cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c ));
+}
+
+void  /* sorts in place the n elements of es bytes each starting at aa, ordered by cmp */
+qsort_local(void *aa, size_t n, size_t es, int (*cmp)(const void *, const void *))
+{
+    char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
+    int d, r, swaptype, swap_cnt;
+    char *a = (char*)aa;
+
+loop:   SWAPINIT(a, es);  /* choose swaptype from the alignment of a and the element size */
+    swap_cnt = 0;
+    if (n < 7) {  /* short input: insertion sort */
+        for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es)
+            for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
+                 pl -= es)
+                swap(pl, pl - es);
+        return;
+    }
+    pm = (char *)a + (n / 2) * es;  /* pivot candidate: the middle element */
+    if (n > 7) {
+        pl = (char *)a;
+        pn = (char *)a + (n - 1) * es;
+        if (n > 40) {  /* larger input: each candidate is itself a median of three */
+            d = (n / 8) * es;
+            pl = med3(pl, pl + d, pl + 2 * d, cmp);
+            pm = med3(pm - d, pm, pm + d, cmp);
+            pn = med3(pn - 2 * d, pn - d, pn, cmp);
+        }
+        pm = med3(pl, pm, pn, cmp);
+    }
+    swap(a, pm);  /* move the chosen pivot to the front */
+    pa = pb = (char *)a + es;
+
+    pc = pd = (char *)a + (n - 1) * es;
+    for (;;) {
+        while (pb <= pc && (r = cmp(pb, a)) <= 0) {
+            if (r == 0) {  /* equal to the pivot: collect it at the left edge */
+                swap_cnt = 1;
+                swap(pa, pb);
+                pa += es;
+            }
+            pb += es;
+        }
+        while (pb <= pc && (r = cmp(pc, a)) >= 0) {
+            if (r == 0) {  /* equal to the pivot: collect it at the right edge */
+                swap_cnt = 1;
+                swap(pc, pd);
+                pd -= es;
+            }
+            pc -= es;
+        }
+        if (pb > pc)
+            break;
+        swap(pb, pc);
+        swap_cnt = 1;
+        pb += es;
+        pc -= es;
+    }
+    if (swap_cnt == 0) {  /* nothing was swapped above: switch to insertion sort */
+        for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
+            for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
+                 pl -= es)
+                swap(pl, pl - es);
+        return;
+    }
+
+    pn = (char *)a + n * es;  /* one past the last element */
+    r = min(pa - (char *)a, pb - pa);
+    vecswap(a, pb - r, r);  /* bring the pivot-equal elements from the left edge to the middle */
+    r = min(pd - pc, pn - pd - (int)es);
+    vecswap(pb, pn - r, r);  /* bring the pivot-equal elements from the right edge to the middle */
+    if ((r = pb - pa) > (int)es)  /* more than one element compared less than the pivot */
+        qsort_local(a, r / es, es, cmp);
+    if ((r = pd - pc) > (int)es) {
+        /* Iterate rather than recurse to save stack space */
+        a = pn - r;
+        n = r / es;
+        goto loop;
+    }
+    /* the goto above stands in for: qsort(pn - r, r / es, es, cmp); */
+}
+
+/* code duplication ends here */
+
+/**
+ * Util for getting time stamp in ms; 64-bit math so tv_sec * 1000 cannot overflow a 32-bit long
+ */
+long currentTimeMillis()
+{
+    struct timeval tv;
+    gettimeofday(&tv, (struct timezone *) NULL);
+    return (long)((long long)tv.tv_sec * 1000 + tv.tv_usec / 1000);  // wraps if long is 32 bits
+}
+
+/**
+ * Seed the C library generator with seed, then fill array[0..len-1] with successive rand() values
+ */
+template <typename T> void randomInitArray(T* array, int len, unsigned int seed)
+{
+    srand(seed);
+    T* out = array;
+    for (int remaining = len; remaining > 0; --remaining)
+        *out++ = (T) rand();
+}
+
+/**
+ * comparison function for int, for qsort; returns -1, 0 or 1 without the overflow of p1 - p2
+ */
+int cmpint(const void* p1, const void* p2)
+{
+    return (*(const int*)p1 > *(const int*)p2) - (*(const int*)p1 < *(const int*)p2);
+}
+
+extern "C" JNIEXPORT jlong JNICALL Java_com_android_pts_simplecpu_CpuNative_runSort(JNIEnv* env,
+        jclass clazz, jint numberElements, jint repetition)
+{
+    // Sorts a freshly randomized int array 'repetition' times; returns the total sort time in ms.
+    int* data = (numberElements < 0) ? NULL : (int*) malloc(numberElements * sizeof(int));
+    if (data == NULL && numberElements != 0) {  // unlike plain new, malloc reports failure as NULL
+        env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"), "No memory");
+        return -1;  // also taken for a negative element count
+    }
+    long totalTime = 0;  // only the qsort_local calls are timed, not the array setup
+    for (int i = 0; i < repetition; i++) {
+        randomInitArray<int>(data, numberElements, 0);  // same seed: identical input each pass
+        long start = currentTimeMillis();
+        qsort_local(data, numberElements, sizeof(int), cmpint);
+        totalTime += (currentTimeMillis() - start);
+    }
+    free(data);
+    return totalTime;
+}
+
+
+/**
+ * Do matrix multiplication, C = A x B, with all matrices n x n and stored row by row.
+ * The implementation is not the most efficient, but it is good enough for benchmarking purposes.
+ * @param n should be a multiple of 8, since each pass fills 8 adjacent columns of one row of C
+ */
+void doMatrixMultiplication(float* A, float* B, float* C, int n)
+{
+    // batch size: how many adjacent columns of C are accumulated per pass
+    const int M = 8;
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j < n; j += M) {
+            float sum[M];
+            for (int k = 0; k < M; k++) {
+                sum[k] = 0;
+            }
+            // re-use the whole cache line for accessing B.
+            // otherwise, the whole line will be read and only one value will be used.
+
+            for (int k = 0; k < n; k++) {
+                float a = A[i * n + k];
+                sum[0] += a * B[k * n + j];
+                sum[1] += a * B[k * n + j + 1];
+                sum[2] += a * B[k * n + j + 2];
+                sum[3] += a * B[k * n + j + 3];
+                sum[4] += a * B[k * n + j + 4];
+                sum[5] += a * B[k * n + j + 5];
+                sum[6] += a * B[k * n + j + 6];
+                sum[7] += a * B[k * n + j + 7];
+            }
+            for (int k = 0; k < M; k++) {
+                C[i * n + j + k] = sum[k];
+            }
+        }
+    }
+}
+
+extern "C" JNIEXPORT jlong JNICALL Java_com_android_pts_simplecpu_CpuNative_runMatrixMultiplication(
+        JNIEnv* env, jclass clazz, jint n, jint repetition)
+{
+    // Multiplies two freshly randomized n x n matrices 'repetition' times; returns total ms.
+    // C = A x B; malloc rather than plain new, which never returns NULL (the check was dead).
+    float* A = (float*) malloc(n * n * sizeof(float));
+    float* B = (float*) malloc(n * n * sizeof(float));
+    float* C = (float*) malloc(n * n * sizeof(float));
+    if ((n != 0) && ((A == NULL) || (B == NULL) || (C == NULL))) {
+        free(A);
+        free(B);
+        free(C);
+        env->ThrowNew(env->FindClass("java/lang/OutOfMemoryError"), "No memory");
+        return -1;
+    }
+    long totalTime = 0;  // only the multiplications are timed, not the matrix setup
+    for (int i = 0; i < repetition; i++) {
+        randomInitArray<float>(A, n * n, 0);
+        randomInitArray<float>(B, n * n, 1);
+        long start = currentTimeMillis();
+        doMatrixMultiplication(A, B, C, n);
+        totalTime += (currentTimeMillis() - start);
+    }
+    free(A);
+    free(B);
+    free(C);
+    return totalTime;
+}
+
diff --git a/suite/pts/deviceTests/simplecpu/src/com/android/pts/simplecpu/CpuNative.java b/suite/pts/deviceTests/simplecpu/src/com/android/pts/simplecpu/CpuNative.java
new file mode 100644
index 0000000..666ff88
--- /dev/null
+++ b/suite/pts/deviceTests/simplecpu/src/com/android/pts/simplecpu/CpuNative.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.pts.simplecpu;
+
+public class CpuNative {
+    static {
+        System.loadLibrary("ptscpu_jni");
+    }
+    /**
+     * Run qsort for the given number of repetitions,
+     * each on an array of numberElements elements.
+     * @param numberElements number of elements in the array that is sorted
+     * @param repetition number of times the sort is run
+     * @return time spent in sorting in ms.
+     */
+    public static native long runSort(int numberElements, int repetition);
+
+    /**
+     * Run matrix multiplication of (n x n) x (n x n).
+     *
+     * @param n dimension, should be multiple of 8
+     * @param repetition number of times the multiplication is run
+     * @return time spent in multiplication in ms
+     */
+    public static native long runMatrixMultiplication(int n, int repetition);
+}
diff --git a/suite/pts/deviceTests/simplecpu/src/com/android/pts/simplecpu/SimpleCpuTest.java b/suite/pts/deviceTests/simplecpu/src/com/android/pts/simplecpu/SimpleCpuTest.java
new file mode 100644
index 0000000..df8cfd9
--- /dev/null
+++ b/suite/pts/deviceTests/simplecpu/src/com/android/pts/simplecpu/SimpleCpuTest.java
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.pts.simplecpu;
+
+import android.cts.util.TimeoutReq;
+
+import com.android.pts.util.PtsAndroidTestCase;
+import com.android.pts.util.ReportLog;
+import com.android.pts.util.Stat;
+
+/**
+ * Very simple CPU benchmarking to check the basic capability of CPU.
+ * Cases include
+ *   qsort
+ *   matrix multiplication (for floating point performance)
+ */
+public class SimpleCpuTest extends PtsAndroidTestCase {
+    private static final String TAG = "SimpleCpuTest";
+    private static final int KB = 1024;  // used below as element counts, not byte sizes
+    private static final int MB = 1024 * 1024;
+    private static final int NUMBER_REPEAT = 20;  // measurements collected per test
+
+    @Override
+    protected void setUp() throws Exception {
+        super.setUp();
+        warmUpCpu();
+    }
+
+    public void testSort004KB() {
+        doTestSort(NUMBER_REPEAT, 4 * KB);
+    }
+
+    public void testSort128KB() {
+        doTestSort(NUMBER_REPEAT, 128 * KB);
+    }
+
+    public void testSort001MB() {
+        doTestSort(NUMBER_REPEAT, 1 * MB);
+    }
+
+    // will fit into L1
+    public void testMatrixMultiplication032() {
+        doMatrixMultiplication(NUMBER_REPEAT, 32);
+    }
+
+    // mostly fit into L2
+    public void testMatrixMultiplication128() {
+        doMatrixMultiplication(NUMBER_REPEAT, 128);
+    }
+
+    // may fit into L2
+    public void testMatrixMultiplication200() {
+        doMatrixMultiplication(NUMBER_REPEAT, 200);
+    }
+
+    public void testMatrixMultiplication400() {
+        doMatrixMultiplication(NUMBER_REPEAT, 400);
+    }
+
+    // will exceed L2
+    @TimeoutReq(minutes = 20)
+    public void testMatrixMultiplication600() {
+        doMatrixMultiplication(NUMBER_REPEAT, 600);
+    }
+
+    /**
+     * run some code to force full CPU freq; called from setUp() before every test.
+     */
+    private void warmUpCpu() {
+        CpuNative.runSort(1 * MB, 10);
+    }
+
+    /**
+     * qsort test; reports every measurement plus their average and standard deviation
+     * @param numberRepeat number of measurements, each being one native call
+     * @param arrayLength number of elements in the array that is sorted
+     */
+    private void doTestSort(int numberRepeat, int arrayLength) {
+        final int numberRepeatInEachCall = 10;
+        double[] result = new double[numberRepeat];
+        for (int i = 0; i < numberRepeat; i++) {
+            result[i] = CpuNative.runSort(arrayLength, numberRepeatInEachCall);
+        }
+        getReportLog().printArray("ms", result, false);
+        Stat.StatResult stat = Stat.getStat(result);
+        getReportLog().printSummary("ms", stat.mAverage, stat.mStddev);
+    }
+
+    /**
+     * Matrix multiplication test, nxn matrix multiplication
+     * @param numberRepeat number of measurements, each being one native call
+     * @param n matrix dimension, should be multiple of 8 (asserted below)
+     */
+    private void doMatrixMultiplication(int numberRepeat, int n) {
+        assertTrue(n % 8 == 0);
+        final int numberRepeatInEachCall = 10;
+        double[] result = new double[numberRepeat];
+        for (int i = 0; i < numberRepeat; i++) {
+            result[i] = CpuNative.runMatrixMultiplication(n, numberRepeatInEachCall);
+        }
+        getReportLog().printArray("ms", result, false);
+        Stat.StatResult stat = Stat.getStat(result);
+        getReportLog().printSummary("ms", stat.mAverage, stat.mStddev);
+    }
+
+}