| /* |
| * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| /* |
| * @test |
| * |
| * @summary converted from VM Testbase jit/escape/LockElision/MatMul. |
| * VM Testbase keywords: [jit, quick] |
| * VM Testbase readme: |
| * DESCRIPTION |
| * The test multiplies 2 matrices, first, by directly calculating matrix product |
| * elements, and second, by calculating them parallelly in diffenent threads. |
| * The results are compared then. |
| * The test, in addition to required locks, introduces locks on local variables or |
| * variables not escaping from the executing thread, and nests them manifoldly. |
| * In case of a buggy compiler, during lock elimination some code, required by |
| * the calulation may be eliminated as well, or the code may be overoptimized in |
| * some other way, causing difference in the execution results. |
| * The test has one parameter, -dim, which specifies the dimensions of matrices. |
| * |
| * @library /vmTestbase |
| * /test/lib |
| * @run driver jdk.test.lib.FileInstaller . . |
| * @build jit.escape.LockElision.MatMul.MatMul |
| * @run driver ExecDriver --java jit.escape.LockElision.MatMul.MatMul -dim 30 -threadCount 10 |
| */ |
| |
| package jit.escape.LockElision.MatMul; |
| |
| import java.util.*; |
| import java.util.concurrent.CountDownLatch; |
| import java.util.concurrent.ExecutorService; |
| import java.util.concurrent.Executors; |
| |
| import nsk.share.Consts; |
| import nsk.share.Log; |
| import nsk.share.Pair; |
| |
| import nsk.share.test.StressOptions; |
| import vm.share.options.Option; |
| import vm.share.options.OptionSupport; |
| import vm.share.options.Options; |
| |
| |
| class MatMul { |
| |
| @Option(name = "dim", description = "dimension of matrices") |
| int dim; |
| |
| @Option(name = "verbose", default_value = "false", |
| description = "verbose mode") |
| boolean verbose; |
| |
| @Option(name = "threadCount", description = "thread count") |
| int threadCount; |
| |
| @Options |
| StressOptions stressOptions = new StressOptions(); |
| |
| private Log log; |
| |
| public static void main(String[] args) { |
| MatMul test = new MatMul(); |
| OptionSupport.setup(test, args); |
| System.exit(Consts.JCK_STATUS_BASE + test.run()); |
| } |
| |
| public int run() { |
| log = new Log(System.out, verbose); |
| log.display("Parallel matrix multiplication test"); |
| |
| Matrix a = Matrix.randomMatrix(dim); |
| Matrix b = Matrix.randomMatrix(dim); |
| long t1, t2; |
| |
| t1 = System.currentTimeMillis(); |
| Matrix serialResult = serialMul(a, b); |
| t2 = System.currentTimeMillis(); |
| log.display("serial time: " + (t2 - t1) + "ms"); |
| |
| try { |
| t1 = System.currentTimeMillis(); |
| Matrix parallelResult = parallelMul(a, b, |
| threadCount * stressOptions.getThreadsFactor()); |
| t2 = System.currentTimeMillis(); |
| log.display("parallel time: " + (t2 - t1) + "ms"); |
| |
| if (!serialResult.equals(parallelResult)) { |
| log.complain("a = \n" + a); |
| log.complain("b = \n" + b); |
| |
| log.complain("serial: a * b = \n" + serialResult); |
| log.complain("serial: a * b = \n" + parallelResult); |
| return Consts.TEST_FAILED; |
| } |
| return Consts.TEST_PASSED; |
| |
| } catch (CounterIncorrectStateException e) { |
| log.complain("incorrect state of counter " + e.counter.name); |
| log.complain("expected = " + e.counter.expected); |
| log.complain("actual " + e.counter.state()); |
| return Consts.TEST_FAILED; |
| } |
| } |
| |
| public static int convolution(Seq<Integer> one, Seq<Integer> two) { |
| int res = 0; |
| int upperBound = Math.min(one.size(), two.size()); |
| for (int i = 0; i < upperBound; i++) { |
| res += one.get(i) * two.get(i); |
| } |
| return res; |
| } |
| |
| /** |
| * calculate chunked convolutuion of two sequences |
| * <p/> |
| * This special version of this method: |
| * <pre>{@code |
| * public static int chunkedConvolution(Seq<Integer> one, Seq<Integer> two, int from, int to) { |
| * int res = 0; |
| * int upperBound = Math.min(Math.min(one.size(), two.size()), to + 1); |
| * for (int i = from; i < upperBound; i++) { |
| * res += one.get(i) * two.get(i); |
| * } |
| * return res; |
| * }}</pre> |
| * <p/> |
| * that tries to fool the Lock Elision optimization: |
| * Most lock objects in these lines are really thread local, so related synchronized blocks (dummy blocks) can be removed. |
| * But several synchronized blocks (all that protected by Counter instances) are really necessary, and removing them we obtain |
| * an incorrect result. |
| * |
| * @param one |
| * @param two |
| * @param from - lower bound of sum |
| * @param to - upper bound of sum |
| * @param local - reference ThreadLocal that will be used for calculations |
| * @param bCounter - Counter instance, need to perfom checks |
| */ |
| public static int chunkedConvolutionWithDummy(Seq<Integer> one, |
| Seq<Integer> two, int from, int to, ThreadLocals local, |
| Counter bCounter) { |
| ThreadLocals conv_local1 = new ThreadLocals(local, "conv_local1"); |
| ThreadLocals conv_local2 = new ThreadLocals(conv_local1, "conv_local2"); |
| ThreadLocals conv_local3 = new ThreadLocals(null, "conv_local3"); |
| int res = 0; |
| synchronized (local) { |
| local.updateHash(); |
| int upperBound = 0; |
| synchronized (conv_local1) { |
| upperBound = local.min(one.size(), two.size()); |
| synchronized (two) { |
| //int upperBound = Math.min(Math.min(one.size(), two.size()), to + 1) : |
| upperBound = conv_local1.min(upperBound, to + 1); |
| synchronized (bCounter) { |
| bCounter.inc(); |
| } |
| } |
| for (int i = from; i < upperBound; i++) { |
| synchronized (conv_local2) { |
| conv_local1.updateHash(); |
| int prod = 0; |
| synchronized (one) { |
| int t = conv_local2.mult(one.get(i), two.get(i)); |
| synchronized (conv_local3) { |
| prod = t; |
| |
| } |
| //res += one.get(i) * two.get(i) |
| res = conv_local3.sum(res, prod); |
| } |
| } |
| } |
| } |
| return res; |
| } |
| } |
| |
| public boolean productCheck(Matrix a, Matrix b) { |
| if (a == null || b == null) { |
| log.complain("null matrix!"); |
| return false; |
| } |
| |
| if (a.dim != b.dim) { |
| log.complain("matrices dimension are differs"); |
| return false; |
| } |
| return true; |
| } |
| |
| public Matrix serialMul(Matrix a, Matrix b) { |
| if (!productCheck(a, b)) { |
| throw new IllegalArgumentException(); |
| } |
| |
| Matrix result = Matrix.zeroMatrix(a.dim); |
| for (int i = 0; i < a.dim; i++) { |
| for (int j = 0; j < a.dim; j++) { |
| result.set(i, j, convolution(a.row(i), b.column(j))); |
| } |
| } |
| return result; |
| } |
| |
| |
| /** |
| * Parallel multiplication of matrices. |
| * <p/> |
| * This special version of this method: |
| * <pre>{@code |
| * public Matrix parallelMul1(final Matrix a, final Matrix b, int threadCount) { |
| * if (!productCheck(a, b)) { |
| * throw new IllegalArgumentException(); |
| * } |
| * final int dim = a.dim; |
| * final Matrix result = Matrix.zeroMatrix(dim); |
| * <p/> |
| * ExecutorService threadPool = Executors.newFixedThreadPool(threadCount); |
| * final CountDownLatch latch = new CountDownLatch(threadCount); |
| * List<Pair<Integer, Integer>> parts = splitInterval(Pair.of(0, dim - 1), threadCount); |
| * for (final Pair<Integer, Integer> part : parts) { |
| * threadPool.submit(new Runnable() { |
| * @Override |
| * public void run() { |
| * for (int i = 0; i < dim; i++) { |
| * for (int j = 0; j < dim; j++) { |
| * synchronized (result) { |
| * int from = part.first; |
| * int to = part.second; |
| * result.add(i, j, chunkedConvolution(a.row(i), b.column(j), from, to)); |
| * } |
| * } |
| * } |
| * latch.countDown(); |
| * } |
| * }); |
| * } |
| * <p/> |
| * try { |
| * latch.await(); |
| * } catch (InterruptedException e) { |
| * e.printStackTrace(); |
| * } |
| * threadPool.shutdown(); |
| * return result; |
| * }}</pre> |
| * Lines marked with NOP comments need to fool the Lock Elision optimization: |
| * All lock objects in these lines are really thread local, so related synchronized blocks (dummy blocks) can be removed. |
| * But several synchronized blocks (that are nested in dummy blocks) are really necessary, and removing them we obtain |
| * an incorrect result. |
| * |
| * @param a first operand |
| * @param b second operand |
| * @param threadCount number of threads that will be used for calculations |
| * @return product of matrices a and b |
| */ |
| public Matrix parallelMul(final Matrix a, final Matrix b, int threadCount) |
| throws CounterIncorrectStateException { |
| if (!productCheck(a, b)) { |
| throw new IllegalArgumentException(); |
| } |
| final int dim = a.dim; |
| final Matrix result = Matrix.zeroMatrix(dim); |
| |
| ExecutorService threadPool = Executors.newFixedThreadPool(threadCount); |
| final CountDownLatch latch = new CountDownLatch(threadCount); |
| List<Pair<Integer, Integer>> parts = splitInterval(Pair.of(0, dim - 1), |
| threadCount); |
| |
| final Counter lCounter1 = new Counter(threadCount, "lCounter1"); |
| final Counter lCounter2 = new Counter(threadCount, "lCounter2"); |
| final Counter lCounter3 = new Counter(threadCount, "lCounter3"); |
| |
| final Counter bCounter1 = new Counter(threadCount * dim * dim, |
| "bCounter1"); |
| final Counter bCounter2 = new Counter(threadCount * dim * dim, |
| "bCounter2"); |
| final Counter bCounter3 = new Counter(threadCount * dim * dim, |
| "bCounter3"); |
| |
| final Counter[] counters = {lCounter1, lCounter2, lCounter3, |
| bCounter1, bCounter2, bCounter3}; |
| |
| final Map<Pair<Integer, Integer>, ThreadLocals> locals1 |
| = CollectionsUtils.newHashMap(); |
| final Map<Pair<Integer, Integer>, ThreadLocals> locals2 |
| = CollectionsUtils.newHashMap(); |
| final Map<Pair<Integer, Integer>, ThreadLocals> locals3 |
| = CollectionsUtils.newHashMap(); |
| |
| for (final Pair<Integer, Integer> part : parts) { |
| |
| ThreadLocals local1 = new ThreadLocals(null, |
| "locals1[" + part + "]"); |
| ThreadLocals local2 = new ThreadLocals(local1, |
| "locals2[" + part + "]"); |
| ThreadLocals local3 = new ThreadLocals(local2, |
| "locals3[" + part + "]"); |
| |
| locals1.put(part, local1); |
| locals2.put(part, local2); |
| locals3.put(part, local3); |
| } |
| |
| for (final Pair<Integer, Integer> part : parts) { |
| threadPool.submit(new Runnable() { |
| @Override |
| public void run() { |
| ThreadLocals local1 = locals1.get(part); |
| ThreadLocals local2 = locals2.get(part); |
| ThreadLocals local3 = locals3.get(part); |
| ThreadLocals local4 = locals3.get(part); |
| synchronized (local1) { |
| local1.updateHash(); |
| synchronized (lCounter1) { |
| lCounter1.inc(); |
| } |
| synchronized (lCounter3) { |
| synchronized (local2) { |
| local2.updateHash(); |
| lCounter3.inc(); |
| } |
| } |
| synchronized (new Object()) { |
| synchronized (lCounter2) { |
| lCounter2.inc(); |
| } |
| for (int i = 0; i < dim; i++) { |
| for (int j = 0; j < dim; j++) { |
| synchronized (bCounter1) { |
| synchronized (new Object()) { |
| bCounter1.inc(); |
| } |
| } |
| synchronized (local3) { |
| local3.updateHash(); |
| synchronized (bCounter2) { |
| bCounter2.inc(); |
| } |
| synchronized (result) { |
| local1.updateHash(); |
| synchronized (local2) { |
| local2.updateHash(); |
| int from = part.first; |
| int to = part.second; |
| result.add(i, j, |
| chunkedConvolutionWithDummy( |
| a.row(i), |
| b.column(j), |
| from, to, |
| local4, |
| bCounter3)); |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| latch.countDown(); |
| } |
| }); |
| } |
| |
| try { |
| latch.await(); |
| } catch (InterruptedException e) { |
| e.printStackTrace(); |
| } |
| |
| threadPool.shutdown(); |
| for (final Pair<Integer, Integer> part : parts) { |
| log.display( |
| "hash for " + part + " = " + locals1.get(part).getHash()); |
| } |
| |
| |
| for (Counter counter : counters) { |
| if (!counter.check()) { |
| throw new CounterIncorrectStateException(counter); |
| } |
| } |
| return result; |
| } |
| |
| /** |
| * Split interval into parts |
| * |
| * @param interval - pair than encode bounds of interval |
| * @param partCount - count of parts |
| * @return list of pairs than encode bounds of parts |
| */ |
| public static List<Pair<Integer, Integer>> splitInterval( |
| Pair<Integer, Integer> interval, int partCount) { |
| if (partCount == 0) { |
| throw new IllegalArgumentException(); |
| } |
| |
| if (partCount == 1) { |
| return CollectionsUtils.asList(interval); |
| } |
| |
| int intervalSize = interval.second - interval.first + 1; |
| int partSize = intervalSize / partCount; |
| |
| List<Pair<Integer, Integer>> init = splitInterval( |
| Pair.of(interval.first, interval.second - partSize), |
| partCount - 1); |
| Pair<Integer, Integer> lastPart = Pair |
| .of(interval.second - partSize + 1, interval.second); |
| |
| return CollectionsUtils.append(init, lastPart); |
| } |
| |
| public static class Counter { |
| private int state; |
| |
| public final int expected; |
| public final String name; |
| |
| public void inc() { |
| state++; |
| } |
| |
| public int state() { |
| return state; |
| } |
| |
| public boolean check() { |
| return state == expected; |
| } |
| |
| public Counter(int expected, String name) { |
| this.expected = expected; |
| this.name = name; |
| } |
| } |
| |
| private static class CounterIncorrectStateException extends Exception { |
| public final Counter counter; |
| |
| public CounterIncorrectStateException(Counter counter) { |
| this.counter = counter; |
| } |
| } |
| |
| private static abstract class Seq<E> implements Iterable<E> { |
| @Override |
| public Iterator<E> iterator() { |
| return new Iterator<E>() { |
| private int p = 0; |
| |
| @Override |
| public boolean hasNext() { |
| return p < size(); |
| } |
| |
| @Override |
| public E next() { |
| return get(p++); |
| } |
| |
| @Override |
| public void remove() { |
| } |
| }; |
| } |
| |
| public abstract E get(int i); |
| |
| public abstract int size(); |
| } |
| |
| private static class CollectionsUtils { |
| |
| public static <K, V> Map<K, V> newHashMap() { |
| return new HashMap<K, V>(); |
| } |
| |
| public static <E> List<E> newArrayList() { |
| return new ArrayList<E>(); |
| } |
| |
| public static <E> List<E> newArrayList(Collection<E> collection) { |
| return new ArrayList<E>(collection); |
| } |
| |
| public static <E> List<E> asList(E e) { |
| List<E> result = newArrayList(); |
| result.add(e); |
| return result; |
| } |
| |
| public static <E> List<E> append(List<E> init, E last) { |
| List<E> result = newArrayList(init); |
| result.add(last); |
| return result; |
| } |
| } |
| |
| private static class Matrix { |
| |
| public final int dim; |
| private int[] coeffs; |
| |
| private Matrix(int dim) { |
| this.dim = dim; |
| this.coeffs = new int[dim * dim]; |
| } |
| |
| public void set(int i, int j, int value) { |
| coeffs[i * dim + j] = value; |
| } |
| |
| public void add(int i, int j, int value) { |
| coeffs[i * dim + j] += value; |
| } |
| |
| public int get(int i, int j) { |
| return coeffs[i * dim + j]; |
| } |
| |
| public Seq<Integer> row(final int i) { |
| return new Seq<Integer>() { |
| @Override |
| public Integer get(int j) { |
| return Matrix.this.get(i, j); |
| } |
| |
| @Override |
| public int size() { |
| return Matrix.this.dim; |
| } |
| }; |
| } |
| |
| public Seq<Integer> column(final int j) { |
| return new Seq<Integer>() { |
| @Override |
| public Integer get(int i) { |
| return Matrix.this.get(i, j); |
| } |
| |
| @Override |
| public int size() { |
| return Matrix.this.dim; |
| } |
| }; |
| } |
| |
| @Override |
| public String toString() { |
| StringBuilder builder = new StringBuilder(); |
| for (int i = 0; i < dim; i++) { |
| for (int j = 0; j < dim; j++) { |
| builder.append((j == 0) ? "" : "\t\t"); |
| builder.append(get(i, j)); |
| } |
| builder.append("\n"); |
| } |
| return builder.toString(); |
| } |
| |
| @Override |
| public boolean equals(Object other) { |
| if (!(other instanceof Matrix)) { |
| return false; |
| } |
| |
| Matrix b = (Matrix) other; |
| if (b.dim != this.dim) { |
| return false; |
| } |
| for (int i = 0; i < dim; i++) { |
| for (int j = 0; j < dim; j++) { |
| if (this.get(i, j) != b.get(i, j)) { |
| return false; |
| } |
| } |
| } |
| return true; |
| } |
| |
| private static Random random = new Random(); |
| |
| public static Matrix randomMatrix(int dim) { |
| Matrix result = new Matrix(dim); |
| for (int i = 0; i < dim; i++) { |
| for (int j = 0; j < dim; j++) { |
| result.set(i, j, random.nextInt(50)); |
| } |
| } |
| return result; |
| } |
| |
| public static Matrix zeroMatrix(int dim) { |
| Matrix result = new Matrix(dim); |
| for (int i = 0; i < dim; i++) { |
| for (int j = 0; j < dim; j++) { |
| result.set(i, j, 0); |
| } |
| } |
| return result; |
| } |
| } |
| |
| /** |
| * All instances of this class will be used in thread local context |
| */ |
| private static class ThreadLocals { |
| private static final int HASH_BOUND = 424242; |
| |
| private ThreadLocals parent; |
| private int hash = 42; |
| public final String name; |
| |
| public ThreadLocals(ThreadLocals parent, String name) { |
| this.parent = parent; |
| this.name = name; |
| } |
| |
| public int min(int a, int b) { |
| updateHash(a + b + 1); |
| return Math.min(a, b); |
| } |
| |
| public int mult(int a, int b) { |
| updateHash(a + b + 2); |
| return a * b; |
| } |
| |
| public int sum(int a, int b) { |
| updateHash(a + b + 3); |
| return a + b; |
| } |
| |
| |
| public int updateHash() { |
| return updateHash(42); |
| } |
| |
| public int updateHash(int data) { |
| hash = (hash + data) % HASH_BOUND; |
| if (parent != null) { |
| hash = parent.updateHash(hash) % HASH_BOUND; |
| } |
| return hash; |
| } |
| |
| public int getHash() { |
| return hash; |
| } |
| } |
| } |