blob: 2c7b3b137dfc6862888b7302f881bb4d8ff7a795 [file] [log] [blame]
/*
* Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2019, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/vm_version.hpp"
// Implementation of the platform-specific part of StubRoutines - for
// a description of how to extend it, see the stubRoutines.hpp file.
#define __ masm->
// CRC constant compute functions
// Folds one byte into the CRC state: applies the bitwise CRC step 8 times
// using the bit-reflected polynomial 'reverse_poly'. Branch-free update:
// the mask -(w & 1) is all ones when the low bit is set, zero otherwise,
// so the polynomial is XORed in exactly on odd intermediate values.
static juint fold_byte(juint w, juint reverse_poly) {
  int bits_left = 8;
  while (bits_left-- > 0) {
    juint mask = -(w & 1); // 0xFFFFFFFF if w is odd, 0 if even
    w = (w >> 1) ^ (reverse_poly & mask);
  }
  return w;
}
// Folds one full 32-bit word into the CRC state: the same bitwise CRC
// step as fold_byte, applied 32 times with the bit-reflected polynomial.
static juint fold_word(juint w, juint reverse_poly) {
  for (int bit = 32; bit != 0; --bit) {
    juint mask = -(w & 1); // all ones iff the low bit is set
    w = (w >> 1) ^ (reverse_poly & mask);
  }
  return w;
}
// Returns the number of leading zero bits in 'p' (64 when p == 0),
// scanning from the most significant bit downwards.
static julong numberOfLeadingZeros(julong p) {
  for (int zeros = 0; zeros < 64; ++zeros) {
    if ((p >> (63 - zeros)) & 1) {
      return zeros;
    }
  }
  return 64;
}
// Computes the quotient of 2^64 divided by 'long_poly' in GF(2) carry-less
// arithmetic (XOR is subtraction) via binary long division. The result is
// the inverse-polynomial constant used by the 64->32 bit reduction.
// 'long_poly' must be non-zero (it always carries an implicit low bit when
// built from a reflected CRC polynomial, see the caller).
static julong compute_inverse_poly(julong long_poly) {
  // 2^64 / p
  julong mod = 0, div = 0;
  int d = (int)numberOfLeadingZeros(long_poly);
  int s = d + 1;
  do {
    mod ^= (long_poly << s);
    // Shift an unsigned 64-bit one: the previous "1L << s" was a signed
    // shift whose width depends on the platform's 'long' (only 32 bits on
    // ILP32/LLP64), silently dropping quotient bits for s >= 31 and risking
    // undefined behavior when shifting into the sign bit at s == 63.
    div |= ((julong)1 << s);
    // Explicit cast: numberOfLeadingZeros returns julong; compute the
    // (possibly negative) next shift amount in signed arithmetic.
    s = d - (int)numberOfLeadingZeros(mod);
  } while (s >= 0);
  return div;
}
#ifndef VM_LITTLE_ENDIAN
// Reverses the byte order of the 32-bit value in place (endianness
// conversion for the big endian table layout).
static void reverse_bytes(juint &w) {
  juint swapped = (w << 24)
                | ((w & 0x0000FF00) << 8)
                | ((w >> 8) & 0x0000FF00)
                | (w >> 24);
  w = swapped;
}
#endif
// Constants to fold n words as needed by macroAssembler.
//
// Builds the per-polynomial CRC constant table consumed by the CRC stub
// code. 'reverse_poly' is the bit-reflected CRC polynomial. The buffer is
// malloc'ed once and never freed (it lives for the lifetime of the VM);
// on allocation failure the VM exits.
address StubRoutines::generate_crc_constants(juint reverse_poly) {
// Layout of constant table:
// <= Power7 Little Endian: 4 tables for byte folding
// <= Power7 Big Endian: 1 table for single byte folding + 4 tables for multi-byte folding
// >= Power8: 1 table for single byte folding + constants for fast vector implementation
const bool use_vector = VM_Version::has_vpmsumb();
// Vector part: 16 bytes per outer-loop constant (CRC32_UNROLL_FACTOR2 of
// them) plus 16 bytes per inner-loop constant (see the two loops below).
const int vector_size = 16 * (CRC32_UNROLL_FACTOR2 + CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2);
const int size = use_vector ? CRC32_TABLE_SIZE + vector_size : (4 BIG_ENDIAN_ONLY(+1)) * CRC32_TABLE_SIZE;
const address consts = (address)malloc(size);
if (consts == NULL) {
vm_exit_out_of_memory(size, OOM_MALLOC_ERROR, "CRC constants: no enough space");
}
juint* ptr = (juint*)consts;
// Simple table used for single byte folding
// (on little endian this table is only emitted for the vector path; on
// big endian it is always the first CRC32_TABLE_SIZE bytes).
LITTLE_ENDIAN_ONLY(if (use_vector)) {
for (int i = 0; i < 256; ++i) {
ptr[i] = fold_byte(i, reverse_poly);
}
}
if (!use_vector) {
BIG_ENDIAN_ONLY(ptr = (juint*)(consts + CRC32_TABLE_SIZE);)
// <= Power7: 4 tables
// Table k holds fold_byte applied (k+1) times, i.e. the contribution of
// a byte that sits k more byte positions away from the message end.
for (int i = 0; i < 256; ++i) {
juint a = fold_byte(i, reverse_poly),
b = fold_byte(a, reverse_poly),
c = fold_byte(b, reverse_poly),
d = fold_byte(c, reverse_poly);
#ifndef VM_LITTLE_ENDIAN
reverse_bytes(a);
reverse_bytes(b);
reverse_bytes(c);
reverse_bytes(d);
#endif
ptr[i ] = a;
ptr[i + 256] = b;
ptr[i + 2* 256] = c;
ptr[i + 3* 256] = d;
}
#if 0
// Debug aid (disabled): dumps the four byte-folding tables to tty.
for (int i = 0; i < 4; ++i) {
tty->print_cr("table %d:", i);
for (int j = 0; j < 32; ++j) {
for (int k = 0; k < 8; ++k) {
tty->print("%08x ", ptr[i*256 + j*8 + k]);
}
tty->cr();
}
}
#endif
return consts;
}
// >= Power8: vector constants
juint* ptr1 = (juint*)(consts + CRC32_TABLE_SIZE);
// The assembler loads these with 16-byte vector loads, hence the check.
guarantee(((intptr_t)ptr1 & 0xF) == 0, "16-byte alignment needed");
// Generate constants for outer loop
// Entry i holds fold_word applied 4*(i+1) times to 1 (i.e. the folding
// constant for 4*(i+1) words of zeros), with the word order within the
// 16-byte entry depending on endianness.
juint v0, v1, v2, v3 = 1;
for (int i = 0; i < CRC32_UNROLL_FACTOR2 - 1; ++i) {
v0 = fold_word(v3, reverse_poly);
v1 = fold_word(v0, reverse_poly);
v2 = fold_word(v1, reverse_poly);
v3 = fold_word(v2, reverse_poly);
#ifdef VM_LITTLE_ENDIAN
ptr1[4*i ] = v3;
ptr1[4*i+1] = v2;
ptr1[4*i+2] = v3;
ptr1[4*i+3] = v2;
#else
ptr1[4*i ] = v2;
ptr1[4*i+1] = v3;
ptr1[4*i+2] = v2;
ptr1[4*i+3] = v3;
#endif
}
// Generate constants for inner loop
juint* ptr2 = ptr1 + 4 * (CRC32_UNROLL_FACTOR2 - 1);
v3 = 1; // Restart from scratch.
for (int i = 0; i < CRC32_UNROLL_FACTOR; ++i) {
v0 = fold_word(v3, reverse_poly);
v1 = fold_word(v0, reverse_poly);
v2 = fold_word(v1, reverse_poly);
v3 = fold_word(v2, reverse_poly);
// Only every CRC32_UNROLL_FACTOR2-th iteration emits an entry; 'idx'
// counts down so entries are stored in reverse iteration order.
if (i % CRC32_UNROLL_FACTOR2 == 0) {
int idx = CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2 - 1 - i / CRC32_UNROLL_FACTOR2;
// NOTE(review): the loop body below does not depend on 'j', so the same
// four words are written four times. Harmless but looks redundant --
// verify whether 'j' was meant to participate in the ptr2 index.
for (int j = 0; j < 4; ++j) {
#ifdef VM_LITTLE_ENDIAN
ptr2[4*idx ] = v3;
ptr2[4*idx+1] = v2;
ptr2[4*idx+2] = v1;
ptr2[4*idx+3] = v0;
#else
ptr2[4*idx ] = v0;
ptr2[4*idx+1] = v1;
ptr2[4*idx+2] = v2;
ptr2[4*idx+3] = v3;
#endif
}
}
}
// Constants to reduce 64 to 32 bit as needed by macroAssembler.
juint* ptr3 = ptr2 + 4 * (CRC32_UNROLL_FACTOR / CRC32_UNROLL_FACTOR2);
julong* c = (julong*)ptr3;
// 'long_poly' is the reflected polynomial with its implicit top bit
// restored (reverse_poly << 1 | 1); 'inverse_long_poly' is 2^64 / p in
// GF(2), see compute_inverse_poly. Stored order depends on endianness.
julong long_poly = (((julong)reverse_poly) << 1) | 1;
julong inverse_long_poly = compute_inverse_poly(long_poly);
#ifdef VM_LITTLE_ENDIAN
c[0] = inverse_long_poly;
c[1] = long_poly;
#else
c[0] = long_poly;
c[1] = inverse_long_poly;
#endif
#ifdef ASSERT
// Cross-check the computed inverse against the precomputed constants for
// the two polynomials actually used by the VM (CRC32 and CRC32C).
if (reverse_poly == REVERSE_CRC32_POLY) {
assert(INVERSE_REVERSE_CRC32_POLY == inverse_long_poly, "sanity");
} else if (reverse_poly == REVERSE_CRC32C_POLY) {
assert(INVERSE_REVERSE_CRC32C_POLY == inverse_long_poly, "sanity");
}
#endif
//printf("inv poly: 0x%016llx\n", (long long unsigned int)inverse_long_poly);
return consts;
}