Merge pull request #5 from bell-sw/update_8u232

jdk8u232-ga
diff --git a/.hgtags b/.hgtags
index 7b31328..a933d7c 100644
--- a/.hgtags
+++ b/.hgtags
@@ -1307,3 +1307,13 @@
 55f693ba975d445d83a59cc32367ec4c2452b0c5 jdk8u222-b09
 adfdce09acc32a691145a67792d47ab637159776 jdk8u222-b10
 adfdce09acc32a691145a67792d47ab637159776 jdk8u222-ga
+afa42cf8d060a12fe2fd24210cac6c46252fcd53 jdk8u232-b01
+c963a2881865f6fab5b49a31d22651e8e1b4bf46 jdk8u232-b02
+fa7fe6dae563edaae8a8bbe8ac4bd4fa942bde0c jdk8u232-b03
+921c5ee7965fdfde75f578ddda24d5cd16f124dc jdk8u232-b04
+b13d7942036329f64c77a93cffc25e1b52523a3c jdk8u232-b05
+fea2c7f50ce8e6aee1e946eaec7b834193747d82 jdk8u232-b06
+c751303497d539aa85c6373aa0fa85580d3f3044 jdk8u232-b07
+4170228e11e6313e948e6ddcae9af3eed06b1fbe jdk8u232-b08
+12177d88b89c12c14daa5ad681030d7551e8a5a0 jdk8u232-b09
+12177d88b89c12c14daa5ad681030d7551e8a5a0 jdk8u232-ga
diff --git a/THIRD_PARTY_README b/THIRD_PARTY_README
index 814e5f2..7dc54a0 100644
--- a/THIRD_PARTY_README
+++ b/THIRD_PARTY_README
@@ -1470,60 +1470,90 @@
 
 -------------------------------------------------------------------------------
 
-%% This notice is provided with respect to libpng 1.6.35, which may be
+%% This notice is provided with respect to libpng 1.6.37, which may be
 included with JRE 8, JDK 8, and OpenJDK 8.
 
 --- begin of LICENSE ---
 
-This copy of the libpng notices is provided for your convenience.  In case of
-any discrepancy between this copy and the notices in the file png.h that is
-included in the libpng distribution, the latter shall prevail.
+COPYRIGHT NOTICE, DISCLAIMER, and LICENSE
+=========================================
 
-COPYRIGHT NOTICE, DISCLAIMER, and LICENSE:
+PNG Reference Library License version 2
+---------------------------------------
 
-If you modify libpng you may insert additional notices immediately following
-this sentence.
+ * Copyright (c) 1995-2019 The PNG Reference Library Authors.
+ * Copyright (c) 2018-2019 Cosmin Truta.
+ * Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson.
+ * Copyright (c) 1996-1997 Andreas Dilger.
+ * Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.
 
-This code is released under the libpng license.
+The software is supplied "as is", without warranty of any kind,
+express or implied, including, without limitation, the warranties
+of merchantability, fitness for a particular purpose, title, and
+non-infringement.  In no event shall the Copyright owners, or
+anyone distributing the software, be liable for any damages or
+other liability, whether in contract, tort or otherwise, arising
+from, out of, or in connection with the software, or the use or
+other dealings in the software, even if advised of the possibility
+of such damage.
 
-libpng versions 1.0.7, July 1, 2000 through 1.6.35, July 15, 2018 are
+Permission is hereby granted to use, copy, modify, and distribute
+this software, or portions hereof, for any purpose, without fee,
+subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you
+    must not claim that you wrote the original software.  If you
+    use this software in a product, an acknowledgment in the product
+    documentation would be appreciated, but is not required.
+
+ 2. Altered source versions must be plainly marked as such, and must
+    not be misrepresented as being the original software.
+
+ 3. This Copyright notice may not be removed or altered from any
+    source or altered source distribution.
+
+
+PNG Reference Library License version 1 (for libpng 0.5 through 1.6.35)
+-----------------------------------------------------------------------
+
+libpng versions 1.0.7, July 1, 2000, through 1.6.35, July 15, 2018 are
 Copyright (c) 2000-2002, 2004, 2006-2018 Glenn Randers-Pehrson, are
 derived from libpng-1.0.6, and are distributed according to the same
 disclaimer and license as libpng-1.0.6 with the following individuals
 added to the list of Contributing Authors:
 
-   Simon-Pierre Cadieux
-   Eric S. Raymond
-   Mans Rullgard
-   Cosmin Truta
-   Gilles Vollant
-   James Yu
-   Mandar Sahastrabuddhe
-   Google Inc.
-   Vadim Barkov
+    Simon-Pierre Cadieux
+    Eric S. Raymond
+    Mans Rullgard
+    Cosmin Truta
+    Gilles Vollant
+    James Yu
+    Mandar Sahastrabuddhe
+    Google Inc.
+    Vadim Barkov
 
 and with the following additions to the disclaimer:
 
-   There is no warranty against interference with your enjoyment of the
-   library or against infringement.  There is no warranty that our
-   efforts or the library will fulfill any of your particular purposes
-   or needs.  This library is provided with all faults, and the entire
-   risk of satisfactory quality, performance, accuracy, and effort is with
-   the user.
+    There is no warranty against interference with your enjoyment of
+    the library or against infringement.  There is no warranty that our
+    efforts or the library will fulfill any of your particular purposes
+    or needs.  This library is provided with all faults, and the entire
+    risk of satisfactory quality, performance, accuracy, and effort is
+    with the user.
 
 Some files in the "contrib" directory and some configure-generated
-files that are distributed with libpng have other copyright owners and
+files that are distributed with libpng have other copyright owners, and
 are released under other open source licenses.
 
 libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are
 Copyright (c) 1998-2000 Glenn Randers-Pehrson, are derived from
 libpng-0.96, and are distributed according to the same disclaimer and
-license as libpng-0.96, with the following individuals added to the list
-of Contributing Authors:
+license as libpng-0.96, with the following individuals added to the
+list of Contributing Authors:
 
-   Tom Lane
-   Glenn Randers-Pehrson
-   Willem van Schaik
+    Tom Lane
+    Glenn Randers-Pehrson
+    Willem van Schaik
 
 libpng versions 0.89, June 1996, through 0.96, May 1997, are
 Copyright (c) 1996-1997 Andreas Dilger, are derived from libpng-0.88,
@@ -1531,14 +1561,14 @@
 libpng-0.88, with the following individuals added to the list of
 Contributing Authors:
 
-   John Bowler
-   Kevin Bracey
-   Sam Bushell
-   Magnus Holmgren
-   Greg Roelofs
-   Tom Tanner
+    John Bowler
+    Kevin Bracey
+    Sam Bushell
+    Magnus Holmgren
+    Greg Roelofs
+    Tom Tanner
 
-Some files in the "scripts" directory have other copyright owners
+Some files in the "scripts" directory have other copyright owners,
 but are released under this license.
 
 libpng versions 0.5, May 1995, through 0.88, January 1996, are
@@ -1547,39 +1577,38 @@
 For the purposes of this copyright and license, "Contributing Authors"
 is defined as the following set of individuals:
 
-   Andreas Dilger
-   Dave Martindale
-   Guy Eric Schalnat
-   Paul Schmidt
-   Tim Wegner
+    Andreas Dilger
+    Dave Martindale
+    Guy Eric Schalnat
+    Paul Schmidt
+    Tim Wegner
 
-The PNG Reference Library is supplied "AS IS".  The Contributing Authors
-and Group 42, Inc. disclaim all warranties, expressed or implied,
-including, without limitation, the warranties of merchantability and of
-fitness for any purpose.  The Contributing Authors and Group 42, Inc.
-assume no liability for direct, indirect, incidental, special, exemplary,
-or consequential damages, which may result from the use of the PNG
-Reference Library, even if advised of the possibility of such damage.
+The PNG Reference Library is supplied "AS IS".  The Contributing
+Authors and Group 42, Inc. disclaim all warranties, expressed or
+implied, including, without limitation, the warranties of
+merchantability and of fitness for any purpose.  The Contributing
+Authors and Group 42, Inc. assume no liability for direct, indirect,
+incidental, special, exemplary, or consequential damages, which may
+result from the use of the PNG Reference Library, even if advised of
+the possibility of such damage.
 
 Permission is hereby granted to use, copy, modify, and distribute this
 source code, or portions hereof, for any purpose, without fee, subject
 to the following restrictions:
 
-  1. The origin of this source code must not be misrepresented.
+ 1. The origin of this source code must not be misrepresented.
 
-  2. Altered versions must be plainly marked as such and must not
-     be misrepresented as being the original source.
+ 2. Altered versions must be plainly marked as such and must not
+    be misrepresented as being the original source.
 
-  3. This Copyright notice may not be removed or altered from any
-     source or altered source distribution.
+ 3. This Copyright notice may not be removed or altered from any
+    source or altered source distribution.
 
-The Contributing Authors and Group 42, Inc. specifically permit, without
-fee, and encourage the use of this source code as a component to
-supporting the PNG file format in commercial products.  If you use this
-source code in a product, acknowledgment is not required but would be
-appreciated.
-
-END OF COPYRIGHT NOTICE, DISCLAIMER, and LICENSE.
+The Contributing Authors and Group 42, Inc. specifically permit,
+without fee, and encourage the use of this source code as a component
+to supporting the PNG file format in commercial products.  If you use
+this source code in a product, acknowledgment is not required but would
+be appreciated.
 
 TRADEMARK:
 
@@ -2101,13 +2130,13 @@
 
 -------------------------------------------------------------------------------
 
-%% This notice is provided with respect to PC/SC Lite for Suse Linux v.1.1.1,
+%% This notice is provided with respect to PC/SC Lite v1.8.24,
 which may be included with JRE 8, JDK 8, and OpenJDK 8 on Linux and Solaris.
 
 --- begin of LICENSE ---
 
-Copyright (c) 1999-2004 David Corcoran <corcoran@linuxnet.com>
-Copyright (c) 1999-2004 Ludovic Rousseau <ludovic.rousseau (at) free.fr>
+Copyright (c) 1999-2003 David Corcoran <corcoran@linuxnet.com>
+Copyright (c) 2001-2011 Ludovic Rousseau <ludovic.rousseau@free.fr>
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -2119,15 +2148,10 @@
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
-3. All advertising materials mentioning features or use of this software
-   must display the following acknowledgement:
-     This product includes software developed by: 
-      David Corcoran <corcoran@linuxnet.com>
-      http://www.linuxnet.com (MUSCLE)
-4. The name of the author may not be used to endorse or promote products
+3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.
 
-Changes to this license can be made only by the copyright author with 
+Changes to this license can be made only by the copyright author with
 explicit written consent.
 
 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
diff --git a/agent/src/os/linux/ps_core.c b/agent/src/os/linux/ps_core.c
index 541c857..c62e58d 100644
--- a/agent/src/os/linux/ps_core.c
+++ b/agent/src/os/linux/ps_core.c
@@ -865,8 +865,51 @@
 #define LD_BASE_OFFSET        offsetof(struct r_debug,  r_ldbase)
 #define LINK_MAP_ADDR_OFFSET  offsetof(struct link_map, l_addr)
 #define LINK_MAP_NAME_OFFSET  offsetof(struct link_map, l_name)
+#define LINK_MAP_LD_OFFSET    offsetof(struct link_map, l_ld)
 #define LINK_MAP_NEXT_OFFSET  offsetof(struct link_map, l_next)
 
+// Calculate the load address difference of a shared library on a prelink-enabled
+// environment: link_map.l_ld (read via ps_pdread) minus the file's PT_DYNAMIC p_vaddr.
+// Returns 0 when the program headers or l_ld cannot be read. NOTE(review): a
+// computed difference of 0 cannot be told apart from failure -- confirm with callers.
+// GDB calculates the same value from link_map.l_ld and the .dynamic address.
+// See GDB implementation: lm_addr_check @ solib-svr4.c
+static uintptr_t calc_prelinked_load_address(struct ps_prochandle* ph, int lib_fd, ELF_EHDR* elf_ehdr, uintptr_t link_map_addr) {
+  ELF_PHDR *phbuf;
+  uintptr_t lib_ld;
+  uintptr_t lib_dyn_addr = 0L;
+  uintptr_t load_addr;
+  int i;
+
+  phbuf = read_program_header_table(lib_fd, elf_ehdr);
+  if (phbuf == NULL) {
+    print_debug("can't read program header of shared object\n");
+    return 0L;
+  }
+
+  // Take p_vaddr of the first PT_DYNAMIC program header; stays 0 if none is found.
+  for (i = 0; i < elf_ehdr->e_phnum; i++) {
+    if (phbuf[i].p_type == PT_DYNAMIC) {
+      lib_dyn_addr = phbuf[i].p_vaddr;
+      break;
+    }
+  }
+
+  free(phbuf);
+
+  if (ps_pdread(ph, (psaddr_t)link_map_addr + LINK_MAP_LD_OFFSET,
+               &lib_ld, sizeof(uintptr_t)) != PS_OK) {
+    print_debug("can't read address of dynamic section in shared object\n");
+    return 0L;
+  }
+
+  // The result is link_map.l_ld minus the .dynamic address found in the library
+  // file; the subtraction is done in unsigned (uintptr_t) arithmetic.
+  load_addr = lib_ld - lib_dyn_addr;
+  print_debug("lib_ld = 0x%lx, lib_dyn_addr = 0x%lx -> lib_base_diff = 0x%lx\n", lib_ld, lib_dyn_addr, load_addr);
+  return load_addr;
+}
+
 // read shared library info from runtime linker's data structures.
 // This work is done by librtlb_db in Solaris
 static bool read_shared_lib_info(struct ps_prochandle* ph) {
@@ -968,6 +1011,14 @@
             // continue with other libraries...
          } else {
             if (read_elf_header(lib_fd, &elf_ehdr)) {
+               if (lib_base_diff == 0x0L) {
+                 lib_base_diff = calc_prelinked_load_address(ph, lib_fd, &elf_ehdr, link_map_addr);
+                 if (lib_base_diff == 0x0L) {
+                   close(lib_fd);
+                   return false;
+                 }
+               }
+
                lib_base = lib_base_diff + find_base_address(lib_fd, &elf_ehdr);
                print_debug("reading library %s @ 0x%lx [ 0x%lx ]\n",
                            lib_name, lib_base, lib_base_diff);
diff --git a/agent/src/os/linux/ps_proc.c b/agent/src/os/linux/ps_proc.c
index abf2d60..c4d6a9e 100644
--- a/agent/src/os/linux/ps_proc.c
+++ b/agent/src/os/linux/ps_proc.c
@@ -345,7 +345,7 @@
 
 static bool read_lib_info(struct ps_prochandle* ph) {
   char fname[32];
-  char buf[256];
+  char buf[PATH_MAX];
   FILE *fp = NULL;
 
   sprintf(fname, "/proc/%d/maps", ph->pid);
@@ -355,10 +355,41 @@
     return false;
   }
 
-  while(fgets_no_cr(buf, 256, fp)){
-    char * word[6];
-    int nwords = split_n_str(buf, 6, word, ' ', '\0');
-    if (nwords > 5 && find_lib(ph, word[5]) == false) {
+  while(fgets_no_cr(buf, PATH_MAX, fp)){
+    char * word[7];
+    int nwords = split_n_str(buf, 7, word, ' ', '\0');
+
+    if (nwords < 6) {
+      // not a shared library entry. ignore.
+      continue;
+    }
+
+    // SA does not handle the lines with patterns:
+    //   "[stack]", "[heap]", "[vdso]", "[vsyscall]", etc.
+    if (word[5][0] == '[') {
+        // not a shared library entry. ignore.
+        continue;
+    }
+
+    if (nwords > 6) {
+      // prelink altered mapfile when the program is running.
+      // Entries like one below have to be skipped
+      //  /lib64/libc-2.15.so (deleted)
+      // SO name in entries like one below have to be stripped.
+      //  /lib64/libpthread-2.15.so.#prelink#.EECVts
+      char *s = strstr(word[5],".#prelink#");
+      if (s == NULL) {
+        // No prelink keyword. skip deleted library
+        print_debug("skip shared object %s deleted by prelink\n", word[5]);
+        continue;
+      }
+
+      // Fall through
+      print_debug("rectifying shared object name %s changed by prelink\n", word[5]);
+      *s = 0;
+    }
+
+    if (find_lib(ph, word[5]) == false) {
        intptr_t base;
        lib_info* lib;
 #ifdef _LP64
diff --git a/make/aix/makefiles/jsig.make b/make/aix/makefiles/jsig.make
index f8bf938..ae453f2 100644
--- a/make/aix/makefiles/jsig.make
+++ b/make/aix/makefiles/jsig.make
@@ -54,10 +54,15 @@
   JSIG_DEBUG_CFLAGS = -g
 endif
 
+# Optimize jsig lib at level -O3 unless it's a slowdebug build
+ifneq ($(DEBUG_LEVEL), slowdebug)
+  JSIG_OPT_FLAGS = $(OPT_CFLAGS)
+endif
+
 $(LIBJSIG): $(JSIGSRCDIR)/jsig.c $(LIBJSIG_MAPFILE)
 	@echo Making signal interposition lib...
 	$(QUIETLY) $(CXX) $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \
-                         $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) -o $@ $< -ldl
+                         $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) $(JSIG_OPT_FLAGS) -o $@ $< -ldl
 
 #ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 #	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJSIG_DEBUGINFO)
diff --git a/make/bsd/makefiles/jsig.make b/make/bsd/makefiles/jsig.make
index 29779b0..b5860bf 100644
--- a/make/bsd/makefiles/jsig.make
+++ b/make/bsd/makefiles/jsig.make
@@ -59,10 +59,15 @@
   JSIG_DEBUG_CFLAGS = -g
 endif
 
+# Optimize jsig lib at level -O3 unless it's a slowdebug build
+ifneq ($(DEBUG_LEVEL), slowdebug)
+  JSIG_OPT_FLAGS = $(OPT_CFLAGS)
+endif
+
 $(LIBJSIG): $(JSIGSRCDIR)/jsig.c $(LIBJSIG_MAPFILE)
 	@echo Making signal interposition lib...
 	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \
-                         $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) $(EXTRA_CFLAGS) -o $@ $<
+                         $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) $(JSIG_OPT_FLAGS) $(EXTRA_CFLAGS) -o $@ $<
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
   ifeq ($(OS_VENDOR), Darwin)
 	$(DSYMUTIL) $@
diff --git a/make/linux/makefiles/jsig.make b/make/linux/makefiles/jsig.make
index 9bf3b73..6290db5 100644
--- a/make/linux/makefiles/jsig.make
+++ b/make/linux/makefiles/jsig.make
@@ -51,10 +51,15 @@
   JSIG_DEBUG_CFLAGS = -g
 endif
 
+# Optimize jsig lib at level -O3 unless it's a slowdebug build
+ifneq ($(DEBUG_LEVEL), slowdebug)
+  JSIG_OPT_FLAGS = $(OPT_CFLAGS)
+endif
+
 $(LIBJSIG): $(JSIGSRCDIR)/jsig.c $(LIBJSIG_MAPFILE)
 	@echo Making signal interposition lib...
 	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \
-                         $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) $(EXTRA_CFLAGS) -o $@ $< -ldl
+                         $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) $(JSIG_OPT_FLAGS) $(EXTRA_CFLAGS) -o $@ $< -ldl
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
   ifneq ($(STRIP_POLICY),no_strip)
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJSIG_DEBUGINFO)
diff --git a/make/linux/makefiles/zeroshark.make b/make/linux/makefiles/zeroshark.make
index 4480740..de1fcb3 100644
--- a/make/linux/makefiles/zeroshark.make
+++ b/make/linux/makefiles/zeroshark.make
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2003, 2005, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
 # Copyright 2007, 2008 Red Hat, Inc.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
@@ -25,8 +25,16 @@
 
 # Setup common to Zero (non-Shark) and Shark versions of VM
 
-# override this from the main file because some version of llvm do not like -Wundef
-WARNING_FLAGS = -Wpointer-arith -Wsign-compare -Wunused-function -Wunused-value
+# Some versions of llvm do not like -Wundef
+ifeq ($(JVM_VARIANT_ZEROSHARK), true)
+  WARNING_FLAGS += -Wno-undef
+endif
+# Suppress some warning flags that are normally turned on for hotspot,
+# because some of the zero code has not been updated accordingly.
+WARNING_FLAGS += -Wno-return-type \
+  -Wno-format-nonliteral -Wno-format-security \
+  -Wno-maybe-uninitialized
+ 
 
 # If FDLIBM_CFLAGS is non-empty it holds CFLAGS needed to be passed to
 # the compiler so as to be able to produce optimized objects
@@ -48,5 +56,3 @@
 ifeq ($(ARCH_DATA_MODEL), 64)
   CFLAGS += -D_LP64=1
 endif
-
-OPT_CFLAGS/compactingPermGenGen.o = -O1
diff --git a/make/solaris/makefiles/jsig.make b/make/solaris/makefiles/jsig.make
index bbc0982..74b0721 100644
--- a/make/solaris/makefiles/jsig.make
+++ b/make/solaris/makefiles/jsig.make
@@ -47,10 +47,15 @@
 LFLAGS_JSIG += -mt -xnolib
 endif
 
+# Optimize jsig lib unless it's a slowdebug build
+ifneq ($(DEBUG_LEVEL), slowdebug)
+  JSIG_OPT_FLAGS = -xO4 -g
+endif
+
 $(LIBJSIG): $(JSIGSRCDIR)/jsig.c $(LIBJSIG_MAPFILE)
 	@echo Making signal interposition lib...
 	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \
-                         $(LFLAGS_JSIG) -o $@ $(JSIGSRCDIR)/jsig.c -ldl
+                         $(LFLAGS_JSIG) $(JSIG_OPT_FLAGS) -o $@ $(JSIGSRCDIR)/jsig.c -ldl
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJSIG_DEBUGINFO)
 	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJSIG_DEBUGINFO) $@
diff --git a/src/cpu/ppc/vm/assembler_ppc.hpp b/src/cpu/ppc/vm/assembler_ppc.hpp
index b14f2b6..88ac813 100644
--- a/src/cpu/ppc/vm/assembler_ppc.hpp
+++ b/src/cpu/ppc/vm/assembler_ppc.hpp
@@ -2000,7 +2000,8 @@
   inline void vsbox(       VectorRegister d, VectorRegister a);
 
   // SHA (introduced with Power 8)
-  // Not yet implemented.
+  inline void vshasigmad(VectorRegister d, VectorRegister a, bool st, int six);
+  inline void vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six);
 
   // Vector Binary Polynomial Multiplication (introduced with Power 8)
   inline void vpmsumb(  VectorRegister d, VectorRegister a, VectorRegister b);
@@ -2096,6 +2097,11 @@
   inline void lvsl(  VectorRegister d, Register s2);
   inline void lvsr(  VectorRegister d, Register s2);
 
+  // Endianness-specific concatenation of 2 loaded vectors.
+  inline void load_perm(VectorRegister perm, Register addr);
+  inline void vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm);
+  inline void vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm);
+
   // RegisterOrConstant versions.
   // These emitters choose between the versions using two registers and
   // those with register and immediate, depending on the content of roc.
diff --git a/src/cpu/ppc/vm/assembler_ppc.inline.hpp b/src/cpu/ppc/vm/assembler_ppc.inline.hpp
index 1d2c05e..5d80663 100644
--- a/src/cpu/ppc/vm/assembler_ppc.inline.hpp
+++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp
@@ -789,7 +789,8 @@
 inline void Assembler::vsbox(       VectorRegister d, VectorRegister a)                   { emit_int32( VSBOX_OPCODE        | vrt(d) | vra(a)         ); }
 
 // SHA (introduced with Power 8)
-// Not yet implemented.
+inline void Assembler::vshasigmad(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAD_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
+inline void Assembler::vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAW_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
 
 // Vector Binary Polynomial Multiplication (introduced with Power 8)
 inline void Assembler::vpmsumb(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPMSUMB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
@@ -887,6 +888,30 @@
 inline void Assembler::lvsl(  VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE   | vrt(d) | rb(s2)); }
 inline void Assembler::lvsr(  VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE   | vrt(d) | rb(s2)); }
 
+inline void Assembler::load_perm(VectorRegister perm, Register addr) {  // endian-dependent choice between lvsr and lvsl; perm is the target, addr the register operand
+#if defined(VM_LITTLE_ENDIAN)
+  lvsr(perm, addr);  // little-endian build: emit lvsr
+#else
+  lvsl(perm, addr);  // any other build: emit lvsl
+#endif
+}
+
+inline void Assembler::vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm) {  // in-place variant: first_dest is passed to vperm both as its first operand and as one of the two following operands
+#if defined(VM_LITTLE_ENDIAN)
+  vperm(first_dest, second, first_dest, perm);  // little-endian build: (second, first_dest) order
+#else
+  vperm(first_dest, first_dest, second, perm);  // any other build: (first_dest, second) order
+#endif
+}
+
+inline void Assembler::vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm) {  // separate-destination variant of the 3-operand vec_perm
+#if defined(VM_LITTLE_ENDIAN)
+  vperm(dest, second, first, perm);  // little-endian build: (second, first) order
+#else
+  vperm(dest, first, second, perm);  // any other build: (first, second) order
+#endif
+}
+
 inline void Assembler::load_const(Register d, void* x, Register tmp) {
    load_const(d, (long)x, tmp);
 }
diff --git a/src/cpu/ppc/vm/macroAssembler_ppc.hpp b/src/cpu/ppc/vm/macroAssembler_ppc.hpp
index 3c6cea5..afcec9a 100644
--- a/src/cpu/ppc/vm/macroAssembler_ppc.hpp
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.hpp
@@ -667,6 +667,40 @@
 
   void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp);
 
+  // SHA-2 auxiliary functions and public interfaces
+ private:
+  void sha256_deque(const VectorRegister src,
+      const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3);
+  void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr);
+  void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
+  void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws,
+      const int total_ws, const Register k, const VectorRegister* kpws,
+      const int total_kpws);
+  void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1,
+      const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0,
+      const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3,
+      const Register j, const Register k);
+  void sha256_update_sha_state(const VectorRegister a, const VectorRegister b,
+      const VectorRegister c, const VectorRegister d, const VectorRegister e,
+      const VectorRegister f, const VectorRegister g, const VectorRegister h,
+      const Register hptr);
+
+  void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws);
+  void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs);
+  void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
+  void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs);
+  void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1,
+      const VectorRegister w2, const VectorRegister w3,
+      const VectorRegister w4, const VectorRegister w5,
+      const VectorRegister w6, const VectorRegister w7,
+      const VectorRegister kpw0, const VectorRegister kpw1, const Register j,
+      const VectorRegister vRb, const Register k);
+
+ public:
+  void sha256(bool multi_block);
+  void sha512(bool multi_block);
+
+
   //
   // Debugging
   //
diff --git a/src/cpu/ppc/vm/macroAssembler_ppc_sha.cpp b/src/cpu/ppc/vm/macroAssembler_ppc_sha.cpp
new file mode 100644
index 0000000..7a82ed3
--- /dev/null
+++ b/src/cpu/ppc/vm/macroAssembler_ppc_sha.cpp
@@ -0,0 +1,1136 @@
+// Copyright (c) 2017 Instituto de Pesquisas Eldorado. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+
+// Implemented according to "Descriptions of SHA-256, SHA-384, and SHA-512"
+// (http://www.iwar.org.uk/comsec/resources/cipher/sha256-384-512.pdf).
+
+#include "asm/macroAssembler.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+
+/**********************************************************************
+ * SHA 256
+ *********************************************************************/
+
+void MacroAssembler::sha256_deque(const VectorRegister src,    // in:  source vector (left unchanged)
+                                  const VectorRegister dst1,   // out: src rotated by 12 bytes
+                                  const VectorRegister dst2,   // out: src rotated by 8 bytes
+                                  const VectorRegister dst3) { // out: src rotated by 4 bytes
+  vsldoi (dst1, src, src, 12);  // both inputs are src, so the double-vector shift acts as a byte rotate
+  vsldoi (dst2, src, src, 8);
+  vsldoi (dst3, src, src, 4);
+}
+
+void MacroAssembler::sha256_round(const VectorRegister* hs,   // working registers that carry the roles a-h
+                                  const int total_hs,         // number of entries in hs (the indexing below uses offsets 0-7)
+                                  int& h_cnt,                 // generation-time round counter; selects the role rotation, incremented here
+                                  const VectorRegister kpw) { // k + w input for this round
+  // role -> register mapping: every increment of h_cnt moves each role one register down in hs (mod total_hs)
+  const VectorRegister a = hs[(total_hs + 0 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister b = hs[(total_hs + 1 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister c = hs[(total_hs + 2 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister d = hs[(total_hs + 3 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister e = hs[(total_hs + 4 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister f = hs[(total_hs + 5 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister g = hs[(total_hs + 6 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister h = hs[(total_hs + 7 - (h_cnt % total_hs)) % total_hs];
+  // temporaries
+  VectorRegister ch  = VR0;
+  VectorRegister maj = VR1;
+  VectorRegister bsa = VR2;
+  VectorRegister bse = VR3;
+  VectorRegister vt0 = VR4;
+  VectorRegister vt1 = VR5;
+  VectorRegister vt2 = VR6;
+  VectorRegister vt3 = VR7;
+  // vt3 = T1 = h + Sigma1(e) + Ch(e,f,g) + kpw;  vt0 = T2 = Sigma0(a) + Maj(a,b,c)
+  vsel       (ch,  g,   f, e);          // ch  = (g & ~e) | (f & e)
+  vxor       (maj, a,   b);             // maj = a ^ b (selector for the vsel below)
+  vshasigmaw (bse, e,   1, 0xf);        // bse = Sigma1(e) in every word
+  vadduwm    (vt2, ch,  kpw);
+  vadduwm    (vt1, h,   bse);
+  vsel       (maj, b,   c, maj);        // maj = (b & ~(a^b)) | (c & (a^b)) = Maj(a,b,c)
+  vadduwm    (vt3, vt1, vt2);           // vt3 = T1
+  vshasigmaw (bsa, a,   1, 0);          // bsa = Sigma0(a) in every word
+  vadduwm    (vt0, bsa, maj);           // vt0 = T2
+  // only d and h are written: d + T1 is the next round's e, T1 + T2 is the next round's a
+  vadduwm    (d,   d,   vt3);
+  vadduwm    (h,   vt3, vt0);
+
+  // advance vector pointer to the next iteration
+  h_cnt++;
+}
+
+void MacroAssembler::sha256_load_h_vec(const VectorRegister a,   // out: the 16 state bytes at hptr
+                                       const VectorRegister e,   // out: the 16 state bytes at hptr + 16
+                                       const Register hptr) {    // in:  address of the hash state (any alignment)
+  // temporaries
+  Register tmp = R8;
+  VectorRegister vt0 = VR0;
+  VectorRegister vRb = VR6;
+  // labels
+  Label sha256_aligned;
+  // CR0 set by andi_ is consumed by the beq below; tmp itself is overwritten by the addi
+  andi_  (tmp,  hptr, 0xf);
+  lvx    (a,    hptr);
+  addi   (tmp,  hptr, 16);
+  lvx    (e,    tmp);
+  beq    (CCR0, sha256_aligned);
+
+  // handle unaligned accesses: a third quadword (hptr + 32) is loaded and neighbours are combined with vec_perm
+  load_perm(vRb, hptr);
+  addi   (tmp, hptr, 32);
+  vec_perm(a,   e,    vRb);
+
+  lvx    (vt0,  tmp);
+  vec_perm(e,   vt0,  vRb);
+
+  // aligned accesses
+  bind(sha256_aligned);
+}
+
+void MacroAssembler::sha256_load_w_plus_k_vec(const Register buf_in,      // in:  address of the input block (any alignment)
+                                              const VectorRegister* ws,   // out: total_ws vectors loaded from buf_in
+                                              const int total_ws,         // number of 16-byte vectors to load
+                                              const Register k,           // in:  address of the round constants
+                                              const VectorRegister* kpws, // out: kpws[n] = vector at k + 16*n, plus ws[n]
+                                              const int total_kpws) {     // must equal total_ws (asserted below)
+  Label w_aligned, after_w_load;
+
+  Register tmp       = R8;
+  VectorRegister vt0 = VR0;
+  VectorRegister vt1 = VR1;
+  VectorRegister vRb = VR6;
+
+  andi_ (tmp, buf_in, 0xF);
+  beq   (CCR0, w_aligned); // taken when the low 4 address bits are zero (16-byte aligned)
+
+  // deal with unaligned addresses
+  lvx    (ws[0], buf_in);
+  load_perm(vRb, buf_in);
+
+  for (int n = 1; n < total_ws; n++) {
+    VectorRegister w_cur = ws[n];
+    VectorRegister w_prev = ws[n-1];
+
+    addi (tmp, buf_in, n * 16);
+    lvx  (w_cur, tmp);
+    vec_perm(w_prev, w_cur, vRb);
+  }
+  addi   (tmp, buf_in, total_ws * 16);   // one extra quadword is needed to complete the last vector
+  lvx    (vt0, tmp);
+  vec_perm(ws[total_ws-1], vt0, vRb);
+  b      (after_w_load);
+
+  bind(w_aligned);
+
+  // deal with aligned addresses
+  lvx(ws[0], buf_in);
+  for (int n = 1; n < total_ws; n++) {
+    VectorRegister w = ws[n];
+    addi (tmp, buf_in, n * 16);
+    lvx  (w, tmp);
+  }
+
+  bind(after_w_load);
+
+#if defined(VM_LITTLE_ENDIAN)
+  // Byte swapping within int values: vt1 = (lvsl pattern for offset 8) XOR 0x0b is the permute control
+  li       (tmp, 8);
+  lvsl     (vt0, tmp);
+  vspltisb (vt1, 0xb);
+  vxor     (vt1, vt0, vt1);
+  for (int n = 0; n < total_ws; n++) {
+    VectorRegister w = ws[n];
+    vec_perm(w, w, vt1);
+  }
+#endif
+
+  // Loading k, which is always aligned to 16-bytes
+  lvx    (kpws[0], k);
+  for (int n = 1; n < total_kpws; n++) {
+    VectorRegister kpw = kpws[n];
+    addi (tmp, k, 16 * n);
+    lvx  (kpw, tmp);
+  }
+
+  // Add w to K
+  assert(total_ws == total_kpws, "Redesign the loop below");
+  for (int n = 0; n < total_kpws; n++) {
+    VectorRegister kpw = kpws[n];
+    VectorRegister w   = ws[n];
+
+    vadduwm  (kpw, kpw, w);
+  }
+}
+
+void MacroAssembler::sha256_calc_4w(const VectorRegister w0,    // in/out: w0-w3 hold the previous 16 schedule words;
+                                    const VectorRegister w1,    //         on return they are shifted by one vector
+                                    const VectorRegister w2,    //         (w0 <- w1, w1 <- w2, w2 <- w3)
+                                    const VectorRegister w3,    //         and w3 holds the 4 newly computed words
+                                    const VectorRegister kpw0,  // out: kpw0-kpw3 receive k + w for the 4 new words,
+                                    const VectorRegister kpw1,  //      one full sum vector plus three byte-rotated
+                                    const VectorRegister kpw2,  //      copies of it (which register gets the
+                                    const VectorRegister kpw3,  //      unrotated sum depends on endianness)
+                                    const Register j,           // in/out: byte offset into k; advanced by 16
+                                    const Register k) {         // in: base address of the round constants
+  // Temporaries
+  const VectorRegister  vt0  = VR0;
+  const VectorRegister  vt1  = VR1;
+  const VectorSRegister vsrt1 = vt1->to_vsr();
+  const VectorRegister  vt2  = VR2;
+  const VectorRegister  vt3  = VR3;
+  const VectorSRegister vst3 = vt3->to_vsr();
+  const VectorRegister  vt4  = VR4;
+  // Naming used in the comments below: b = vt1, c = vt2, d = vt3, e = vt4
+  // load k[j..j+3] (the 16 bytes at k + j) into vt0
+  lvx        (vt0, j,   k);
+
+  // advance j
+  addi       (j,   j,   16); // 16 bytes were read
+
+#if defined(VM_LITTLE_ENDIAN)
+  // b = w[j-15], w[j-14], w[j-13], w[j-12]
+  vsldoi     (vt1, w1,  w0, 12);
+
+  // c = w[j-7], w[j-6], w[j-5], w[j-4]
+  vsldoi     (vt2, w3,  w2, 12);
+
+#else
+  // b = w[j-15], w[j-14], w[j-13], w[j-12]
+  vsldoi     (vt1, w0,  w1, 4);
+
+  // c = w[j-7], w[j-6], w[j-5], w[j-4]
+  vsldoi     (vt2, w2,  w3, 4);
+#endif
+
+  // d = w[j-2], w[j-1], w[j-4], w[j-3]
+  vsldoi     (vt3, w3,  w3, 8);
+
+  // b = s0(w[j-15]) , s0(w[j-14]) , s0(w[j-13]) , s0(w[j-12])
+  vshasigmaw (vt1, vt1, 0,  0);
+
+  // d = s1(w[j-2]) , s1(w[j-1]) , s1(w[j-4]) , s1(w[j-3])
+  vshasigmaw (vt3, vt3, 0,  0xf);
+
+  // c = s0(w[j-15]) + w[j-7],
+  //     s0(w[j-14]) + w[j-6],
+  //     s0(w[j-13]) + w[j-5],
+  //     s0(w[j-12]) + w[j-4]
+  vadduwm    (vt2, vt1, vt2);
+
+  // c = s0(w[j-15]) + w[j-7] + w[j-16],
+  //     s0(w[j-14]) + w[j-6] + w[j-15],
+  //     s0(w[j-13]) + w[j-5] + w[j-14],
+  //     s0(w[j-12]) + w[j-4] + w[j-13]
+  vadduwm    (vt2, vt2, w0);
+
+  // e = s0(w[j-15]) + w[j-7] + w[j-16] + s1(w[j-2]), // w[j]
+  //     s0(w[j-14]) + w[j-6] + w[j-15] + s1(w[j-1]), // w[j+1]
+  //     s0(w[j-13]) + w[j-5] + w[j-14] + s1(w[j-4]), // UNDEFINED
+  //     s0(w[j-12]) + w[j-4] + w[j-13] + s1(w[j-3])  // UNDEFINED
+  vadduwm    (vt4, vt2, vt3);
+
+  // At this point, e[0] and e[1] are the correct values to be stored at w[j]
+  // and w[j+1].
+  // e[2] and e[3] are not considered.
+  // b = s1(w[j]) , s1(w[j+1]) , UNDEFINED , UNDEFINED
+  vshasigmaw (vt1, vt4, 0,  0xf);
+
+  // d = s1(w[j-2]) , s1(w[j-1]) , s1(w[j]) , s1(w[j+1])
+#if defined(VM_LITTLE_ENDIAN)
+  xxmrgld    (vst3, vsrt1, vst3);
+#else
+  xxmrghd    (vst3, vst3, vsrt1);
+#endif
+
+  // c = s0(w[j-15]) + w[j-7] + w[j-16] + s1(w[j-2]), // w[j]
+  //     s0(w[j-14]) + w[j-6] + w[j-15] + s1(w[j-1]), // w[j+1]
+  //     s0(w[j-13]) + w[j-5] + w[j-14] + s1(w[j]),   // w[j+2]
+  //     s0(w[j-12]) + w[j-4] + w[j-13] + s1(w[j+1])  // w[j+3]
+  vadduwm    (vt2, vt2, vt3);
+
+  // Updating w0 to w3 to hold the new previous 16 values from w.
+  vmr        (w0,  w1);
+  vmr        (w1,  w2);
+  vmr        (w2,  w3);
+  vmr        (w3,  vt2);
+
+  // store k + w (4 values at once), then derive the three byte-rotated copies used by the following rounds
+#if defined(VM_LITTLE_ENDIAN)
+  vadduwm    (kpw0, vt2, vt0);
+
+  vsldoi     (kpw1, kpw0, kpw0, 12);
+  vsldoi     (kpw2, kpw0, kpw0, 8);
+  vsldoi     (kpw3, kpw0, kpw0, 4);
+#else
+  vadduwm    (kpw3, vt2, vt0);
+
+  vsldoi     (kpw2, kpw3, kpw3, 12);
+  vsldoi     (kpw1, kpw3, kpw3, 8);
+  vsldoi     (kpw0, kpw3, kpw3, 4);
+#endif
+}
+
+void MacroAssembler::sha256_update_sha_state(const VectorRegister a,   // in: working values a-h, one register each;
+                                             const VectorRegister b_,  //     they are merged into two vectors and
+                                             const VectorRegister c,   //     added to the state loaded from hptr
+                                             const VectorRegister d,   // little endian: the sums end up in a and e
+                                             const VectorRegister e,   // big endian:    the sums end up in d and h
+                                             const VectorRegister f,
+                                             const VectorRegister g,
+                                             const VectorRegister h,
+                                             const Register hptr) {    // in: address of the hash state (any alignment)
+  // temporaries
+  VectorRegister vt0  = VR0;
+  VectorRegister vt1  = VR1;
+  VectorRegister vt2  = VR2;
+  VectorRegister vt3  = VR3;
+  VectorRegister vt4  = VR4;
+  VectorRegister vt5  = VR5;
+  VectorRegister vaux = VR6;      // not referenced in this function
+  VectorRegister vRb  = VR6;
+  Register tmp        = R8;       // only the CR0 result of andi_(tmp, ...) is needed, so ...
+  Register of16       = R8;       // ... R8 is reused for the constant offset 16
+  Register of32       = R9;
+  Label state_load_aligned;
+
+  // Load the current state from hptr; CR0 from andi_ is consumed by the beq below
+  andi_   (tmp, hptr, 0xf);
+  li      (of16, 16);
+  lvx     (vt0, hptr);
+  lvx     (vt5, of16, hptr);
+  beq     (CCR0, state_load_aligned);
+
+  // handle unaligned accesses
+  li      (of32, 32);
+  load_perm(vRb, hptr);
+
+  vec_perm(vt0, vt5,  vRb);        // vt0 = hptr[0]..hptr[3]
+
+  lvx     (vt1, hptr, of32);
+  vec_perm(vt5, vt1,  vRb);        // vt5 = hptr[4]..hptr[7]
+
+  // aligned accesses
+  bind(state_load_aligned);
+
+#if defined(VM_LITTLE_ENDIAN)
+  vmrglw  (vt1, b_, a);            // vt1 = {a, b, ?, ?}
+  vmrglw  (vt2, d, c);             // vt2 = {c, d, ?, ?}
+  vmrglw  (vt3, f, e);             // vt3 = {e, f, ?, ?}
+  vmrglw  (vt4, h, g);             // vt4 = {g, h, ?, ?}
+  xxmrgld (vt1->to_vsr(), vt2->to_vsr(), vt1->to_vsr()); // vt1 = {a, b, c, d}
+  xxmrgld (vt3->to_vsr(), vt4->to_vsr(), vt3->to_vsr()); // vt3 = {e, f, g, h}
+  vadduwm (a,   vt0, vt1);         // a = {a+hptr[0], b+hptr[1], c+hptr[2], d+hptr[3]}
+  vadduwm (e,   vt5, vt3);         // e = {e+hptr[4], f+hptr[5], g+hptr[6], h+hptr[7]}
+
+  // Save hptr back, works for any alignment
+  xxswapd (vt0->to_vsr(), a->to_vsr());
+  stxvd2x (vt0->to_vsr(), hptr);
+  xxswapd (vt5->to_vsr(), e->to_vsr());
+  stxvd2x (vt5->to_vsr(), of16, hptr);
+#else
+  vmrglw  (vt1, a, b_);            // vt1 = {a, b, ?, ?}
+  vmrglw  (vt2, c, d);             // vt2 = {c, d, ?, ?}
+  vmrglw  (vt3, e, f);             // vt3 = {e, f, ?, ?}
+  vmrglw  (vt4, g, h);             // vt4 = {g, h, ?, ?}
+  xxmrgld (vt1->to_vsr(), vt1->to_vsr(), vt2->to_vsr()); // vt1 = {a, b, c, d}
+  xxmrgld (vt3->to_vsr(), vt3->to_vsr(), vt4->to_vsr()); // vt3 = {e, f, g, h}
+  vadduwm (d,   vt0, vt1);         // d = {a+hptr[0], b+hptr[1], c+hptr[2], d+hptr[3]}
+  vadduwm (h,   vt5, vt3);         // h = {e+hptr[4], f+hptr[5], g+hptr[6], h+hptr[7]}
+
+  // Save hptr back, works for any alignment
+  stxvd2x (d->to_vsr(), hptr);
+  stxvd2x (h->to_vsr(), of16, hptr);
+#endif
+}
+
+static const uint32_t sha256_round_table[64] __attribute((aligned(16))) = {  // the 64 SHA-256 round constants, read 16 bytes at a time with lvx
+  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+};
+static const uint32_t *sha256_round_consts = sha256_round_table;  // non-const pointer: sha256() may repoint it to a 16-byte aligned copy on AIX
+
+//   R3_ARG1   - byte[]  Input string with padding but in Big Endian
+//   R4_ARG2   - int[]   SHA.state (at first, the root of primes)
+//   R5_ARG3   - int     offset
+//   R6_ARG4   - int     limit
+//   (offset and limit are only used when multi_block is true; the updated offset is then returned in R3_RET)
+//   Internal Register usage:
+//   R7        - k
+//   R8        - tmp | j | of16
+//   R9        - of32
+//   VR0-VR7   - ch, maj, bsa, bse, vt0-vt3 | vt0-vt5, vaux/vRb
+//   VR9-VR16  - a-h
+//   VR17-VR20 - w0-w3
+//   VR21-VR23 - vaux0-vaux2
+//   VR24-VR27 - kpw0-kpw3
+void MacroAssembler::sha256(bool multi_block) {  // emits the SHA-256 block code; multi_block adds the loop over consecutive 64-byte blocks
+  static const ssize_t buf_size = 64;
+  static const uint8_t w_size = sizeof(sha256_round_table)/sizeof(uint32_t);
+#ifdef AIX
+  // malloc provides 16 byte alignment; the copy made here is never freed
+  if (((uintptr_t)sha256_round_consts & 0xF) != 0) {
+    uint32_t *new_round_consts = (uint32_t*)malloc(sizeof(sha256_round_table));
+    guarantee(new_round_consts, "oom");
+    memcpy(new_round_consts, sha256_round_consts, sizeof(sha256_round_table));
+    sha256_round_consts = (const uint32_t*)new_round_consts;
+  }
+#endif
+
+  Register buf_in = R3_ARG1;
+  Register state  = R4_ARG2;
+  Register ofs    = R5_ARG3;
+  Register limit  = R6_ARG4;
+
+  Label sha_loop, core_loop;
+
+  // Save non-volatile vector registers in the red zone (slots at R1 - 16*nv_size .. R1 - 16)
+  static const VectorRegister nv[] = {
+    VR20, VR21, VR22, VR23, VR24, VR25, VR26, VR27/*, VR28, VR29, VR30, VR31*/
+  };
+  static const uint8_t nv_size = sizeof(nv) / sizeof (VectorRegister);
+
+  for (int c = 0; c < nv_size; c++) {
+    Register tmp = R8;
+    li  (tmp, (c - (nv_size)) * 16);
+    stvx(nv[c], tmp, R1);
+  }
+
+  // Load hash state to registers
+  VectorRegister a = VR9;
+  VectorRegister b = VR10;
+  VectorRegister c = VR11;
+  VectorRegister d = VR12;
+  VectorRegister e = VR13;
+  VectorRegister f = VR14;
+  VectorRegister g = VR15;
+  VectorRegister h = VR16;
+  static const VectorRegister hs[] = {a, b, c, d, e, f, g, h};
+  static const int total_hs = sizeof(hs)/sizeof(VectorRegister);
+  // counter for cycling through hs vector to avoid register moves between iterations
+  int h_cnt = 0;
+
+  // Load a-h registers from the memory pointed by state
+#if defined(VM_LITTLE_ENDIAN)
+  sha256_load_h_vec(a, e, state);
+#else
+  sha256_load_h_vec(d, h, state);
+#endif
+
+  // keep k loaded also during MultiBlock loops
+  Register k = R7;
+  assert(((uintptr_t)sha256_round_consts & 0xF) == 0, "k alignment");
+  load_const_optimized(k, (address)sha256_round_consts, R0);
+
+  // Avoiding redundant loads
+  if (multi_block) {
+    align(OptoLoopAlignment);
+  }
+  bind(sha_loop);
+#if defined(VM_LITTLE_ENDIAN)
+  sha256_deque(a, b, c, d);
+  sha256_deque(e, f, g, h);
+#else
+  sha256_deque(d, c, b, a);
+  sha256_deque(h, g, f, e);
+#endif
+
+  // Load 16 elements from w out of the loop.
+  // Order of the int values is Endianness specific.
+  VectorRegister w0 = VR17;
+  VectorRegister w1 = VR18;
+  VectorRegister w2 = VR19;
+  VectorRegister w3 = VR20;
+  static const VectorRegister ws[] = {w0, w1, w2, w3};
+  static const int total_ws = sizeof(ws)/sizeof(VectorRegister);
+
+  VectorRegister kpw0 = VR24;
+  VectorRegister kpw1 = VR25;
+  VectorRegister kpw2 = VR26;
+  VectorRegister kpw3 = VR27;
+  static const VectorRegister kpws[] = {kpw0, kpw1, kpw2, kpw3};
+  static const int total_kpws = sizeof(kpws)/sizeof(VectorRegister);
+
+  sha256_load_w_plus_k_vec(buf_in, ws, total_ws, k, kpws, total_kpws);
+
+  // Cycle through the first 16 elements: 4 rounds per k + w vector, using the vector and its three rotations
+  assert(total_ws == total_kpws, "Redesign the loop below");
+  for (int n = 0; n < total_ws; n++) {
+    VectorRegister vaux0 = VR21;
+    VectorRegister vaux1 = VR22;
+    VectorRegister vaux2 = VR23;
+
+    sha256_deque(kpws[n], vaux0, vaux1, vaux2);
+
+#if defined(VM_LITTLE_ENDIAN)
+    sha256_round(hs, total_hs, h_cnt, kpws[n]);
+    sha256_round(hs, total_hs, h_cnt, vaux0);
+    sha256_round(hs, total_hs, h_cnt, vaux1);
+    sha256_round(hs, total_hs, h_cnt, vaux2);
+#else
+    sha256_round(hs, total_hs, h_cnt, vaux2);
+    sha256_round(hs, total_hs, h_cnt, vaux1);
+    sha256_round(hs, total_hs, h_cnt, vaux0);
+    sha256_round(hs, total_hs, h_cnt, kpws[n]);
+#endif
+  }
+
+  Register tmp = R8;
+  // rounds 16..63: (w_size - 16) / total_hs = 6 loop passes of total_hs = 8 rounds each
+  li   (tmp, (w_size - 16) / total_hs);
+  mtctr(tmp);
+
+  // j will be aligned to 4 for loading words.
+  // Whenever read, advance the pointer (e.g: when j is used in a function)
+  Register j = R8;
+  li   (j, 16*4);
+
+  align(OptoLoopAlignment);
+  bind(core_loop);
+
+  // due to VectorRegister rotate, always iterate in multiples of total_hs
+  for (int n = 0; n < total_hs/4; n++) {
+    sha256_calc_4w(w0, w1, w2, w3, kpw0, kpw1, kpw2, kpw3, j, k);
+    sha256_round(hs, total_hs, h_cnt, kpw0);
+    sha256_round(hs, total_hs, h_cnt, kpw1);
+    sha256_round(hs, total_hs, h_cnt, kpw2);
+    sha256_round(hs, total_hs, h_cnt, kpw3);
+  }
+
+  bdnz   (core_loop);
+
+  // Update hash state
+  sha256_update_sha_state(a, b, c, d, e, f, g, h, state);
+
+  if (multi_block) {
+    addi(buf_in, buf_in, buf_size);
+    addi(ofs, ofs, buf_size);
+    cmplw(CCR0, ofs, limit);       // unsigned 32-bit compare
+    ble(CCR0, sha_loop);           // process another block while ofs <= limit
+
+    // return ofs
+    mr(R3_RET, ofs);
+  }
+
+  // Restore non-volatile registers
+  for (int c = 0; c < nv_size; c++) {
+    Register tmp = R8;
+    li  (tmp, (c - (nv_size)) * 16);
+    lvx(nv[c], tmp, R1);
+  }
+}
+
+
+/**********************************************************************
+ * SHA 512
+ *********************************************************************/
+
+void MacroAssembler::sha512_load_w_vec(const Register buf_in,      // in:  address of the input block (any alignment)
+                                       const VectorRegister* ws,   // out: total_ws vectors loaded from buf_in
+                                       const int total_ws) {       // number of 16-byte vectors to load
+  Register tmp       = R8;
+  VectorRegister vRb = VR8;
+  VectorRegister aux = VR9;
+  Label is_aligned, after_alignment;
+
+  andi_  (tmp, buf_in, 0xF);
+  beq    (CCR0, is_aligned); // taken when the low 4 address bits are zero (16-byte aligned)
+
+  // deal with unaligned addresses
+  lvx    (ws[0], buf_in);
+  load_perm(vRb, buf_in);
+
+  for (int n = 1; n < total_ws; n++) {
+    VectorRegister w_cur = ws[n];
+    VectorRegister w_prev = ws[n-1];
+    addi (tmp, buf_in, n * 16);
+    lvx  (w_cur, tmp);
+    vec_perm(w_prev, w_cur, vRb);
+  }
+  addi   (tmp, buf_in, total_ws * 16);   // one extra quadword is needed to complete the last vector
+  lvx    (aux, tmp);
+  vec_perm(ws[total_ws-1], aux, vRb);
+  b      (after_alignment);
+
+  bind(is_aligned);
+  lvx  (ws[0], buf_in);
+  for (int n = 1; n < total_ws; n++) {
+    VectorRegister w = ws[n];
+    addi (tmp, buf_in, n * 16);
+    lvx  (w, tmp);
+  }
+
+  bind(after_alignment);
+}
+
+// Update hash state: add the working values in hs to the state in memory and store the sums back
+void MacroAssembler::sha512_update_sha_state(const Register state,      // in: address of the hash state (any alignment)
+                                             const VectorRegister* hs,  // in: working registers; every second one is overwritten with a sum
+                                             const int total_hs) {      // number of entries in hs
+#if defined(VM_LITTLE_ENDIAN)
+  int start_idx = 0;
+#else
+  int start_idx = 1;
+#endif
+
+  // load initial hash from the memory pointed by state
+  VectorRegister ini_a = VR10;
+  VectorRegister ini_c = VR12;
+  VectorRegister ini_e = VR14;
+  VectorRegister ini_g = VR16;
+  static const VectorRegister inis[] = {ini_a, ini_c, ini_e, ini_g};
+  static const int total_inis = sizeof(inis)/sizeof(VectorRegister);  // not referenced in this function
+
+  Label state_save_aligned, after_state_save_aligned;
+
+  Register addr      = R7;
+  Register tmp       = R8;
+  VectorRegister vRb = VR8;
+  VectorRegister aux = VR9;
+
+  andi_(tmp, state, 0xf);
+  beq(CCR0, state_save_aligned);
+  // deal with unaligned addresses
+
+  {
+    VectorRegister a = hs[0];
+    VectorRegister b_ = hs[1];
+    VectorRegister c = hs[2];
+    VectorRegister d = hs[3];
+    VectorRegister e = hs[4];
+    VectorRegister f = hs[5];
+    VectorRegister g = hs[6];
+    VectorRegister h = hs[7];
+    load_perm(vRb, state);
+    lvx    (ini_a, state);
+    addi   (addr, state, 16);
+
+    lvx    (ini_c, addr);
+    addi   (addr, state, 32);
+    vec_perm(ini_a, ini_c, vRb);
+
+    lvx    (ini_e, addr);
+    addi   (addr, state, 48);
+    vec_perm(ini_c, ini_e, vRb);
+
+    lvx    (ini_g, addr);
+    addi   (addr, state, 64);
+    vec_perm(ini_e, ini_g, vRb);
+
+    lvx    (aux, addr);            // fifth quadword, needed to complete ini_g
+    vec_perm(ini_g, aux, vRb);
+    // merge each pair of working registers into one two-doubleword vector
+#if defined(VM_LITTLE_ENDIAN)
+    xxmrgld(a->to_vsr(), b_->to_vsr(), a->to_vsr());
+    xxmrgld(c->to_vsr(), d->to_vsr(), c->to_vsr());
+    xxmrgld(e->to_vsr(), f->to_vsr(), e->to_vsr());
+    xxmrgld(g->to_vsr(), h->to_vsr(), g->to_vsr());
+#else
+    xxmrgld(b_->to_vsr(), a->to_vsr(), b_->to_vsr());
+    xxmrgld(d->to_vsr(), c->to_vsr(), d->to_vsr());
+    xxmrgld(f->to_vsr(), e->to_vsr(), f->to_vsr());
+    xxmrgld(h->to_vsr(), g->to_vsr(), h->to_vsr());
+#endif
+    // add the state loaded from memory to the merged working values
+    for (int n = start_idx; n < total_hs; n += 2) {
+      VectorRegister h_cur = hs[n];
+      VectorRegister ini_cur = inis[n/2];
+
+      vaddudm(h_cur, ini_cur, h_cur);
+    }
+    // store each sum as two separate doublewords (mfvrd + std), so no vector store to the unaligned address is needed
+    for (int n = start_idx; n < total_hs; n += 2) {
+      VectorRegister h_cur = hs[n];
+
+      mfvrd  (tmp, h_cur);
+#if defined(VM_LITTLE_ENDIAN)
+      std    (tmp, 8*n + 8, state);
+#else
+      std    (tmp, 8*n - 8, state);
+#endif
+      vsldoi (aux, h_cur, h_cur, 8);   // swap the two doublewords to reach the other one
+      mfvrd  (tmp, aux);
+      std    (tmp, 8*n + 0, state);
+    }
+
+    b      (after_state_save_aligned);
+  }
+
+  bind(state_save_aligned);
+  {
+    for (int n = 0; n < total_hs; n += 2) {
+#if defined(VM_LITTLE_ENDIAN)
+      VectorRegister h_cur = hs[n];
+      VectorRegister h_next = hs[n+1];
+#else
+      VectorRegister h_cur = hs[n+1];
+      VectorRegister h_next = hs[n];
+#endif
+      VectorRegister ini_cur = inis[n/2];
+
+      if (n/2 == 0) {
+        lvx(ini_cur, state);
+      } else {
+        addi(addr, state, (n/2) * 16);
+        lvx(ini_cur, addr);
+      }
+      xxmrgld(h_cur->to_vsr(), h_next->to_vsr(), h_cur->to_vsr());
+    }
+
+    for (int n = start_idx; n < total_hs; n += 2) {
+      VectorRegister h_cur = hs[n];
+      VectorRegister ini_cur = inis[n/2];
+
+      vaddudm(h_cur, ini_cur, h_cur);
+    }
+
+    for (int n = start_idx; n < total_hs; n += 2) {
+      VectorRegister h_cur = hs[n];
+
+      if (n/2 == 0) {
+        stvx(h_cur, state);
+      } else {
+        addi(addr, state, (n/2) * 16);
+        stvx(h_cur, addr);
+      }
+    }
+  }
+
+  bind(after_state_save_aligned);
+}
+
+// Emit one SHA-512 round. Use h_cnt to cycle through hs elements but also increment it at the end
+void MacroAssembler::sha512_round(const VectorRegister* hs,           // working registers that carry the roles a-h
+                                  const int total_hs, int& h_cnt,     // h_cnt: generation-time round counter
+                                  const VectorRegister kpw) {         // k + w input for this round
+  // tmp1 ends up as T1 = h + Sigma1(e) + Ch(e,f,g) + kpw; tmp2 ends up as T2 = Sigma0(a) + Maj(a,b,c)
+  // role -> register mapping: every increment of h_cnt moves each role one register down in hs (mod total_hs)
+  const VectorRegister a = hs[(total_hs + 0 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister b = hs[(total_hs + 1 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister c = hs[(total_hs + 2 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister d = hs[(total_hs + 3 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister e = hs[(total_hs + 4 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister f = hs[(total_hs + 5 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister g = hs[(total_hs + 6 - (h_cnt % total_hs)) % total_hs];
+  const VectorRegister h = hs[(total_hs + 7 - (h_cnt % total_hs)) % total_hs];
+  // temporaries
+  const VectorRegister Ch   = VR20;
+  const VectorRegister Maj  = VR21;
+  const VectorRegister bsa  = VR22;
+  const VectorRegister bse  = VR23;
+  const VectorRegister tmp1 = VR24;
+  const VectorRegister tmp2 = VR25;
+
+  vsel      (Ch,   g,    f,   e);       // Ch  = (g & ~e) | (f & e)
+  vxor      (Maj,  a,    b);            // Maj = a ^ b (selector for the vsel below)
+  vshasigmad(bse,  e,    1,   0xf);     // bse = Sigma1(e) in every doubleword
+  vaddudm   (tmp2, Ch,   kpw);
+  vaddudm   (tmp1, h,    bse);
+  vsel      (Maj,  b,    c,   Maj);     // Maj = (b & ~(a^b)) | (c & (a^b)) = Maj(a,b,c)
+  vaddudm   (tmp1, tmp1, tmp2);         // tmp1 = T1
+  vshasigmad(bsa,  a,    1,   0);       // bsa = Sigma0(a) in every doubleword
+  vaddudm   (tmp2, bsa,  Maj);          // tmp2 = T2
+  vaddudm   (d,    d,    tmp1);         // d + T1 is the next round's e
+  vaddudm   (h,    tmp1, tmp2);         // T1 + T2 is the next round's a
+
+  // advance vector pointer to the next iteration
+  h_cnt++;
+}
+
+void MacroAssembler::sha512_calc_2w(const VectorRegister w0,    // in/out: w0-w7 hold the previous 16 schedule words
+                                    const VectorRegister w1,    //         (2 per vector); on return they are shifted
+                                    const VectorRegister w2,    //         by one vector (w0 <- w1, ..., w6 <- w7)
+                                    const VectorRegister w3,    //         and w7 holds the 2 newly computed words
+                                    const VectorRegister w4,
+                                    const VectorRegister w5,
+                                    const VectorRegister w6,
+                                    const VectorRegister w7,
+                                    const VectorRegister kpw0,  // out: k + w for the new words (see the end of the function)
+                                    const VectorRegister kpw1,  // out: k + w for the new words (see the end of the function)
+                                    const Register j,           // in/out: byte offset into k; advanced by 16
+                                    const VectorRegister vRb,   // in: permute control used to combine neighbouring w vectors
+                                    const Register k) {         // in: base address of the round constants
+  // Temporaries
+  const VectorRegister VR_a = VR20;
+  const VectorRegister VR_b = VR21;
+  const VectorRegister VR_c = VR22;
+  const VectorRegister VR_d = VR23;
+
+  // load k[j], k[j+1] (the 16 bytes at k + j) into VR_a
+  lvx        (VR_a, j,    k);
+  // advance j
+  addi       (j,    j,    16); // 16 bytes were read
+
+#if defined(VM_LITTLE_ENDIAN)
+  // VR_b = w[j-15], w[j-14]
+  vperm      (VR_b, w1,   w0,  vRb);
+  // VR_c = w[j-7], w[j-6]
+  vperm      (VR_c, w5,   w4,  vRb);
+#else
+  // VR_b = w[j-15], w[j-14]
+  vperm      (VR_b, w0,   w1,  vRb);
+  // VR_c = w[j-7], w[j-6]
+  vperm      (VR_c, w4,   w5,  vRb);
+#endif
+
+  // VR_b = s0(w[j-15]) , s0(w[j-14])
+  vshasigmad (VR_b, VR_b,    0,   0);
+  // VR_d = s1(w[j-2]) , s1(w[j-1])
+  vshasigmad (VR_d, w7,      0,   0xf);
+  // VR_b = s0(w[j-15]) + w[j-7] , s0(w[j-14]) + w[j-6]
+  vaddudm    (VR_b, VR_b, VR_c);
+  // VR_d = s1(w[j-2]) + w[j-16] , s1(w[j-1]) + w[j-15]
+  vaddudm    (VR_d, VR_d, w0);
+  // VR_c = s0(w[j-15]) + w[j-7] + w[j-16] + s1(w[j-2]), // w[j]
+  //        s0(w[j-14]) + w[j-6] + w[j-15] + s1(w[j-1]), // w[j+1]
+  vaddudm    (VR_c, VR_d, VR_b);
+  // Updating w0 to w7 to hold the new previous 16 values from w.
+  vmr        (w0,   w1);
+  vmr        (w1,   w2);
+  vmr        (w2,   w3);
+  vmr        (w3,   w4);
+  vmr        (w4,   w5);
+  vmr        (w5,   w6);
+  vmr        (w6,   w7);
+  vmr        (w7,   VR_c);
+
+#if defined(VM_LITTLE_ENDIAN)
+  // store k + w to kpw0 (2 values at once)
+  vaddudm    (kpw0, VR_c, VR_a);
+  // kpw1 holds kpw0 with its two doublewords swapped
+  vsldoi     (kpw1, kpw0, kpw0, 8);
+#else
+  // store k + w to kpw1 (2 values at once)
+  vaddudm    (kpw1, VR_c, VR_a);
+  // kpw0 holds kpw1 with its two doublewords swapped
+  vsldoi     (kpw0, kpw1, kpw1, 8);
+#endif
+}
+
+void MacroAssembler::sha512_load_h_vec(const Register state,       // in:  address of the hash state (any alignment)
+                                       const VectorRegister* hs,   // out: hs[start_idx], hs[start_idx + 2], ... receive 16 bytes each
+                                       const int total_hs) {       // number of entries in hs
+#if defined(VM_LITTLE_ENDIAN)
+  VectorRegister a   = hs[0];   // not referenced below
+  VectorRegister g   = hs[6];   // not referenced below
+  int start_idx = 0;
+#else
+  VectorRegister a   = hs[1];   // not referenced below
+  VectorRegister g   = hs[7];   // not referenced below
+  int start_idx = 1;
+#endif
+
+  Register addr      = R7;
+  VectorRegister vRb = VR8;
+  Register tmp       = R8;
+  Label state_aligned, after_state_aligned;
+
+  andi_(tmp, state, 0xf);
+  beq(CCR0, state_aligned);   // taken when state is 16-byte aligned
+
+  // deal with unaligned addresses
+  VectorRegister aux = VR9;
+
+  lvx(hs[start_idx], state);
+  load_perm(vRb, state);
+
+  for (int n = start_idx + 2; n < total_hs; n += 2) {
+    VectorRegister h_cur   = hs[n];
+    VectorRegister h_prev2 = hs[n - 2];
+    addi(addr, state, (n/2) * 16);
+    lvx(h_cur, addr);
+    vec_perm(h_prev2, h_cur, vRb);
+  }
+  addi(addr, state, (total_hs/2) * 16);   // one extra quadword is needed to complete the last vector
+  lvx    (aux, addr);
+  vec_perm(hs[total_hs - 2 + start_idx], aux, vRb);
+  b      (after_state_aligned);
+
+  bind(state_aligned);
+
+  // deal with aligned addresses
+  lvx(hs[start_idx], state);
+
+  for (int n = start_idx + 2; n < total_hs; n += 2) {
+    VectorRegister h_cur = hs[n];
+    addi(addr, state, (n/2) * 16);
+    lvx(h_cur, addr);
+  }
+
+  bind(after_state_aligned);
+}
+
+static const uint64_t sha512_round_table[80] __attribute((aligned(16))) = {  // the 80 SHA-512 round constants, read 16 bytes at a time with lvx
+  0x428a2f98d728ae22, 0x7137449123ef65cd,
+  0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
+  0x3956c25bf348b538, 0x59f111f1b605d019,
+  0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
+  0xd807aa98a3030242, 0x12835b0145706fbe,
+  0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
+  0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
+  0x9bdc06a725c71235, 0xc19bf174cf692694,
+  0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
+  0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
+  0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
+  0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
+  0x983e5152ee66dfab, 0xa831c66d2db43210,
+  0xb00327c898fb213f, 0xbf597fc7beef0ee4,
+  0xc6e00bf33da88fc2, 0xd5a79147930aa725,
+  0x06ca6351e003826f, 0x142929670a0e6e70,
+  0x27b70a8546d22ffc, 0x2e1b21385c26c926,
+  0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
+  0x650a73548baf63de, 0x766a0abb3c77b2a8,
+  0x81c2c92e47edaee6, 0x92722c851482353b,
+  0xa2bfe8a14cf10364, 0xa81a664bbc423001,
+  0xc24b8b70d0f89791, 0xc76c51a30654be30,
+  0xd192e819d6ef5218, 0xd69906245565a910,
+  0xf40e35855771202a, 0x106aa07032bbd1b8,
+  0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
+  0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
+  0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
+  0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
+  0x748f82ee5defb2fc, 0x78a5636f43172f60,
+  0x84c87814a1f0ab72, 0x8cc702081a6439ec,
+  0x90befffa23631e28, 0xa4506cebde82bde9,
+  0xbef9a3f7b2c67915, 0xc67178f2e372532b,
+  0xca273eceea26619c, 0xd186b8c721c0c207,
+  0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
+  0x06f067aa72176fba, 0x0a637dc5a2c898a6,
+  0x113f9804bef90dae, 0x1b710b35131c471b,
+  0x28db77f523047d84, 0x32caab7b40c72493,
+  0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
+  0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
+  0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
+};
+static const uint64_t *sha512_round_consts = sha512_round_table;  // non-const pointer: sha512() may repoint it to a 16-byte aligned copy on AIX
+
+//   R3_ARG1   - byte[]  Input string with padding but in Big Endian
+//   R4_ARG2   - long[]  SHA.state (at first, the root of primes)
+//   R5_ARG3   - int     offset
+//   R6_ARG4   - int     limit
+//
+//   Internal Register usage:
+//   R7 R8 R9  - volatile temporaries
+//   VR0-VR7   - a-h
+//   VR8       - vRb
+//   VR9       - aux (highly volatile, use with care)
+//   VR10-VR17 - w0-w7 | ini_a-ini_h
+//   VR18      - vsp16 | kplusw0
+//   VR19      - vsp32 | kplusw1
+//   VR20-VR25 - sha512_calc_2w and sha512_round temporaries
+void MacroAssembler::sha512(bool multi_block) {
+  static const ssize_t buf_size = 128;
+  static const uint8_t w_size = sizeof(sha512_round_table)/sizeof(uint64_t);
+#ifdef AIX
+  // malloc provides 16 byte alignment
+  if (((uintptr_t)sha512_round_consts & 0xF) != 0) {
+    uint64_t *new_round_consts = (uint64_t*)malloc(sizeof(sha512_round_table));
+    guarantee(new_round_consts, "oom");
+    memcpy(new_round_consts, sha512_round_consts, sizeof(sha512_round_table));
+    sha512_round_consts = (const uint64_t*)new_round_consts;
+  }
+#endif
+
+  Register buf_in = R3_ARG1;
+  Register state  = R4_ARG2;
+  Register ofs    = R5_ARG3;
+  Register limit  = R6_ARG4;
+
+  Label sha_loop, core_loop;
+
+  // Save non-volatile vector registers in the red zone
+  static const VectorRegister nv[] = {
+    VR20, VR21, VR22, VR23, VR24, VR25/*, VR26, VR27, VR28, VR29, VR30, VR31*/
+  };
+  static const uint8_t nv_size = sizeof(nv) / sizeof (VectorRegister);
+
+  for (int c = 0; c < nv_size; c++) {
+    Register idx = R7;
+    li  (idx, (c - (nv_size)) * 16);
+    stvx(nv[c], idx, R1);
+  }
+
+  // Load hash state to registers
+  VectorRegister a = VR0;
+  VectorRegister b = VR1;
+  VectorRegister c = VR2;
+  VectorRegister d = VR3;
+  VectorRegister e = VR4;
+  VectorRegister f = VR5;
+  VectorRegister g = VR6;
+  VectorRegister h = VR7;
+  static const VectorRegister hs[] = {a, b, c, d, e, f, g, h};
+  static const int total_hs = sizeof(hs)/sizeof(VectorRegister);
+  // counter for cycling through hs vector to avoid register moves between iterations
+  int h_cnt = 0;
+
+  // Load a-h registers from the memory pointed by state
+  sha512_load_h_vec(state, hs, total_hs);
+
+  Register k = R9;
+  assert(((uintptr_t)sha512_round_consts & 0xF) == 0, "k alignment");
+  load_const_optimized(k, (address)sha512_round_consts, R0);
+
+  if (multi_block) {
+    align(OptoLoopAlignment);
+  }
+  bind(sha_loop);
+
+  for (int n = 0; n < total_hs; n += 2) {
+#if defined(VM_LITTLE_ENDIAN)
+    VectorRegister h_cur = hs[n];
+    VectorRegister h_next = hs[n + 1];
+#else
+    VectorRegister h_cur = hs[n + 1];
+    VectorRegister h_next = hs[n];
+#endif
+    vsldoi (h_next, h_cur, h_cur, 8);
+  }
+
+  // Load 16 elements from w out of the loop.
+  // Order of the long values is endianness-specific.
+  VectorRegister w0 = VR10;
+  VectorRegister w1 = VR11;
+  VectorRegister w2 = VR12;
+  VectorRegister w3 = VR13;
+  VectorRegister w4 = VR14;
+  VectorRegister w5 = VR15;
+  VectorRegister w6 = VR16;
+  VectorRegister w7 = VR17;
+  static const VectorRegister ws[] = {w0, w1, w2, w3, w4, w5, w6, w7};
+  static const int total_ws = sizeof(ws)/sizeof(VectorRegister);
+
+  // Load 16 w into vectors and setup vsl for vperm
+  sha512_load_w_vec(buf_in, ws, total_ws);
+
+#if defined(VM_LITTLE_ENDIAN)
+  VectorRegister vsp16 = VR18;
+  VectorRegister vsp32 = VR19;
+  VectorRegister shiftarg = VR9;
+
+  vspltisw(vsp16,    8);
+  vspltisw(shiftarg, 1);
+  vsl     (vsp16,    vsp16, shiftarg);
+  vsl     (vsp32,    vsp16, shiftarg);
+
+  VectorRegister vsp8 = VR9;
+  vspltish(vsp8,     8);
+
+  // Convert input from Big Endian to Little Endian
+  for (int c = 0; c < total_ws; c++) {
+    VectorRegister w = ws[c];
+    vrlh  (w, w, vsp8);
+  }
+  for (int c = 0; c < total_ws; c++) {
+    VectorRegister w = ws[c];
+    vrlw  (w, w, vsp16);
+  }
+  for (int c = 0; c < total_ws; c++) {
+    VectorRegister w = ws[c];
+    vrld  (w, w, vsp32);
+  }
+#endif
+
+  Register Rb        = R10;
+  VectorRegister vRb = VR8;
+  li      (Rb, 8);
+  load_perm(vRb, Rb);
+
+  VectorRegister kplusw0 = VR18;
+  VectorRegister kplusw1 = VR19;
+
+  Register addr      = R7;
+
+  for (int n = 0; n < total_ws; n++) {
+    VectorRegister w = ws[n];
+
+    if (n == 0) {
+      lvx  (kplusw0, k);
+    } else {
+      addi (addr, k, n * 16);
+      lvx  (kplusw0, addr);
+    }
+#if defined(VM_LITTLE_ENDIAN)
+    vaddudm(kplusw0, kplusw0, w);
+    vsldoi (kplusw1, kplusw0, kplusw0, 8);
+#else
+    vaddudm(kplusw1, kplusw0, w);
+    vsldoi (kplusw0, kplusw1, kplusw1, 8);
+#endif
+
+    sha512_round(hs, total_hs, h_cnt, kplusw0);
+    sha512_round(hs, total_hs, h_cnt, kplusw1);
+  }
+
+  Register tmp       = R8;
+  li    (tmp, (w_size-16)/total_hs);
+  mtctr (tmp);
+  // j will be aligned to 4 for loading words.
+  // Whenever j is read, advance the pointer (e.g. when j is used in a function)
+  Register j = tmp;
+  li     (j, 8*16);
+
+  align(OptoLoopAlignment);
+  bind(core_loop);
+
+  // due to VectorRegister rotate, always iterate in multiples of total_hs
+  for (int n = 0; n < total_hs/2; n++) {
+    sha512_calc_2w(w0, w1, w2, w3, w4, w5, w6, w7, kplusw0, kplusw1, j, vRb, k);
+    sha512_round(hs, total_hs, h_cnt, kplusw0);
+    sha512_round(hs, total_hs, h_cnt, kplusw1);
+  }
+
+  bdnz   (core_loop);
+
+  sha512_update_sha_state(state, hs, total_hs);
+
+  if (multi_block) {
+    addi(buf_in, buf_in, buf_size);
+    addi(ofs, ofs, buf_size);
+    cmplw(CCR0, ofs, limit);
+    ble(CCR0, sha_loop);
+
+    // return ofs
+    mr(R3_RET, ofs);
+  }
+
+  // Restore non-volatile registers
+  for (int c = 0; c < nv_size; c++) {
+    Register idx = R7;
+    li  (idx, (c - (nv_size)) * 16);
+    lvx(nv[c], idx, R1);
+  }
+}
diff --git a/src/cpu/ppc/vm/stubGenerator_ppc.cpp b/src/cpu/ppc/vm/stubGenerator_ppc.cpp
index 12dd846..7f2c442 100644
--- a/src/cpu/ppc/vm/stubGenerator_ppc.cpp
+++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp
@@ -2224,7 +2224,7 @@
     return start;
   }
 
-  // Arguments for generated stub (little endian only):
+  // Arguments for generated stub:
   //   R3_ARG1   - source byte array address
   //   R4_ARG2   - destination byte array address
   //   R5_ARG3   - round key array
@@ -2243,7 +2243,6 @@
     Register keylen         = R8;
     Register temp           = R9;
     Register keypos         = R10;
-    Register hex            = R11;
     Register fifteen        = R12;
 
     VectorRegister vRet     = VR0;
@@ -2263,164 +2262,170 @@
     VectorRegister vTmp3    = VR11;
     VectorRegister vTmp4    = VR12;
 
-    VectorRegister vLow     = VR13;
-    VectorRegister vHigh    = VR14;
-
-    __ li              (hex, 16);
     __ li              (fifteen, 15);
-    __ vspltisb        (fSplt, 0x0f);
 
     // load unaligned from[0-15] to vsRet
     __ lvx             (vRet, from);
     __ lvx             (vTmp1, fifteen, from);
     __ lvsl            (fromPerm, from);
+#ifdef VM_LITTLE_ENDIAN
+    __ vspltisb        (fSplt, 0x0f);
     __ vxor            (fromPerm, fromPerm, fSplt);
+#endif
     __ vperm           (vRet, vRet, vTmp1, fromPerm);
 
     // load keylen (44 or 52 or 60)
     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
 
     // to load keys
-    __ lvsr            (keyPerm, key);
-    __ vxor            (vTmp2, vTmp2, vTmp2);
+    __ load_perm       (keyPerm, key);
+#ifdef VM_LITTLE_ENDIAN
     __ vspltisb        (vTmp2, -16);
     __ vrld            (keyPerm, keyPerm, vTmp2);
     __ vrld            (keyPerm, keyPerm, vTmp2);
     __ vsldoi          (keyPerm, keyPerm, keyPerm, 8);
+#endif
 
-    // load the 1st round key to vKey1
-    __ li              (keypos, 0);
+    // load the 1st round key to vTmp1
+    __ lvx             (vTmp1, key);
+    __ li              (keypos, 16);
     __ lvx             (vKey1, keypos, key);
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey1, vTmp1, vKey1, keyPerm);
+    __ vec_perm        (vTmp1, vKey1, keyPerm);
 
     // 1st round
-    __ vxor (vRet, vRet, vKey1);
+    __ vxor            (vRet, vRet, vTmp1);
 
     // load the 2nd round key to vKey1
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 32);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vKey2, keyPerm);
 
     // load the 3rd round key to vKey2
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
+    __ li              (keypos, 48);
+    __ lvx             (vKey3, keypos, key);
+    __ vec_perm        (vKey2, vKey3, keyPerm);
 
     // load the 4th round key to vKey3
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 64);
+    __ lvx             (vKey4, keypos, key);
+    __ vec_perm        (vKey3, vKey4, keyPerm);
 
     // load the 5th round key to vKey4
-    __ addi            (keypos, keypos, 16);
+    __ li              (keypos, 80);
     __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey4, vTmp1, keyPerm);
 
     // 2nd - 5th rounds
-    __ vcipher (vRet, vRet, vKey1);
-    __ vcipher (vRet, vRet, vKey2);
-    __ vcipher (vRet, vRet, vKey3);
-    __ vcipher (vRet, vRet, vKey4);
+    __ vcipher         (vRet, vRet, vKey1);
+    __ vcipher         (vRet, vRet, vKey2);
+    __ vcipher         (vRet, vRet, vKey3);
+    __ vcipher         (vRet, vRet, vKey4);
 
     // load the 6th round key to vKey1
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 96);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
 
     // load the 7th round key to vKey2
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
+    __ li              (keypos, 112);
+    __ lvx             (vKey3, keypos, key);
+    __ vec_perm        (vKey2, vKey3, keyPerm);
 
     // load the 8th round key to vKey3
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 128);
+    __ lvx             (vKey4, keypos, key);
+    __ vec_perm        (vKey3, vKey4, keyPerm);
 
     // load the 9th round key to vKey4
-    __ addi            (keypos, keypos, 16);
+    __ li              (keypos, 144);
     __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey4, vTmp1, keyPerm);
 
     // 6th - 9th rounds
-    __ vcipher (vRet, vRet, vKey1);
-    __ vcipher (vRet, vRet, vKey2);
-    __ vcipher (vRet, vRet, vKey3);
-    __ vcipher (vRet, vRet, vKey4);
+    __ vcipher         (vRet, vRet, vKey1);
+    __ vcipher         (vRet, vRet, vKey2);
+    __ vcipher         (vRet, vRet, vKey3);
+    __ vcipher         (vRet, vRet, vKey4);
 
     // load the 10th round key to vKey1
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 160);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
 
     // load the 11th round key to vKey2
-    __ addi            (keypos, keypos, 16);
+    __ li              (keypos, 176);
     __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey2, vTmp1, keyPerm);
 
     // if all round keys are loaded, skip next 4 rounds
     __ cmpwi           (CCR0, keylen, 44);
     __ beq             (CCR0, L_doLast);
 
     // 10th - 11th rounds
-    __ vcipher (vRet, vRet, vKey1);
-    __ vcipher (vRet, vRet, vKey2);
+    __ vcipher         (vRet, vRet, vKey1);
+    __ vcipher         (vRet, vRet, vKey2);
 
     // load the 12th round key to vKey1
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 192);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
 
     // load the 13th round key to vKey2
-    __ addi            (keypos, keypos, 16);
+    __ li              (keypos, 208);
     __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey2, vTmp1, keyPerm);
 
     // if all round keys are loaded, skip next 2 rounds
     __ cmpwi           (CCR0, keylen, 52);
     __ beq             (CCR0, L_doLast);
 
     // 12th - 13th rounds
-    __ vcipher (vRet, vRet, vKey1);
-    __ vcipher (vRet, vRet, vKey2);
+    __ vcipher         (vRet, vRet, vKey1);
+    __ vcipher         (vRet, vRet, vKey2);
 
     // load the 14th round key to vKey1
-    __ addi            (keypos, keypos, 16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
+    __ li              (keypos, 224);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
 
     // load the 15th round key to vKey2
-    __ addi            (keypos, keypos, 16);
+    __ li              (keypos, 240);
     __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey2, vTmp1, keyPerm);
 
     __ bind(L_doLast);
 
     // last two rounds
-    __ vcipher (vRet, vRet, vKey1);
-    __ vcipherlast (vRet, vRet, vKey2);
+    __ vcipher         (vRet, vRet, vKey1);
+    __ vcipherlast     (vRet, vRet, vKey2);
 
-    __ neg             (temp, to);
-    __ lvsr            (toPerm, temp);
-    __ vspltisb        (vTmp2, -1);
-    __ vxor            (vTmp1, vTmp1, vTmp1);
-    __ vperm           (vTmp2, vTmp2, vTmp1, toPerm);
-    __ vxor            (toPerm, toPerm, fSplt);
+    // store result (unaligned)
+#ifdef VM_LITTLE_ENDIAN
+    __ lvsl            (toPerm, to);
+#else
+    __ lvsr            (toPerm, to);
+#endif
+    __ vspltisb        (vTmp3, -1);
+    __ vspltisb        (vTmp4, 0);
     __ lvx             (vTmp1, to);
-    __ vperm           (vRet, vRet, vRet, toPerm);
-    __ vsel            (vTmp1, vTmp1, vRet, vTmp2);
-    __ lvx             (vTmp4, fifteen, to);
+    __ lvx             (vTmp2, fifteen, to);
+#ifdef VM_LITTLE_ENDIAN
+    __ vperm           (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
+    __ vxor            (toPerm, toPerm, fSplt);       // swap bytes
+#else
+    __ vperm           (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
+#endif
+    __ vperm           (vTmp4, vRet, vRet, toPerm);   // rotate data
+    __ vsel            (vTmp2, vTmp4, vTmp2, vTmp3);
+    __ vsel            (vTmp1, vTmp1, vTmp4, vTmp3);
+    __ stvx            (vTmp2, fifteen, to);          // store this one first (may alias)
     __ stvx            (vTmp1, to);
-    __ vsel            (vRet, vRet, vTmp4, vTmp2);
-    __ stvx            (vRet, fifteen, to);
 
     __ blr();
      return start;
   }
 
-  // Arguments for generated stub (little endian only):
+  // Arguments for generated stub:
   //   R3_ARG1   - source byte array address
   //   R4_ARG2   - destination byte array address
   //   R5_ARG3   - K (key) in little endian int array
@@ -2442,7 +2447,6 @@
     Register keylen         = R8;
     Register temp           = R9;
     Register keypos         = R10;
-    Register hex            = R11;
     Register fifteen        = R12;
 
     VectorRegister vRet     = VR0;
@@ -2463,30 +2467,30 @@
     VectorRegister vTmp3    = VR12;
     VectorRegister vTmp4    = VR13;
 
-    VectorRegister vLow     = VR14;
-    VectorRegister vHigh    = VR15;
-
-    __ li              (hex, 16);
     __ li              (fifteen, 15);
-    __ vspltisb        (fSplt, 0x0f);
 
     // load unaligned from[0-15] to vsRet
     __ lvx             (vRet, from);
     __ lvx             (vTmp1, fifteen, from);
     __ lvsl            (fromPerm, from);
+#ifdef VM_LITTLE_ENDIAN
+    __ vspltisb        (fSplt, 0x0f);
     __ vxor            (fromPerm, fromPerm, fSplt);
+#endif
     __ vperm           (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]
 
     // load keylen (44 or 52 or 60)
     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
 
     // to load keys
-    __ lvsr            (keyPerm, key);
+    __ load_perm       (keyPerm, key);
+#ifdef VM_LITTLE_ENDIAN
     __ vxor            (vTmp2, vTmp2, vTmp2);
     __ vspltisb        (vTmp2, -16);
     __ vrld            (keyPerm, keyPerm, vTmp2);
     __ vrld            (keyPerm, keyPerm, vTmp2);
     __ vsldoi          (keyPerm, keyPerm, keyPerm, 8);
+#endif
 
     __ cmpwi           (CCR0, keylen, 44);
     __ beq             (CCR0, L_do44);
@@ -2494,32 +2498,32 @@
     __ cmpwi           (CCR0, keylen, 52);
     __ beq             (CCR0, L_do52);
 
-    // load the 15th round key to vKey11
+    // load the 15th round key to vKey1
     __ li              (keypos, 240);
+    __ lvx             (vKey1, keypos, key);
+    __ li              (keypos, 224);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vKey2, vKey1, keyPerm);
+
+    // load the 14th round key to vKey2
+    __ li              (keypos, 208);
+    __ lvx             (vKey3, keypos, key);
+    __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
+
+    // load the 13th round key to vKey3
+    __ li              (keypos, 192);
+    __ lvx             (vKey4, keypos, key);
+    __ vec_perm        (vKey3, vKey4, vKey3, keyPerm);
+
+    // load the 12th round key to vKey4
+    __ li              (keypos, 176);
+    __ lvx             (vKey5, keypos, key);
+    __ vec_perm        (vKey4, vKey5, vKey4, keyPerm);
+
+    // load the 11th round key to vKey5
+    __ li              (keypos, 160);
     __ lvx             (vTmp1, keypos, key);
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
-
-    // load the 14th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
-
-    // load the 13th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
-
-    // load the 12th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey4, vTmp2, vTmp1, keyPerm);
-
-    // load the 11th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey5, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey5, vTmp1, vKey5, keyPerm);
 
     // 1st - 5th rounds
     __ vxor            (vRet, vRet, vKey1);
@@ -2532,22 +2536,22 @@
 
     __ bind            (L_do52);
 
-    // load the 13th round key to vKey11
+    // load the 13th round key to vKey1
     __ li              (keypos, 208);
-    __ lvx             (vTmp1, keypos, key);
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
+    __ lvx             (vKey1, keypos, key);
+    __ li              (keypos, 192);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vKey2, vKey1, keyPerm);
 
-    // load the 12th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
+    // load the 12th round key to vKey2
+    __ li              (keypos, 176);
+    __ lvx             (vKey3, keypos, key);
+    __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
 
-    // load the 11th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
+    // load the 11th round key to vKey3
+    __ li              (keypos, 160);
+    __ lvx             (vTmp1, keypos, key);
+    __ vec_perm        (vKey3, vTmp1, vKey3, keyPerm);
 
     // 1st - 3rd rounds
     __ vxor            (vRet, vRet, vKey1);
@@ -2558,42 +2562,42 @@
 
     __ bind            (L_do44);
 
-    // load the 11th round key to vKey11
+    // load the 11th round key to vKey1
     __ li              (keypos, 176);
+    __ lvx             (vKey1, keypos, key);
+    __ li              (keypos, 160);
     __ lvx             (vTmp1, keypos, key);
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
+    __ vec_perm        (vKey1, vTmp1, vKey1, keyPerm);
 
     // 1st round
     __ vxor            (vRet, vRet, vKey1);
 
     __ bind            (L_doLast);
 
-    // load the 10th round key to vKey10
-    __ addi            (keypos, keypos, -16);
+    // load the 10th round key to vKey1
+    __ li              (keypos, 144);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vKey2, vTmp1, keyPerm);
+
+    // load the 9th round key to vKey2
+    __ li              (keypos, 128);
+    __ lvx             (vKey3, keypos, key);
+    __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
+
+    // load the 8th round key to vKey3
+    __ li              (keypos, 112);
+    __ lvx             (vKey4, keypos, key);
+    __ vec_perm        (vKey3, vKey4, vKey3, keyPerm);
+
+    // load the 7th round key to vKey4
+    __ li              (keypos, 96);
+    __ lvx             (vKey5, keypos, key);
+    __ vec_perm        (vKey4, vKey5, vKey4, keyPerm);
+
+    // load the 6th round key to vKey5
+    __ li              (keypos, 80);
     __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
-
-    // load the 9th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
-
-    // load the 8th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
-
-    // load the 7th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
-
-    // load the 6th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey5, vTmp2, vTmp1, keyPerm);
+    __ vec_perm        (vKey5, vTmp1, vKey5, keyPerm);
 
     // last 10th - 6th rounds
     __ vncipher        (vRet, vRet, vKey1);
@@ -2602,30 +2606,29 @@
     __ vncipher        (vRet, vRet, vKey4);
     __ vncipher        (vRet, vRet, vKey5);
 
-    // load the 5th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
+    // load the 5th round key to vKey1
+    __ li              (keypos, 64);
+    __ lvx             (vKey2, keypos, key);
+    __ vec_perm        (vKey1, vKey2, vTmp1, keyPerm);
 
-    // load the 4th round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
+    // load the 4th round key to vKey2
+    __ li              (keypos, 48);
+    __ lvx             (vKey3, keypos, key);
+    __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
 
-    // load the 3rd round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
+    // load the 3rd round key to vKey3
+    __ li              (keypos, 32);
+    __ lvx             (vKey4, keypos, key);
+    __ vec_perm        (vKey3, vKey4, vKey3, keyPerm);
 
-    // load the 2nd round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp1, keypos, key);
-    __ vperm           (vKey4, vTmp2, vTmp1, keyPerm);
+    // load the 2nd round key to vKey4
+    __ li              (keypos, 16);
+    __ lvx             (vKey5, keypos, key);
+    __ vec_perm        (vKey4, vKey5, vKey4, keyPerm);
 
-    // load the 1st round key to vKey10
-    __ addi            (keypos, keypos, -16);
-    __ lvx             (vTmp2, keypos, key);
-    __ vperm           (vKey5, vTmp1, vTmp2, keyPerm);
+    // load the 1st round key to vKey5
+    __ lvx             (vTmp1, key);
+    __ vec_perm        (vKey5, vTmp1, vKey5, keyPerm);
 
     // last 5th - 1th rounds
     __ vncipher        (vRet, vRet, vKey1);
@@ -2634,24 +2637,54 @@
     __ vncipher        (vRet, vRet, vKey4);
     __ vncipherlast    (vRet, vRet, vKey5);
 
-    __ neg             (temp, to);
-    __ lvsr            (toPerm, temp);
-    __ vspltisb        (vTmp2, -1);
-    __ vxor            (vTmp1, vTmp1, vTmp1);
-    __ vperm           (vTmp2, vTmp2, vTmp1, toPerm);
-    __ vxor            (toPerm, toPerm, fSplt);
+    // store result (unaligned)
+#ifdef VM_LITTLE_ENDIAN
+    __ lvsl            (toPerm, to);
+#else
+    __ lvsr            (toPerm, to);
+#endif
+    __ vspltisb        (vTmp3, -1);
+    __ vspltisb        (vTmp4, 0);
     __ lvx             (vTmp1, to);
-    __ vperm           (vRet, vRet, vRet, toPerm);
-    __ vsel            (vTmp1, vTmp1, vRet, vTmp2);
-    __ lvx             (vTmp4, fifteen, to);
+    __ lvx             (vTmp2, fifteen, to);
+#ifdef VM_LITTLE_ENDIAN
+    __ vperm           (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
+    __ vxor            (toPerm, toPerm, fSplt);       // swap bytes
+#else
+    __ vperm           (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
+#endif
+    __ vperm           (vTmp4, vRet, vRet, toPerm);   // rotate data
+    __ vsel            (vTmp2, vTmp4, vTmp2, vTmp3);
+    __ vsel            (vTmp1, vTmp1, vTmp4, vTmp3);
+    __ stvx            (vTmp2, fifteen, to);          // store this one first (may alias)
     __ stvx            (vTmp1, to);
-    __ vsel            (vRet, vRet, vTmp4, vTmp2);
-    __ stvx            (vRet, fifteen, to);
 
     __ blr();
      return start;
   }
 
+  address generate_sha256_implCompress(bool multi_block, const char *name) {
+    assert(UseSHA, "need SHA instructions");
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ function_entry();
+
+    __ sha256 (multi_block);
+
+    __ blr();
+    return start;
+  }
+
+  address generate_sha512_implCompress(bool multi_block, const char *name) {
+    assert(UseSHA, "need SHA instructions");
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ function_entry();
+
+    __ sha512 (multi_block);
+
+    __ blr();
+    return start;
+  }
+
   void generate_arraycopy_stubs() {
     // Note: the disjoint stubs must be generated first, some of
     // the conjoint stubs use them.
@@ -2881,6 +2914,15 @@
       StubRoutines::_montgomerySquare
         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
     }
+
+    if (UseSHA256Intrinsics) {
+      StubRoutines::_sha256_implCompress   = generate_sha256_implCompress(false, "sha256_implCompress");
+      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true,  "sha256_implCompressMB");
+    }
+    if (UseSHA512Intrinsics) {
+      StubRoutines::_sha512_implCompress   = generate_sha512_implCompress(false, "sha512_implCompress");
+      StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
+    }
   }
 
  public:
diff --git a/src/cpu/ppc/vm/stubRoutines_ppc_64.hpp b/src/cpu/ppc/vm/stubRoutines_ppc_64.hpp
index 3655c1f..e6e90ae 100644
--- a/src/cpu/ppc/vm/stubRoutines_ppc_64.hpp
+++ b/src/cpu/ppc/vm/stubRoutines_ppc_64.hpp
@@ -34,7 +34,7 @@
 
 enum platform_dependent_constants {
   code_size1 = 20000,          // simply increase if too small (assembler will crash if too small)
-  code_size2 = 20000           // simply increase if too small (assembler will crash if too small)
+  code_size2 = 24000           // simply increase if too small (assembler will crash if too small)
 };
 
 // CRC32 Intrinsics.
diff --git a/src/cpu/ppc/vm/vm_version_ppc.cpp b/src/cpu/ppc/vm/vm_version_ppc.cpp
index 6761c02..bbc5269 100644
--- a/src/cpu/ppc/vm/vm_version_ppc.cpp
+++ b/src/cpu/ppc/vm/vm_version_ppc.cpp
@@ -110,7 +110,7 @@
   // Create and print feature-string.
   char buf[(num_features+1) * 16]; // Max 16 chars per feature.
   jio_snprintf(buf, sizeof(buf),
-               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s",
+               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_fsqrt()   ? " fsqrt"   : ""),
                (has_isel()    ? " isel"    : ""),
                (has_lxarxeh() ? " lxarxeh" : ""),
@@ -124,7 +124,8 @@
                (has_vcipher() ? " aes"     : ""),
                (has_vpmsumb() ? " vpmsumb" : ""),
                (has_mfdscr()  ? " mfdscr"  : ""),
-               (has_vsx()     ? " vsx"     : "")
+               (has_vsx()     ? " vsx"     : ""),
+               (has_vshasig() ? " sha"     : "")
                // Make sure number of %s matches num_features!
               );
   _features_str = strdup(buf);
@@ -173,7 +174,6 @@
   }
 
   // The AES intrinsic stubs require AES instruction support.
-#if defined(VM_LITTLE_ENDIAN)
   if (has_vcipher()) {
     if (FLAG_IS_DEFAULT(UseAES)) {
       UseAES = true;
@@ -194,29 +194,43 @@
     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
   }
 
-#else
-  if (UseAES) {
-    warning("AES instructions are not available on this CPU");
-    FLAG_SET_DEFAULT(UseAES, false);
-  }
-  if (UseAESIntrinsics) {
-    if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
-      warning("AES intrinsics are not available on this CPU");
-    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
-  }
-#endif
-
-  if (UseSHA) {
-    warning("SHA instructions are not available on this CPU");
+  if (has_vshasig()) {
+    if (FLAG_IS_DEFAULT(UseSHA)) {
+      UseSHA = true;
+    }
+  } else if (UseSHA) {
+    if (!FLAG_IS_DEFAULT(UseSHA))
+      warning("SHA instructions are not available on this CPU");
     FLAG_SET_DEFAULT(UseSHA, false);
   }
-  if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
-    warning("SHA intrinsics are not available on this CPU");
+
+  if (UseSHA1Intrinsics) {
+    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+  }
+
+  if (UseSHA && has_vshasig()) {
+    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
+    }
+  } else if (UseSHA256Intrinsics) {
+    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+  }
+
+  if (UseSHA && has_vshasig()) {
+    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
+    }
+  } else if (UseSHA512Intrinsics) {
+    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
   }
 
+  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
+    FLAG_SET_DEFAULT(UseSHA, false);
+  }
+
   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
     UseMontgomeryMultiplyIntrinsic = true;
   }
@@ -503,6 +517,7 @@
   a->vpmsumb(VR0, VR1, VR2);                   // code[12] -> vpmsumb
   a->mfdscr(R0);                               // code[13] -> mfdscr
   a->lxvd2x(VSR0, R3_ARG1);                    // code[14] -> vsx
+  a->vshasigmaw(VR0, VR1, 1, 0xF);             // code[15] -> vshasig
   a->blr();
 
   // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@@ -551,6 +566,7 @@
   if (code[feature_cntr++]) features |= vpmsumb_m;
   if (code[feature_cntr++]) features |= mfdscr_m;
   if (code[feature_cntr++]) features |= vsx_m;
+  if (code[feature_cntr++]) features |= vshasig_m;
 
   // Print the detection code.
   if (PrintAssembly) {
diff --git a/src/cpu/ppc/vm/vm_version_ppc.hpp b/src/cpu/ppc/vm/vm_version_ppc.hpp
index 6245e1c..86243a1 100644
--- a/src/cpu/ppc/vm/vm_version_ppc.hpp
+++ b/src/cpu/ppc/vm/vm_version_ppc.hpp
@@ -47,6 +47,7 @@
     vpmsumb,
     mfdscr,
     vsx,
+    vshasig,
     num_features // last entry to count features
   };
   enum Feature_Flag_Set {
@@ -63,6 +64,7 @@
     dcba_m                = (1 << dcba   ),
     lqarx_m               = (1 << lqarx  ),
     vcipher_m             = (1 << vcipher),
+    vshasig_m             = (1 << vshasig),
     vpmsumb_m             = (1 << vpmsumb),
     mfdscr_m              = (1 << mfdscr ),
     vsx_m                 = (1 << vsx    ),
@@ -99,6 +101,7 @@
   static bool has_vpmsumb() { return (_features & vpmsumb_m) != 0; }
   static bool has_mfdscr()  { return (_features & mfdscr_m) != 0; }
   static bool has_vsx()     { return (_features & vsx_m) != 0; }
+  static bool has_vshasig() { return (_features & vshasig_m) != 0; }
 
   static const char* cpu_features() { return _features_str; }
 
diff --git a/src/cpu/x86/vm/stubRoutines_x86_64.hpp b/src/cpu/x86/vm/stubRoutines_x86_64.hpp
index 15922b8..b048fd7 100644
--- a/src/cpu/x86/vm/stubRoutines_x86_64.hpp
+++ b/src/cpu/x86/vm/stubRoutines_x86_64.hpp
@@ -33,7 +33,7 @@
 
 enum platform_dependent_constants {
   code_size1 = 19000,          // simply increase if too small (assembler will crash if too small)
-  code_size2 = 23000           // simply increase if too small (assembler will crash if too small)
+  code_size2 = 24000           // simply increase if too small (assembler will crash if too small)
 };
 
 class x86 {
diff --git a/src/cpu/x86/vm/x86_64.ad b/src/cpu/x86/vm/x86_64.ad
index e20b02d..e36604f 100644
--- a/src/cpu/x86/vm/x86_64.ad
+++ b/src/cpu/x86/vm/x86_64.ad
@@ -3740,6 +3740,23 @@
   %}
 %}
 
+// Indirect Memory Plus Positive Index Register Plus Offset Operand
+operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
+  match(AddP (AddP reg (ConvI2L idx)) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $idx]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($idx);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
 %{
@@ -3891,6 +3908,23 @@
   %}
 %}
 
+// Indirect Memory Plus Positive Index Register Plus Offset Operand
+operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
+  match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
+
+  op_cost(10);
+  format %{"[$reg + $off + $idx]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($idx);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
 %{
@@ -4082,11 +4116,11 @@
 // case of this is memory operands.
 
 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
-               indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
+               indIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
                indCompressedOopOffset,
                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
-               indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
+               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 
 //----------PIPELINE-----------------------------------------------------------
 // Rules which define the behavior of the target architectures pipeline.
@@ -5120,6 +5154,17 @@
   ins_pipe(ialu_reg_reg_fat);
 %}
 
+instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
+%{
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 %{
   match(Set dst mem);
@@ -5204,6 +5249,18 @@
   ins_pipe(ialu_reg_reg_fat);
 %}
 
+instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst mem);
+
+  ins_cost(110);
+  format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
+  opcode(0x8D);
+  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
+  ins_pipe(ialu_reg_reg_fat);
+%}
+
 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 %{
   predicate(Universe::narrow_oop_shift() == 0);
diff --git a/src/os/aix/vm/os_aix.cpp b/src/os/aix/vm/os_aix.cpp
index b03ce24..717f692 100644
--- a/src/os/aix/vm/os_aix.cpp
+++ b/src/os/aix/vm/os_aix.cpp
@@ -4184,8 +4184,7 @@
 
   /* Scan the directory */
   bool result = true;
-  char buf[sizeof(struct dirent) + MAX_PATH];
-  while (result && (ptr = ::readdir(dir)) != NULL) {
+  while (result && (ptr = readdir(dir)) != NULL) {
     if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) {
       result = false;
     }
diff --git a/src/os/aix/vm/os_aix.inline.hpp b/src/os/aix/vm/os_aix.inline.hpp
index a97c94c..421ea34 100644
--- a/src/os/aix/vm/os_aix.inline.hpp
+++ b/src/os/aix/vm/os_aix.inline.hpp
@@ -92,19 +92,6 @@
 
 inline const int os::default_file_open_flags() { return 0;}
 
-inline DIR* os::opendir(const char* dirname)
-{
-  assert(dirname != NULL, "just checking");
-  return ::opendir(dirname);
-}
-
-inline int os::readdir_buf_size(const char *path)
-{
-  // according to aix sys/limits, NAME_MAX must be retrieved at runtime. */
-  const long my_NAME_MAX = pathconf(path, _PC_NAME_MAX);
-  return my_NAME_MAX + sizeof(dirent) + 1;
-}
-
 inline jlong os::lseek(int fd, jlong offset, int whence) {
   return (jlong) ::lseek64(fd, offset, whence);
 }
@@ -121,28 +108,6 @@
   return ::ftruncate64(fd, length);
 }
 
-inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf)
-{
-  dirent* p;
-  int status;
-  assert(dirp != NULL, "just checking");
-
-  // NOTE: Linux readdir_r (on RH 6.2 and 7.2 at least) is NOT like the POSIX
-  // version. Here is the doc for this function:
-  // http://www.gnu.org/manual/glibc-2.2.3/html_node/libc_262.html
-
-  if((status = ::readdir_r(dirp, dbuf, &p)) != 0) {
-    errno = status;
-    return NULL;
-  } else
-    return p;
-}
-
-inline int os::closedir(DIR *dirp) {
-  assert(dirp != NULL, "argument is NULL");
-  return ::closedir(dirp);
-}
-
 // macros for restartable system calls
 
 #define RESTARTABLE(_cmd, _result) do { \
diff --git a/src/os/aix/vm/perfMemory_aix.cpp b/src/os/aix/vm/perfMemory_aix.cpp
index 96f8451..80ae9e7 100644
--- a/src/os/aix/vm/perfMemory_aix.cpp
+++ b/src/os/aix/vm/perfMemory_aix.cpp
@@ -612,9 +612,8 @@
   // to determine the user name for the process id.
   //
   struct dirent* dentry;
-  char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname), mtInternal);
   errno = 0;
-  while ((dentry = os::readdir(tmpdirp, (struct dirent *)tdbuf)) != NULL) {
+  while ((dentry = os::readdir(tmpdirp)) != NULL) {
 
     // check if the directory entry is a hsperfdata file
     if (strncmp(dentry->d_name, PERFDATA_NAME, strlen(PERFDATA_NAME)) != 0) {
@@ -648,9 +647,8 @@
     }
 
     struct dirent* udentry;
-    char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name), mtInternal);
     errno = 0;
-    while ((udentry = os::readdir(subdirp, (struct dirent *)udbuf)) != NULL) {
+    while ((udentry = os::readdir(subdirp)) != NULL) {
 
       if (filename_to_pid(udentry->d_name) == vmid) {
         struct stat statbuf;
@@ -694,11 +692,9 @@
       }
     }
     os::closedir(subdirp);
-    FREE_C_HEAP_ARRAY(char, udbuf, mtInternal);
     FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
   }
   os::closedir(tmpdirp);
-  FREE_C_HEAP_ARRAY(char, tdbuf, mtInternal);
 
   return(oldest_user);
 }
@@ -774,10 +770,8 @@
   // loop under these conditions is dependent upon the implementation of
   // opendir/readdir.
   struct dirent* entry;
-  char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname), mtInternal);
-
   errno = 0;
-  while ((entry = os::readdir(dirp, (struct dirent *)dbuf)) != NULL) {
+  while ((entry = os::readdir(dirp)) != NULL) {
 
     pid_t pid = filename_to_pid(entry->d_name);
 
@@ -816,7 +810,6 @@
   // Close the directory and reset the current working directory.
   close_directory_secure_cwd(dirp, saved_cwd_fd);
 
-  FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
 }
 
 // Make the user specific temporary directory. Returns true if
diff --git a/src/os/bsd/vm/os_bsd.cpp b/src/os/bsd/vm/os_bsd.cpp
index 99a0399..1c0336e 100644
--- a/src/os/bsd/vm/os_bsd.cpp
+++ b/src/os/bsd/vm/os_bsd.cpp
@@ -3957,8 +3957,7 @@
 
   /* Scan the directory */
   bool result = true;
-  char buf[sizeof(struct dirent) + MAX_PATH];
-  while (result && (ptr = ::readdir(dir)) != NULL) {
+  while (result && (ptr = readdir(dir)) != NULL) {
     if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) {
       result = false;
     }
diff --git a/src/os/bsd/vm/os_bsd.inline.hpp b/src/os/bsd/vm/os_bsd.inline.hpp
index 10b3994..c35abf4 100644
--- a/src/os/bsd/vm/os_bsd.inline.hpp
+++ b/src/os/bsd/vm/os_bsd.inline.hpp
@@ -95,17 +95,6 @@
 
 inline const int os::default_file_open_flags() { return 0;}
 
-inline DIR* os::opendir(const char* dirname)
-{
-  assert(dirname != NULL, "just checking");
-  return ::opendir(dirname);
-}
-
-inline int os::readdir_buf_size(const char *path)
-{
-  return NAME_MAX + sizeof(dirent) + 1;
-}
-
 inline jlong os::lseek(int fd, jlong offset, int whence) {
   return (jlong) ::lseek(fd, offset, whence);
 }
@@ -122,28 +111,6 @@
   return ::ftruncate(fd, length);
 }
 
-inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf)
-{
-  dirent* p;
-  int status;
-  assert(dirp != NULL, "just checking");
-
-  // NOTE: Bsd readdir_r (on RH 6.2 and 7.2 at least) is NOT like the POSIX
-  // version. Here is the doc for this function:
-  // http://www.gnu.org/manual/glibc-2.2.3/html_node/libc_262.html
-
-  if((status = ::readdir_r(dirp, dbuf, &p)) != 0) {
-    errno = status;
-    return NULL;
-  } else
-    return p;
-}
-
-inline int os::closedir(DIR *dirp) {
-  assert(dirp != NULL, "argument is NULL");
-  return ::closedir(dirp);
-}
-
 // macros for restartable system calls
 
 #define RESTARTABLE(_cmd, _result) do { \
diff --git a/src/os/bsd/vm/perfMemory_bsd.cpp b/src/os/bsd/vm/perfMemory_bsd.cpp
index df4fca6..802d090 100644
--- a/src/os/bsd/vm/perfMemory_bsd.cpp
+++ b/src/os/bsd/vm/perfMemory_bsd.cpp
@@ -533,9 +533,8 @@
   // to determine the user name for the process id.
   //
   struct dirent* dentry;
-  char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname), mtInternal);
   errno = 0;
-  while ((dentry = os::readdir(tmpdirp, (struct dirent *)tdbuf)) != NULL) {
+  while ((dentry = os::readdir(tmpdirp)) != NULL) {
 
     // check if the directory entry is a hsperfdata file
     if (strncmp(dentry->d_name, PERFDATA_NAME, strlen(PERFDATA_NAME)) != 0) {
@@ -557,9 +556,8 @@
     }
 
     struct dirent* udentry;
-    char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name), mtInternal);
     errno = 0;
-    while ((udentry = os::readdir(subdirp, (struct dirent *)udbuf)) != NULL) {
+    while ((udentry = os::readdir(subdirp)) != NULL) {
 
       if (filename_to_pid(udentry->d_name) == vmid) {
         struct stat statbuf;
@@ -603,11 +601,9 @@
       }
     }
     os::closedir(subdirp);
-    FREE_C_HEAP_ARRAY(char, udbuf, mtInternal);
     FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
   }
   os::closedir(tmpdirp);
-  FREE_C_HEAP_ARRAY(char, tdbuf, mtInternal);
 
   return(oldest_user);
 }
@@ -686,10 +682,8 @@
   // opendir/readdir.
   //
   struct dirent* entry;
-  char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname), mtInternal);
-
   errno = 0;
-  while ((entry = os::readdir(dirp, (struct dirent *)dbuf)) != NULL) {
+  while ((entry = os::readdir(dirp)) != NULL) {
 
     pid_t pid = filename_to_pid(entry->d_name);
 
@@ -729,7 +723,6 @@
   // close the directory and reset the current working directory
   close_directory_secure_cwd(dirp, saved_cwd_fd);
 
-  FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
 }
 
 // make the user specific temporary directory. Returns true if
diff --git a/src/os/linux/vm/os_linux.cpp b/src/os/linux/vm/os_linux.cpp
index a499e49..03cabfe 100644
--- a/src/os/linux/vm/os_linux.cpp
+++ b/src/os/linux/vm/os_linux.cpp
@@ -5501,8 +5501,7 @@
 
   /* Scan the directory */
   bool result = true;
-  char buf[sizeof(struct dirent) + MAX_PATH];
-  while (result && (ptr = ::readdir(dir)) != NULL) {
+  while (result && (ptr = readdir(dir)) != NULL) {
     if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) {
       result = false;
     }
diff --git a/src/os/linux/vm/os_linux.inline.hpp b/src/os/linux/vm/os_linux.inline.hpp
index fea10fa..a23bd56 100644
--- a/src/os/linux/vm/os_linux.inline.hpp
+++ b/src/os/linux/vm/os_linux.inline.hpp
@@ -87,17 +87,6 @@
 
 inline const int os::default_file_open_flags() { return 0;}
 
-inline DIR* os::opendir(const char* dirname)
-{
-  assert(dirname != NULL, "just checking");
-  return ::opendir(dirname);
-}
-
-inline int os::readdir_buf_size(const char *path)
-{
-  return NAME_MAX + sizeof(dirent) + 1;
-}
-
 inline jlong os::lseek(int fd, jlong offset, int whence) {
   return (jlong) ::lseek64(fd, offset, whence);
 }
@@ -114,33 +103,6 @@
   return ::ftruncate64(fd, length);
 }
 
-// readdir_r has been deprecated since glibc 2.24.
-// See https://sourceware.org/bugzilla/show_bug.cgi?id=19056 for more details.
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf)
-{
-
-  dirent* p;
-  int status;
-  assert(dirp != NULL, "just checking");
-
-  // NOTE: Linux readdir_r (on RH 6.2 and 7.2 at least) is NOT like the POSIX
-  // version. Here is the doc for this function:
-  // http://www.gnu.org/manual/glibc-2.2.3/html_node/libc_262.html
-
-  if((status = ::readdir_r(dirp, dbuf, &p)) != 0) {
-    errno = status;
-    return NULL;
-  } else
-    return p;
-
-}
-
-inline int os::closedir(DIR *dirp) {
-  assert(dirp != NULL, "argument is NULL");
-  return ::closedir(dirp);
-}
-
 // macros for restartable system calls
 
 #define RESTARTABLE(_cmd, _result) do { \
diff --git a/src/os/linux/vm/perfMemory_linux.cpp b/src/os/linux/vm/perfMemory_linux.cpp
index 4143f65..8293b71 100644
--- a/src/os/linux/vm/perfMemory_linux.cpp
+++ b/src/os/linux/vm/perfMemory_linux.cpp
@@ -533,9 +533,8 @@
   // to determine the user name for the process id.
   //
   struct dirent* dentry;
-  char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname), mtInternal);
   errno = 0;
-  while ((dentry = os::readdir(tmpdirp, (struct dirent *)tdbuf)) != NULL) {
+  while ((dentry = os::readdir(tmpdirp)) != NULL) {
 
     // check if the directory entry is a hsperfdata file
     if (strncmp(dentry->d_name, PERFDATA_NAME, strlen(PERFDATA_NAME)) != 0) {
@@ -569,9 +568,8 @@
     }
 
     struct dirent* udentry;
-    char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name), mtInternal);
     errno = 0;
-    while ((udentry = os::readdir(subdirp, (struct dirent *)udbuf)) != NULL) {
+    while ((udentry = os::readdir(subdirp)) != NULL) {
 
       if (filename_to_pid(udentry->d_name) == vmid) {
         struct stat statbuf;
@@ -615,11 +613,9 @@
       }
     }
     os::closedir(subdirp);
-    FREE_C_HEAP_ARRAY(char, udbuf, mtInternal);
     FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
   }
   os::closedir(tmpdirp);
-  FREE_C_HEAP_ARRAY(char, tdbuf, mtInternal);
 
   return(oldest_user);
 }
@@ -698,10 +694,8 @@
   // opendir/readdir.
   //
   struct dirent* entry;
-  char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname), mtInternal);
-
   errno = 0;
-  while ((entry = os::readdir(dirp, (struct dirent *)dbuf)) != NULL) {
+  while ((entry = os::readdir(dirp)) != NULL) {
 
     pid_t pid = filename_to_pid(entry->d_name);
 
@@ -738,8 +732,6 @@
 
   // close the directory and reset the current working directory
   close_directory_secure_cwd(dirp, saved_cwd_fd);
-
-  FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
 }
 
 // make the user specific temporary directory. Returns true if
diff --git a/src/os/posix/vm/os_posix.cpp b/src/os/posix/vm/os_posix.cpp
index 534b192..a43a855 100644
--- a/src/os/posix/vm/os_posix.cpp
+++ b/src/os/posix/vm/os_posix.cpp
@@ -302,6 +302,21 @@
   return ::fdopen(fd, mode);
 }
 
+DIR* os::opendir(const char* dirname) {
+  assert(dirname != NULL, "just checking");
+  return ::opendir(dirname);
+}
+
+struct dirent* os::readdir(DIR* dirp) {
+  assert(dirp != NULL, "just checking");
+  return ::readdir(dirp);
+}
+
+int os::closedir(DIR *dirp) {
+  assert(dirp != NULL, "just checking");
+  return ::closedir(dirp);
+}
+
 // Builds a platform dependent Agent_OnLoad_<lib_name> function name
 // which is used to find statically linked in agents.
 // Parameters:
diff --git a/src/os/solaris/vm/os_solaris.cpp b/src/os/solaris/vm/os_solaris.cpp
index 014a6d3..6956f63 100644
--- a/src/os/solaris/vm/os_solaris.cpp
+++ b/src/os/solaris/vm/os_solaris.cpp
@@ -5163,9 +5163,7 @@
 
   /* Scan the directory */
   bool result = true;
-  char buf[sizeof(struct dirent) + MAX_PATH];
-  struct dirent *dbuf = (struct dirent *) buf;
-  while (result && (ptr = readdir(dir, dbuf)) != NULL) {
+  while (result && (ptr = readdir(dir)) != NULL) {
     if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) {
       result = false;
     }
diff --git a/src/os/solaris/vm/os_solaris.inline.hpp b/src/os/solaris/vm/os_solaris.inline.hpp
index 921fcf7..8e095ab 100644
--- a/src/os/solaris/vm/os_solaris.inline.hpp
+++ b/src/os/solaris/vm/os_solaris.inline.hpp
@@ -71,37 +71,6 @@
 }
 inline void os::dll_unload(void *lib) { ::dlclose(lib); }
 
-inline DIR* os::opendir(const char* dirname) {
-  assert(dirname != NULL, "just checking");
-  return ::opendir(dirname);
-}
-
-inline int os::readdir_buf_size(const char *path) {
-  int size = pathconf(path, _PC_NAME_MAX);
-  return (size < 0 ? MAXPATHLEN : size) + sizeof(dirent) + 1;
-}
-
-inline struct dirent* os::readdir(DIR* dirp, dirent* dbuf) {
-  assert(dirp != NULL, "just checking");
-#if defined(_LP64) || defined(_GNU_SOURCE) || _FILE_OFFSET_BITS==64
-  dirent* p;
-  int status;
-
-  if((status = ::readdir_r(dirp, dbuf, &p)) != 0) {
-    errno = status;
-    return NULL;
-  } else
-    return p;
-#else  // defined(_LP64) || defined(_GNU_SOURCE) || _FILE_OFFSET_BITS==64
-  return ::readdir_r(dirp, dbuf);
-#endif // defined(_LP64) || defined(_GNU_SOURCE) || _FILE_OFFSET_BITS==64
-}
-
-inline int os::closedir(DIR *dirp) {
-  assert(dirp != NULL, "argument is NULL");
-  return ::closedir(dirp);
-}
-
 //////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
 
diff --git a/src/os/solaris/vm/perfMemory_solaris.cpp b/src/os/solaris/vm/perfMemory_solaris.cpp
index 7c6f616..8af1b24 100644
--- a/src/os/solaris/vm/perfMemory_solaris.cpp
+++ b/src/os/solaris/vm/perfMemory_solaris.cpp
@@ -524,9 +524,8 @@
   // to determine the user name for the process id.
   //
   struct dirent* dentry;
-  char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname), mtInternal);
   errno = 0;
-  while ((dentry = os::readdir(tmpdirp, (struct dirent *)tdbuf)) != NULL) {
+  while ((dentry = os::readdir(tmpdirp)) != NULL) {
 
     // check if the directory entry is a hsperfdata file
     if (strncmp(dentry->d_name, PERFDATA_NAME, strlen(PERFDATA_NAME)) != 0) {
@@ -560,9 +559,8 @@
     }
 
     struct dirent* udentry;
-    char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name), mtInternal);
     errno = 0;
-    while ((udentry = os::readdir(subdirp, (struct dirent *)udbuf)) != NULL) {
+    while ((udentry = os::readdir(subdirp)) != NULL) {
 
       if (filename_to_pid(udentry->d_name) == vmid) {
         struct stat statbuf;
@@ -606,11 +604,9 @@
       }
     }
     os::closedir(subdirp);
-    FREE_C_HEAP_ARRAY(char, udbuf, mtInternal);
     FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
   }
   os::closedir(tmpdirp);
-  FREE_C_HEAP_ARRAY(char, tdbuf, mtInternal);
 
   return(oldest_user);
 }
@@ -737,10 +733,8 @@
   // opendir/readdir.
   //
   struct dirent* entry;
-  char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname), mtInternal);
-
   errno = 0;
-  while ((entry = os::readdir(dirp, (struct dirent *)dbuf)) != NULL) {
+  while ((entry = os::readdir(dirp)) != NULL) {
 
     pid_t pid = filename_to_pid(entry->d_name);
 
@@ -780,7 +774,6 @@
   // close the directory and reset the current working directory
   close_directory_secure_cwd(dirp, saved_cwd_fd);
 
-  FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
 }
 
 // make the user specific temporary directory. Returns true if
diff --git a/src/os/windows/vm/os_windows.cpp b/src/os/windows/vm/os_windows.cpp
index 2aca06d..06eebde 100644
--- a/src/os/windows/vm/os_windows.cpp
+++ b/src/os/windows/vm/os_windows.cpp
@@ -1172,14 +1172,12 @@
     return dirp;
 }
 
-/* parameter dbuf unused on Windows */
-
 struct dirent *
-os::readdir(DIR *dirp, dirent *dbuf)
+os::readdir(DIR *dirp)
 {
     assert(dirp != NULL, "just checking");      // hotspot change
     if (dirp->handle == INVALID_HANDLE_VALUE) {
-        return 0;
+        return NULL;
     }
 
     strcpy(dirp->dirent.d_name, dirp->find_data.cFileName);
@@ -1187,7 +1185,7 @@
     if (!FindNextFile(dirp->handle, &dirp->find_data)) {
         if (GetLastError() == ERROR_INVALID_HANDLE) {
             errno = EBADF;
-            return 0;
+            return NULL;
         }
         FindClose(dirp->handle);
         dirp->handle = INVALID_HANDLE_VALUE;
diff --git a/src/os/windows/vm/os_windows.inline.hpp b/src/os/windows/vm/os_windows.inline.hpp
index 30ce468..5dac11c 100644
--- a/src/os/windows/vm/os_windows.inline.hpp
+++ b/src/os/windows/vm/os_windows.inline.hpp
@@ -65,14 +65,6 @@
   return true;
 }
 
-inline int os::readdir_buf_size(const char *path)
-{
-  /* As Windows doesn't use the directory entry buffer passed to
-     os::readdir() this can be as short as possible */
-
-  return 1;
-}
-
 // Bang the shadow pages if they need to be touched to be mapped.
 inline void os::bang_stack_shadow_pages() {
   // Write to each page of our new frame to force OS mapping.
diff --git a/src/os/windows/vm/perfMemory_windows.cpp b/src/os/windows/vm/perfMemory_windows.cpp
index b32fd9a..c4cf8c1 100644
--- a/src/os/windows/vm/perfMemory_windows.cpp
+++ b/src/os/windows/vm/perfMemory_windows.cpp
@@ -316,9 +316,8 @@
   // to determine the user name for the process id.
   //
   struct dirent* dentry;
-  char* tdbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(tmpdirname), mtInternal);
   errno = 0;
-  while ((dentry = os::readdir(tmpdirp, (struct dirent *)tdbuf)) != NULL) {
+  while ((dentry = os::readdir(tmpdirp)) != NULL) {
 
     // check if the directory entry is a hsperfdata file
     if (strncmp(dentry->d_name, PERFDATA_NAME, strlen(PERFDATA_NAME)) != 0) {
@@ -351,9 +350,8 @@
     }
 
     struct dirent* udentry;
-    char* udbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(usrdir_name), mtInternal);
     errno = 0;
-    while ((udentry = os::readdir(subdirp, (struct dirent *)udbuf)) != NULL) {
+    while ((udentry = os::readdir(subdirp)) != NULL) {
 
       if (filename_to_pid(udentry->d_name) == vmid) {
         struct stat statbuf;
@@ -405,11 +403,9 @@
       }
     }
     os::closedir(subdirp);
-    FREE_C_HEAP_ARRAY(char, udbuf, mtInternal);
     FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
   }
   os::closedir(tmpdirp);
-  FREE_C_HEAP_ARRAY(char, tdbuf, mtInternal);
 
   return(latest_user);
 }
@@ -639,9 +635,8 @@
   // opendir/readdir.
   //
   struct dirent* entry;
-  char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname), mtInternal);
   errno = 0;
-  while ((entry = os::readdir(dirp, (struct dirent *)dbuf)) != NULL) {
+  while ((entry = os::readdir(dirp)) != NULL) {
 
     int pid = filename_to_pid(entry->d_name);
 
@@ -682,7 +677,6 @@
     errno = 0;
   }
   os::closedir(dirp);
-  FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
 }
 
 // create a file mapping object with the requested name, and size
diff --git a/src/share/vm/c1/c1_Optimizer.cpp b/src/share/vm/c1/c1_Optimizer.cpp
index f366462..a2ba0e3 100644
--- a/src/share/vm/c1/c1_Optimizer.cpp
+++ b/src/share/vm/c1/c1_Optimizer.cpp
@@ -175,6 +175,12 @@
   for_each_phi_fun(t_block, phi, return; );
   for_each_phi_fun(f_block, phi, return; );
 
+  // Only replace safepoint gotos if state_before information is available (i.e. the If itself is a safepoint)
+  bool is_safepoint = if_->is_safepoint();
+  if (!is_safepoint && (t_goto->is_safepoint() || f_goto->is_safepoint())) {
+    return;
+  }
+
   // 2) substitute conditional expression
   //    with an IfOp followed by a Goto
   // cut if_ away and get node before
@@ -203,7 +209,7 @@
 
   // append Goto to successor
   ValueStack* state_before = if_->state_before();
-  Goto* goto_ = new Goto(sux, state_before, if_->is_safepoint() || t_goto->is_safepoint() || f_goto->is_safepoint());
+  Goto* goto_ = new Goto(sux, state_before, is_safepoint);
 
   // prepare state for Goto
   ValueStack* goto_state = if_state;
diff --git a/src/share/vm/ci/bcEscapeAnalyzer.cpp b/src/share/vm/ci/bcEscapeAnalyzer.cpp
index 4c4db3e..2b9e0e5 100644
--- a/src/share/vm/ci/bcEscapeAnalyzer.cpp
+++ b/src/share/vm/ci/bcEscapeAnalyzer.cpp
@@ -1170,45 +1170,43 @@
   }
 }
 
-bool BCEscapeAnalyzer::do_analysis() {
+void BCEscapeAnalyzer::do_analysis() {
   Arena* arena = CURRENT_ENV->arena();
   // identify basic blocks
   _methodBlocks = _method->get_method_blocks();
 
   iterate_blocks(arena);
-  // TEMPORARY
-  return true;
 }
 
 vmIntrinsics::ID BCEscapeAnalyzer::known_intrinsic() {
   vmIntrinsics::ID iid = method()->intrinsic_id();
-
   if (iid == vmIntrinsics::_getClass ||
       iid ==  vmIntrinsics::_fillInStackTrace ||
-      iid == vmIntrinsics::_hashCode)
+      iid == vmIntrinsics::_hashCode) {
     return iid;
-  else
+  } else {
     return vmIntrinsics::_none;
+  }
 }
 
-bool BCEscapeAnalyzer::compute_escape_for_intrinsic(vmIntrinsics::ID iid) {
+void BCEscapeAnalyzer::compute_escape_for_intrinsic(vmIntrinsics::ID iid) {
   ArgumentMap arg;
   arg.clear();
   switch (iid) {
-  case vmIntrinsics::_getClass:
-    _return_local = false;
-    break;
-  case vmIntrinsics::_fillInStackTrace:
-    arg.set(0); // 'this'
-    set_returned(arg);
-    break;
-  case vmIntrinsics::_hashCode:
-    // initialized state is correct
-    break;
+    case vmIntrinsics::_getClass:
+      _return_local = false;
+      _return_allocated = false;
+      break;
+    case vmIntrinsics::_fillInStackTrace:
+      arg.set(0); // 'this'
+      set_returned(arg);
+      break;
+    case vmIntrinsics::_hashCode:
+      // initialized state is correct
+      break;
   default:
     assert(false, "unexpected intrinsic");
   }
-  return true;
 }
 
 void BCEscapeAnalyzer::initialize() {
@@ -1279,7 +1277,7 @@
   vmIntrinsics::ID iid = known_intrinsic();
 
   // check if method can be analyzed
-  if (iid ==  vmIntrinsics::_none && (method()->is_abstract() || method()->is_native() || !method()->holder()->is_initialized()
+  if (iid == vmIntrinsics::_none && (method()->is_abstract() || method()->is_native() || !method()->holder()->is_initialized()
       || _level > MaxBCEAEstimateLevel
       || method()->code_size() > MaxBCEAEstimateSize)) {
     if (BCEATraceLevel >= 1) {
@@ -1312,8 +1310,6 @@
     tty->print_cr(" (%d bytes)", method()->code_size());
   }
 
-  bool success;
-
   initialize();
 
   // Do not scan method if it has no object parameters and
@@ -1329,9 +1325,9 @@
   }
 
   if (iid != vmIntrinsics::_none)
-    success = compute_escape_for_intrinsic(iid);
+    compute_escape_for_intrinsic(iid);
   else {
-    success = do_analysis();
+    do_analysis();
   }
 
   // don't store interprocedural escape information if it introduces
diff --git a/src/share/vm/ci/bcEscapeAnalyzer.hpp b/src/share/vm/ci/bcEscapeAnalyzer.hpp
index 74a0d31..acca5a2 100644
--- a/src/share/vm/ci/bcEscapeAnalyzer.hpp
+++ b/src/share/vm/ci/bcEscapeAnalyzer.hpp
@@ -101,8 +101,8 @@
   void clear_escape_info();
   void compute_escape_info();
   vmIntrinsics::ID known_intrinsic();
-  bool compute_escape_for_intrinsic(vmIntrinsics::ID iid);
-  bool do_analysis();
+  void compute_escape_for_intrinsic(vmIntrinsics::ID iid);
+  void do_analysis();
 
   void read_escape_info();
 
diff --git a/src/share/vm/ci/ciStreams.cpp b/src/share/vm/ci/ciStreams.cpp
index 76520fd..a4eaf47 100644
--- a/src/share/vm/ci/ciStreams.cpp
+++ b/src/share/vm/ci/ciStreams.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -361,14 +361,14 @@
 ciMethod* ciBytecodeStream::get_method(bool& will_link, ciSignature* *declared_signature_result) {
   VM_ENTRY_MARK;
   ciEnv* env = CURRENT_ENV;
-  constantPoolHandle cpool(_method->get_Method()->constants());
+  constantPoolHandle cpool(THREAD, _method->get_Method()->constants());
   ciMethod* m = env->get_method_by_index(cpool, get_method_index(), cur_bc(), _holder);
   will_link = m->is_loaded();
 
   // Use the MethodType stored in the CP cache to create a signature
   // with correct types (in respect to class loaders).
   if (has_method_type()) {
-    ciSymbol*     sig_sym     = env->get_symbol(cpool->symbol_at(get_method_signature_index()));
+    ciSymbol*     sig_sym     = env->get_symbol(cpool->symbol_at(get_method_signature_index(cpool)));
     ciKlass*      pool_holder = env->get_klass(cpool->pool_holder());
     ciMethodType* method_type = get_method_type();
     ciSignature* declared_signature = new (env->arena()) ciSignature(pool_holder, sig_sym, method_type);
@@ -465,9 +465,8 @@
 // Get the constant pool index of the signature of the method
 // referenced by the current bytecode.  Used for generating
 // deoptimization information.
-int ciBytecodeStream::get_method_signature_index() {
+int ciBytecodeStream::get_method_signature_index(const constantPoolHandle& cpool) {
   GUARDED_VM_ENTRY(
-    ConstantPool* cpool = _holder->get_instanceKlass()->constants();
     const int method_index = get_method_index();
     const int name_and_type_index = cpool->name_and_type_ref_index_at(method_index);
     return cpool->signature_ref_index_at(name_and_type_index);
diff --git a/src/share/vm/ci/ciStreams.hpp b/src/share/vm/ci/ciStreams.hpp
index 091aa1b..07e573b 100644
--- a/src/share/vm/ci/ciStreams.hpp
+++ b/src/share/vm/ci/ciStreams.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -264,7 +264,7 @@
   ciMethodType* get_method_type();
   ciKlass*      get_declared_method_holder();
   int           get_method_holder_index();
-  int           get_method_signature_index();
+  int           get_method_signature_index(const constantPoolHandle& cpool);
 
   // Get the resolved references arrays from the constant pool
   ciObjArray* get_resolved_references();
diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
index e69839c..aa6bbc8 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
@@ -2790,6 +2790,7 @@
       // Previous workers starting region is valid
       // so let's iterate from there
       start_ind = (cs_size * (worker_i - 1)) / active_workers;
+      OrderAccess::loadload();
       result = _worker_cset_start_region[worker_i - 1];
     }
 
diff --git a/src/share/vm/oops/instanceKlass.cpp b/src/share/vm/oops/instanceKlass.cpp
index 6aff230..0204188 100644
--- a/src/share/vm/oops/instanceKlass.cpp
+++ b/src/share/vm/oops/instanceKlass.cpp
@@ -147,7 +147,7 @@
       len = name->utf8_length();                                 \
     }                                                            \
     HOTSPOT_CLASS_INITIALIZATION_##type(                         \
-      data, len, (clss)->class_loader(), thread_type);           \
+      data, len, (void *)(clss)->class_loader(), thread_type); \
   }
 
 #define DTRACE_CLASSINIT_PROBE_WAIT(type, clss, thread_type, wait) \
@@ -160,7 +160,7 @@
       len = name->utf8_length();                                 \
     }                                                            \
     HOTSPOT_CLASS_INITIALIZATION_##type(                         \
-      data, len, (clss)->class_loader(), thread_type, wait);     \
+      data, len, (void *)(clss)->class_loader(), thread_type, wait); \
   }
 #endif /* USDT2 */
 
diff --git a/src/share/vm/oops/instanceKlass.hpp b/src/share/vm/oops/instanceKlass.hpp
index a5f2eb3..444eadd 100644
--- a/src/share/vm/oops/instanceKlass.hpp
+++ b/src/share/vm/oops/instanceKlass.hpp
@@ -225,6 +225,7 @@
   // _is_marked_dependent can be set concurrently, thus cannot be part of the
   // _misc_flags.
   bool            _is_marked_dependent;  // used for marking during flushing and deoptimization
+  bool            _is_being_redefined;   // used for locking redefinition
   bool            _has_unloaded_dependent;
 
   enum {
@@ -667,6 +668,10 @@
     _nonstatic_oop_map_size = words;
   }
 
+  // Redefinition locking.  Class can only be redefined by one thread at a time.
+  bool is_being_redefined() const          { return _is_being_redefined; }
+  void set_is_being_redefined(bool value)  { _is_being_redefined = value; }
+
   // RedefineClasses() support for previous versions:
   void add_previous_version(instanceKlassHandle ikh, int emcp_method_count);
 
diff --git a/src/share/vm/oops/klass.hpp b/src/share/vm/oops/klass.hpp
index 9f612c3..df01892 100644
--- a/src/share/vm/oops/klass.hpp
+++ b/src/share/vm/oops/klass.hpp
@@ -335,10 +335,11 @@
     _lh_header_size_mask        = right_n_bits(BitsPerByte),  // shifted mask
     _lh_array_tag_bits          = 2,
     _lh_array_tag_shift         = BitsPerInt - _lh_array_tag_bits,
-    _lh_array_tag_type_value    = ~0x00,  // 0xC0000000 >> 30
     _lh_array_tag_obj_value     = ~0x01   // 0x80000000 >> 30
   };
 
+  static const unsigned int _lh_array_tag_type_value = 0Xffffffff; // all bits set (formerly the enum value ~0x00), i.e. 0xC0000000 >> 30
+
   static int layout_helper_size_in_bytes(jint lh) {
     assert(lh > (jint)_lh_neutral_value, "must be instance");
     return (int) lh & ~_lh_instance_slow_path_bit;
diff --git a/src/share/vm/oops/klassVtable.cpp b/src/share/vm/oops/klassVtable.cpp
index 315d97f..61dd4c2 100644
--- a/src/share/vm/oops/klassVtable.cpp
+++ b/src/share/vm/oops/klassVtable.cpp
@@ -663,6 +663,7 @@
   Method* super_method = NULL;
   InstanceKlass *holder = NULL;
   Method* recheck_method =  NULL;
+  bool found_pkg_prvt_method = false;
   while (k != NULL) {
     // lookup through the hierarchy for a method with matching name and sign.
     super_method = InstanceKlass::cast(k)->lookup_method(name, signature);
@@ -684,12 +685,31 @@
         return false;
       // else keep looking for transitive overrides
       }
+      // If we get here then one of the super classes has a package private method
+      // that will not get overridden because it is in a different package.  But,
+      // that package private method does "override" any matching methods in super
+      // interfaces, so there will be no miranda vtable entry created.  So, set flag
+      // to TRUE for use below, in case there are no methods in super classes that
+      // this target method overrides.
+      assert(super_method->is_package_private(), "super_method must be package private");
+      assert(!superk->is_same_class_package(classloader(), classname),
+             "Must be different packages");
+      found_pkg_prvt_method = true;
     }
 
     // Start with lookup result and continue to search up
     k = superk->super(); // haven't found an override match yet; continue to look
   }
 
+  // If found_pkg_prvt_method is set, then the ONLY matching method in the
+  // superclasses is package private in another package. That matching method will
+  // prevent a miranda vtable entry from being created. Because the target method cannot
+  // override the package private method in another package, it needs to be the root
+  // for its own vtable entry.
+  if (found_pkg_prvt_method) {
+     return true;
+  }
+
   // if the target method is public or protected it may have a matching
   // miranda method in the super, whose entry it should re-use.
   // Actually, to handle cases that javac would not generate, we need
@@ -697,7 +717,7 @@
   InstanceKlass *sk = InstanceKlass::cast(super);
   if (sk->has_miranda_methods()) {
     if (sk->lookup_method_in_all_interfaces(name, signature, Klass::find_defaults) != NULL) {
-      return false;  // found a matching miranda; we do not need a new entry
+      return false; // found a matching miranda; we do not need a new entry
     }
   }
   return true; // found no match; we need a new entry
diff --git a/src/share/vm/opto/connode.cpp b/src/share/vm/opto/connode.cpp
index 604e971..ff56804 100644
--- a/src/share/vm/opto/connode.cpp
+++ b/src/share/vm/opto/connode.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1083,7 +1083,11 @@
     assert(rxlo == (int)rxlo && rxhi == (int)rxhi, "x should not overflow");
     assert(rylo == (int)rylo && ryhi == (int)ryhi, "y should not overflow");
     Node* cx = phase->C->constrained_convI2L(phase, x, TypeInt::make(rxlo, rxhi, widen), NULL);
+    Node *hook = new (phase->C) Node(1);
+    hook->init_req(0, cx);  // Add a use to cx to prevent him from dying
     Node* cy = phase->C->constrained_convI2L(phase, y, TypeInt::make(rylo, ryhi, widen), NULL);
+    hook->del_req(0);  // Just yank bogus edge
+    hook->destruct();
     switch (op) {
     case Op_AddI:  return new (phase->C) AddLNode(cx, cy);
     case Op_SubI:  return new (phase->C) SubLNode(cx, cy);
diff --git a/src/share/vm/opto/graphKit.cpp b/src/share/vm/opto/graphKit.cpp
index 16313f0..dabc7e7 100644
--- a/src/share/vm/opto/graphKit.cpp
+++ b/src/share/vm/opto/graphKit.cpp
@@ -1787,12 +1787,13 @@
 // A better answer would be to separate out card marks from other memory.
 // For now, return the input memory state, so that it can be reused
 // after the call, if this call has restricted memory effects.
-Node* GraphKit::set_predefined_input_for_runtime_call(SafePointNode* call) {
+Node* GraphKit::set_predefined_input_for_runtime_call(SafePointNode* call, Node* narrow_mem) {
   // Set fixed predefined input arguments
   Node* memory = reset_memory();
+  Node* m = narrow_mem == NULL ? memory : narrow_mem;
   call->init_req( TypeFunc::Control,   control()  );
   call->init_req( TypeFunc::I_O,       top()      ); // does no i/o
-  call->init_req( TypeFunc::Memory,    memory     ); // may gc ptrs
+  call->init_req( TypeFunc::Memory,    m          ); // may gc ptrs
   call->init_req( TypeFunc::FramePtr,  frameptr() );
   call->init_req( TypeFunc::ReturnAdr, top()      );
   return memory;
@@ -2382,9 +2383,7 @@
   } else {
     assert(!wide_out, "narrow in => narrow out");
     Node* narrow_mem = memory(adr_type);
-    prev_mem = reset_memory();
-    map()->set_memory(narrow_mem);
-    set_predefined_input_for_runtime_call(call);
+    prev_mem = set_predefined_input_for_runtime_call(call, narrow_mem);
   }
 
   // Hook each parm in order.  Stop looking at the first NULL.
diff --git a/src/share/vm/opto/graphKit.hpp b/src/share/vm/opto/graphKit.hpp
index c584cef..7a363fd 100644
--- a/src/share/vm/opto/graphKit.hpp
+++ b/src/share/vm/opto/graphKit.hpp
@@ -700,7 +700,7 @@
   void  set_predefined_output_for_runtime_call(Node* call,
                                                Node* keep_mem,
                                                const TypePtr* hook_mem);
-  Node* set_predefined_input_for_runtime_call(SafePointNode* call);
+  Node* set_predefined_input_for_runtime_call(SafePointNode* call, Node* narrow_mem = NULL);
 
   // Replace the call with the current state of the kit.  Requires
   // that the call was generated with separate io_projs so that
diff --git a/src/share/vm/opto/ifnode.cpp b/src/share/vm/opto/ifnode.cpp
index 3101455..68f068d 100644
--- a/src/share/vm/opto/ifnode.cpp
+++ b/src/share/vm/opto/ifnode.cpp
@@ -601,7 +601,7 @@
     if( din4->is_Call() &&      // Handle a slow-path call on either arm
         (din4 = din4->in(0)) )
       din4 = din4->in(0);
-    if( din3 == din4 && din3->is_If() )
+    if (din3 != NULL && din3 == din4 && din3->is_If()) // Regions not degraded to a copy
       return din3;              // Skip around diamonds
   }
 
diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp
index 5f2bb30..4bb5ca8 100644
--- a/src/share/vm/opto/library_call.cpp
+++ b/src/share/vm/opto/library_call.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3803,7 +3803,7 @@
   }
   // Now test the correct condition.
   jint  nval = (obj_array
-                ? ((jint)Klass::_lh_array_tag_type_value
+                ? (jint)(Klass::_lh_array_tag_type_value
                    <<    Klass::_lh_array_tag_shift)
                 : Klass::_lh_neutral_value);
   Node* cmp = _gvn.transform(new(C) CmpINode(layout_val, intcon(nval)));
@@ -6759,10 +6759,18 @@
   if (state == NULL) return false;
 
   // Call the stub.
-  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
-                                 OptoRuntime::digestBase_implCompressMB_Type(),
-                                 stubAddr, stubName, TypePtr::BOTTOM,
-                                 src_start, state, ofs, limit);
+  Node *call;
+  if (CCallingConventionRequiresIntsAsLongs) {
+    call = make_runtime_call(RC_LEAF|RC_NO_FP,
+                             OptoRuntime::digestBase_implCompressMB_Type(),
+                             stubAddr, stubName, TypePtr::BOTTOM,
+                             src_start, state, ofs XTOP, limit XTOP);
+  } else {
+    call = make_runtime_call(RC_LEAF|RC_NO_FP,
+                             OptoRuntime::digestBase_implCompressMB_Type(),
+                             stubAddr, stubName, TypePtr::BOTTOM,
+                             src_start, state, ofs, limit);
+  }
   // return ofs (int)
   Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
   set_result(result);
diff --git a/src/share/vm/opto/loopPredicate.cpp b/src/share/vm/opto/loopPredicate.cpp
index 5d2b469..db7c108 100644
--- a/src/share/vm/opto/loopPredicate.cpp
+++ b/src/share/vm/opto/loopPredicate.cpp
@@ -611,7 +611,11 @@
   const TypeInt* idx_type = TypeInt::INT;
   if ((stride > 0) == (scale > 0) == upper) {
     if (TraceLoopPredicate) {
-      predString->print(limit->is_Con() ? "(%d " : "(limit ", con_limit);
+      if (limit->is_Con()) {
+        predString->print("(%d ", con_limit);
+      } else {
+        predString->print("(limit ");
+      }
       predString->print("- %d) ", stride);
     }
     // Check if (limit - stride) may overflow
@@ -639,7 +643,11 @@
     register_new_node(max_idx_expr, ctrl);
   } else {
     if (TraceLoopPredicate) {
-      predString->print(init->is_Con() ? "%d " : "init ", con_init);
+      if (init->is_Con()) {
+        predString->print("%d ", con_init);
+      } else {
+        predString->print("init ");
+      }
     }
     idx_type = _igvn.type(init)->isa_int();
     max_idx_expr = init;
@@ -675,7 +683,11 @@
 
   if (offset && (!offset->is_Con() || con_offset != 0)){
     if (TraceLoopPredicate) {
-      predString->print(offset->is_Con() ? "+ %d " : "+ offset", con_offset);
+      if (offset->is_Con()) {
+        predString->print("+ %d ", con_offset);
+      } else {
+        predString->print("+ offset");
+      }
     }
     // Check if (max_idx_expr + offset) may overflow
     const TypeInt* offset_type = _igvn.type(offset)->isa_int();
diff --git a/src/share/vm/opto/loopTransform.cpp b/src/share/vm/opto/loopTransform.cpp
index 8b656ff..7ef67c1 100644
--- a/src/share/vm/opto/loopTransform.cpp
+++ b/src/share/vm/opto/loopTransform.cpp
@@ -1537,13 +1537,20 @@
 
 //------------------------------adjust_limit-----------------------------------
 // Helper function for add_constraint().
-Node* PhaseIdealLoop::adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl) {
+Node* PhaseIdealLoop::adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl, bool round_up) {
   // Compute "I :: (limit-offset)/scale"
   Node *con = new (C) SubINode(rc_limit, offset);
   register_new_node(con, pre_ctrl);
   Node *X = new (C) DivINode(0, con, scale);
   register_new_node(X, pre_ctrl);
 
+  // When the absolute value of scale is greater than one, the integer
+  // division may round limit down so add one to the limit.
+  if (round_up) {
+    X = new (C) AddINode(X, _igvn.intcon(1));
+    register_new_node(X, pre_ctrl);
+  }
+
   // Adjust loop limit
   loop_limit = (stride_con > 0)
                ? (Node*)(new (C) MinINode(loop_limit, X))
@@ -1584,7 +1591,7 @@
     // (upper_limit-offset) may overflow or underflow.
     // But it is fine since main loop will either have
     // less iterations or will be skipped in such case.
-    *main_limit = adjust_limit(stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl);
+    *main_limit = adjust_limit(stride_con, scale, offset, upper_limit, *main_limit, pre_ctrl, false);
 
     // The underflow limit: low_limit <= scale*I+offset.
     // For pre-loop compute
@@ -1620,7 +1627,8 @@
       // max(pre_limit, original_limit) is used in do_range_check().
     }
     // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
-    *pre_limit = adjust_limit((-stride_con), scale, offset, low_limit, *pre_limit, pre_ctrl);
+    *pre_limit = adjust_limit((-stride_con), scale, offset, low_limit, *pre_limit, pre_ctrl,
+                              scale_con > 1 && stride_con > 0);
 
   } else { // stride_con*scale_con < 0
     // For negative stride*scale pre-loop checks for overflow and
@@ -1646,7 +1654,8 @@
     Node *plus_one = new (C) AddINode(offset, one);
     register_new_node( plus_one, pre_ctrl );
     // Pass (-stride) to indicate pre_loop_cond = NOT(main_loop_cond);
-    *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl);
+    *pre_limit = adjust_limit((-stride_con), scale, plus_one, upper_limit, *pre_limit, pre_ctrl,
+                              scale_con < -1 && stride_con > 0);
 
     if (low_limit->get_int() == -max_jint) {
       if (!RangeLimitCheck) return;
@@ -1681,7 +1690,8 @@
     //       I > (low_limit-(offset+1))/scale
     //   )
 
-    *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl);
+    *main_limit = adjust_limit(stride_con, scale, plus_one, low_limit, *main_limit, pre_ctrl,
+                               false);
   }
 }
 
diff --git a/src/share/vm/opto/loopnode.hpp b/src/share/vm/opto/loopnode.hpp
index d97bf07..0ce98a0 100644
--- a/src/share/vm/opto/loopnode.hpp
+++ b/src/share/vm/opto/loopnode.hpp
@@ -959,7 +959,7 @@
   // loop.  Scale_con, offset and limit are all loop invariant.
   void add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit );
   // Helper function for add_constraint().
-  Node* adjust_limit( int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl );
+  Node* adjust_limit(int stride_con, Node * scale, Node *offset, Node *rc_limit, Node *loop_limit, Node *pre_ctrl, bool round_up);
 
   // Partially peel loop up through last_peel node.
   bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
diff --git a/src/share/vm/opto/matcher.cpp b/src/share/vm/opto/matcher.cpp
index b4dda48..965f1a9 100644
--- a/src/share/vm/opto/matcher.cpp
+++ b/src/share/vm/opto/matcher.cpp
@@ -2043,6 +2043,12 @@
         // Node is shared and has no reason to clone.  Flag it as shared.
         // This causes it to match into a register for the sharing.
         set_shared(n);       // Flag as shared and
+        if (n->is_DecodeNarrowPtr()) {
+          // Oop field/array element loads must be shared but since
+          // they are shared through a DecodeN they may appear to have
+          // a single use so force sharing here.
+          set_shared(n->in(1));
+        }
         mstack.pop();        // remove node from stack
         continue;
       }
@@ -2165,13 +2171,6 @@
           continue; // for(int i = ...)
         }
 
-        if( mop == Op_AddP && m->in(AddPNode::Base)->is_DecodeNarrowPtr()) {
-          // Bases used in addresses must be shared but since
-          // they are shared through a DecodeN they may appear
-          // to have a single use so force sharing here.
-          set_shared(m->in(AddPNode::Base)->in(1));
-        }
-
         // if 'n' and 'm' are part of a graph for BMI instruction, clone this node.
 #ifdef X86
         if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
diff --git a/src/share/vm/opto/memnode.cpp b/src/share/vm/opto/memnode.cpp
index 14b0ff9..590111a 100644
--- a/src/share/vm/opto/memnode.cpp
+++ b/src/share/vm/opto/memnode.cpp
@@ -1359,6 +1359,14 @@
         Node* in = mem->in(i);
         Node*  m = optimize_memory_chain(in, t_oop, this, phase);
         if (m == mem) {
+          if (i == 1) {
+            // if the first edge was a loop, check second edge too.
+            // If both are replaceable - we are in an infinite loop
+            Node *n = optimize_memory_chain(mem->in(2), t_oop, this, phase);
+            if (n == mem) {
+              break;
+            }
+          }
           set_req(Memory, mem->in(cnt - i));
           return this; // made change
         }
diff --git a/src/share/vm/opto/reg_split.cpp b/src/share/vm/opto/reg_split.cpp
index af79891..a132f1f 100644
--- a/src/share/vm/opto/reg_split.cpp
+++ b/src/share/vm/opto/reg_split.cpp
@@ -1171,9 +1171,8 @@
               (deflrg._direct_conflict || deflrg._must_spill)) ||
              // Check for LRG being up in a register and we are inside a high
              // pressure area.  Spill it down immediately.
-             (defup && is_high_pressure(b,&deflrg,insidx))) ) {
+             (defup && is_high_pressure(b,&deflrg,insidx) && !n->is_SpillCopy())) ) {
           assert( !n->rematerialize(), "" );
-          assert( !n->is_SpillCopy(), "" );
           // Do a split at the def site.
           maxlrg = split_DEF( n, b, insidx, maxlrg, Reachblock, debug_defs, splits, slidx );
           // If it wasn't split bail
diff --git a/src/share/vm/opto/runtime.cpp b/src/share/vm/opto/runtime.cpp
index 364152c..57d2f57 100644
--- a/src/share/vm/opto/runtime.cpp
+++ b/src/share/vm/opto/runtime.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -930,12 +930,24 @@
   // create input type (domain)
   int num_args = 4;
   int argcnt = num_args;
+  if(CCallingConventionRequiresIntsAsLongs) {
+    argcnt += 2;
+  }
   const Type** fields = TypeTuple::fields(argcnt);
   int argp = TypeFunc::Parms;
-  fields[argp++] = TypePtr::NOTNULL; // buf
-  fields[argp++] = TypePtr::NOTNULL; // state
-  fields[argp++] = TypeInt::INT;     // ofs
-  fields[argp++] = TypeInt::INT;     // limit
+  if(CCallingConventionRequiresIntsAsLongs) {
+    fields[argp++] = TypePtr::NOTNULL; // buf
+    fields[argp++] = TypePtr::NOTNULL; // state
+    fields[argp++] = TypeLong::LONG;   // ofs
+    fields[argp++] = Type::HALF;
+    fields[argp++] = TypeLong::LONG;   // limit
+    fields[argp++] = Type::HALF;
+  } else {
+    fields[argp++] = TypePtr::NOTNULL; // buf
+    fields[argp++] = TypePtr::NOTNULL; // state
+    fields[argp++] = TypeInt::INT;     // ofs
+    fields[argp++] = TypeInt::INT;     // limit
+  }
   assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
   const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
 
diff --git a/src/share/vm/opto/superword.cpp b/src/share/vm/opto/superword.cpp
index f005ef4..718f077 100644
--- a/src/share/vm/opto/superword.cpp
+++ b/src/share/vm/opto/superword.cpp
@@ -482,7 +482,9 @@
   if (init_nd->is_Con() && p.invar() == NULL) {
     int init = init_nd->bottom_type()->is_int()->get_con();
     int init_offset = init * p.scale_in_bytes() + offset;
-    assert(init_offset >= 0, "positive offset from object start");
+    if (init_offset < 0) { // negative offset from object start?
+      return false;        // may happen in dead loop
+    }
     if (vw % span == 0) {
       // If vm is a multiple of span, we use formula (1).
       if (span > 0) {
diff --git a/src/share/vm/prims/jvmtiEnvBase.hpp b/src/share/vm/prims/jvmtiEnvBase.hpp
index e8c23b1..c6a7db1 100644
--- a/src/share/vm/prims/jvmtiEnvBase.hpp
+++ b/src/share/vm/prims/jvmtiEnvBase.hpp
@@ -32,6 +32,7 @@
 #include "runtime/fieldDescriptor.hpp"
 #include "runtime/frame.hpp"
 #include "runtime/handles.inline.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/thread.hpp"
 #include "runtime/vm_operations.hpp"
 #include "utilities/growableArray.hpp"
@@ -97,7 +98,7 @@
   const void *_env_local_storage;     // per env agent allocated data.
   jvmtiEventCallbacks _event_callbacks;
   jvmtiExtEventCallbacks _ext_event_callbacks;
-  JvmtiTagMap* _tag_map;
+  JvmtiTagMap* volatile _tag_map;
   JvmtiEnvEventEnable _env_event_enable;
   jvmtiCapabilities _current_capabilities;
   jvmtiCapabilities _prohibited_capabilities;
@@ -251,6 +252,13 @@
     return _tag_map;
   }
 
+  JvmtiTagMap* tag_map_acquire() {  // reads _tag_map via OrderAccess::load_ptr_acquire
+    return (JvmtiTagMap*)OrderAccess::load_ptr_acquire(&_tag_map);
+  }
+
+  void release_set_tag_map(JvmtiTagMap* tag_map) {  // stores _tag_map via OrderAccess::release_store_ptr
+    OrderAccess::release_store_ptr(&_tag_map, tag_map);
+  }
 
   // return true if event is enabled globally or for any thread
   // True only if there is a callback for it.
diff --git a/src/share/vm/prims/jvmtiRedefineClasses.cpp b/src/share/vm/prims/jvmtiRedefineClasses.cpp
index 0e194b6..8b99b21 100644
--- a/src/share/vm/prims/jvmtiRedefineClasses.cpp
+++ b/src/share/vm/prims/jvmtiRedefineClasses.cpp
@@ -67,6 +67,43 @@
   _res = JVMTI_ERROR_NONE;
 }
 
+static inline InstanceKlass* get_ik(jclass def) {  // def must be a non-null handle to an instance class mirror
+  oop mirror = JNIHandles::resolve_non_null(def);
+  return InstanceKlass::cast(java_lang_Class::as_Klass(mirror));
+}
+
+// Wait until none of the classes in _class_defs is being redefined, then mark
+// them all; parallel constant pool merging leads to indeterminate constant pools.
+void VM_RedefineClasses::lock_classes() {
+  MutexLocker ml(RedefineClasses_lock);
+  bool has_redefined;
+  do {
+    has_redefined = false;
+    // Go through classes each time until none are being redefined.
+    for (int i = 0; i < _class_count; i++) {
+      if (get_ik(_class_defs[i].klass)->is_being_redefined()) {
+        RedefineClasses_lock->wait();
+        has_redefined = true;
+        break;  // for loop; the do/while then rescans from the first class
+      }
+    }
+  } while (has_redefined);
+  for (int i = 0; i < _class_count; i++) {
+    get_ik(_class_defs[i].klass)->set_is_being_redefined(true);
+  }
+  RedefineClasses_lock->notify_all();
+}
+
+void VM_RedefineClasses::unlock_classes() {  // clears the is_being_redefined flag on every class in _class_defs
+  MutexLocker ml(RedefineClasses_lock);
+  for (int i = 0; i < _class_count; i++) {
+    assert(get_ik(_class_defs[i].klass)->is_being_redefined(),
+           "should be being redefined to get here");
+    get_ik(_class_defs[i].klass)->set_is_being_redefined(false);
+  }
+  RedefineClasses_lock->notify_all();  // wake any thread waiting in lock_classes()
+}
+
 bool VM_RedefineClasses::doit_prologue() {
   if (_class_count == 0) {
     _res = JVMTI_ERROR_NONE;
@@ -89,12 +126,21 @@
       _res = JVMTI_ERROR_NULL_POINTER;
       return false;
     }
+
+    oop mirror = JNIHandles::resolve_non_null(_class_defs[i].klass);
+    // classes for primitives and arrays cannot be redefined
+    // check here so following code can assume these classes are InstanceKlass
+    if (!is_modifiable_class(mirror)) {
+      _res = JVMTI_ERROR_UNMODIFIABLE_CLASS;
+      return false;
+    }
   }
 
   // Start timer after all the sanity checks; not quite accurate, but
   // better than adding a bunch of stop() calls.
   RC_TIMER_START(_timer_vm_op_prologue);
 
+  lock_classes();
   // We first load new class versions in the prologue, because somewhere down the
   // call chain it is required that the current thread is a Java thread.
   _res = load_new_class_versions(Thread::current());
@@ -105,12 +151,18 @@
         ClassLoaderData* cld = _scratch_classes[i]->class_loader_data();
         // Free the memory for this class at class unloading time.  Not before
         // because CMS might think this is still live.
+        InstanceKlass* ik = get_ik(_class_defs[i].klass);
+        if (ik->get_cached_class_file() == ((InstanceKlass*)_scratch_classes[i])->get_cached_class_file()) {
+          // On error, don't double-free the cached_class_file copied from the original class.
+          ((InstanceKlass*)_scratch_classes[i])->set_cached_class_file(NULL);
+        }
         cld->add_to_deallocate_list((InstanceKlass*)_scratch_classes[i]);
       }
     }
     // Free os::malloc allocated memory in load_new_class_version.
     os::free(_scratch_classes);
     RC_TIMER_STOP(_timer_vm_op_prologue);
+    unlock_classes();
     return false;
   }
 
@@ -170,6 +222,8 @@
 }
 
 void VM_RedefineClasses::doit_epilogue() {
+  unlock_classes();
+
   // Free os::malloc allocated memory.
   os::free(_scratch_classes);
 
@@ -961,14 +1015,7 @@
     // versions are deleted. Constant pools are deallocated while merging
     // constant pools
     HandleMark hm(THREAD);
-
-    oop mirror = JNIHandles::resolve_non_null(_class_defs[i].klass);
-    // classes for primitives cannot be redefined
-    if (!is_modifiable_class(mirror)) {
-      return JVMTI_ERROR_UNMODIFIABLE_CLASS;
-    }
-    Klass* the_class_oop = java_lang_Class::as_Klass(mirror);
-    instanceKlassHandle the_class = instanceKlassHandle(THREAD, the_class_oop);
+    instanceKlassHandle the_class(THREAD, get_ik(_class_defs[i].klass));
     Symbol*  the_class_sym = the_class->name();
 
     // RC_TRACE_WITH_THREAD macro has an embedded ResourceMark
@@ -3855,22 +3902,19 @@
   HandleMark hm(THREAD);   // make sure handles from this call are freed
   RC_TIMER_START(_timer_rsc_phase1);
 
-  instanceKlassHandle scratch_class(scratch_class_oop);
-
-  oop the_class_mirror = JNIHandles::resolve_non_null(the_jclass);
-  Klass* the_class_oop = java_lang_Class::as_Klass(the_class_mirror);
-  instanceKlassHandle the_class = instanceKlassHandle(THREAD, the_class_oop);
+  instanceKlassHandle scratch_class(THREAD, scratch_class_oop);
+  instanceKlassHandle the_class(THREAD, get_ik(the_jclass));
 
   // Remove all breakpoints in methods of this class
   JvmtiBreakpoints& jvmti_breakpoints = JvmtiCurrentBreakpoints::get_jvmti_breakpoints();
-  jvmti_breakpoints.clearall_in_class_at_safepoint(the_class_oop);
+  jvmti_breakpoints.clearall_in_class_at_safepoint(the_class());
 
   // Deoptimize all compiled code that depends on this class
   flush_dependent_code(the_class, THREAD);
 
   _old_methods = the_class->methods();
   _new_methods = scratch_class->methods();
-  _the_class_oop = the_class_oop;
+  _the_class_oop = the_class();
   compute_added_deleted_matching_methods();
   update_jmethod_ids();
 
@@ -3980,12 +4024,12 @@
   // with them was cached on the scratch class, move to the_class.
   // Note: we still want to do this if nothing needed caching since it
   // should get cleared in the_class too.
-  if (the_class->get_cached_class_file_bytes() == 0) {
+  if (the_class->get_cached_class_file() == 0) {
     // the_class doesn't have a cache yet so copy it
     the_class->set_cached_class_file(scratch_class->get_cached_class_file());
   }
-  else if (scratch_class->get_cached_class_file_bytes() !=
-           the_class->get_cached_class_file_bytes()) {
+  else if (scratch_class->get_cached_class_file() !=
+           the_class->get_cached_class_file()) {
     // The same class can be present twice in the scratch classes list or there
     // are multiple concurrent RetransformClasses calls on different threads.
     // In such cases we have to deallocate scratch_class cached_class_file.
@@ -4094,14 +4138,14 @@
   RC_TRACE_WITH_THREAD(0x00000001, THREAD,
     ("redefined name=%s, count=%d (avail_mem=" UINT64_FORMAT "K)",
     the_class->external_name(),
-    java_lang_Class::classRedefinedCount(the_class_mirror),
+    java_lang_Class::classRedefinedCount(the_class->java_mirror()),
     os::available_memory() >> 10));
 
   {
     ResourceMark rm(THREAD);
     Events::log_redefinition(THREAD, "redefined class name=%s, count=%d",
                              the_class->external_name(),
-                             java_lang_Class::classRedefinedCount(the_class_mirror));
+                             java_lang_Class::classRedefinedCount(the_class->java_mirror()));
 
   }
   RC_TIMER_STOP(_timer_rsc_phase2);
diff --git a/src/share/vm/prims/jvmtiRedefineClasses.hpp b/src/share/vm/prims/jvmtiRedefineClasses.hpp
index aab9d45..167c01c 100644
--- a/src/share/vm/prims/jvmtiRedefineClasses.hpp
+++ b/src/share/vm/prims/jvmtiRedefineClasses.hpp
@@ -490,6 +490,10 @@
 
   void flush_dependent_code(instanceKlassHandle k_h, TRAPS);
 
+  // lock classes to redefine since constant pool merging isn't thread safe.
+  void lock_classes();
+  void unlock_classes();
+
   static void dump_methods();
 
   // Check that there are no old or obsolete methods
diff --git a/src/share/vm/prims/jvmtiTagMap.cpp b/src/share/vm/prims/jvmtiTagMap.cpp
index c45181d..ba48fc3 100644
--- a/src/share/vm/prims/jvmtiTagMap.cpp
+++ b/src/share/vm/prims/jvmtiTagMap.cpp
@@ -430,7 +430,7 @@
   _hashmap = new JvmtiTagHashmap();
 
   // finally add us to the environment
-  ((JvmtiEnvBase *)env)->set_tag_map(this);
+  ((JvmtiEnvBase *)env)->release_set_tag_map(this);
 }
 
 
@@ -499,7 +499,7 @@
 // returns the tag map for the given environments. If the tag map
 // doesn't exist then it is created.
 JvmtiTagMap* JvmtiTagMap::tag_map_for(JvmtiEnv* env) {
-  JvmtiTagMap* tag_map = ((JvmtiEnvBase*)env)->tag_map();
+  JvmtiTagMap* tag_map = ((JvmtiEnvBase*)env)->tag_map_acquire();
   if (tag_map == NULL) {
     MutexLocker mu(JvmtiThreadState_lock);
     tag_map = ((JvmtiEnvBase*)env)->tag_map();
@@ -3282,7 +3282,7 @@
   if (JvmtiEnv::environments_might_exist()) {
     JvmtiEnvIterator it;
     for (JvmtiEnvBase* env = it.first(); env != NULL; env = it.next(env)) {
-      JvmtiTagMap* tag_map = env->tag_map();
+      JvmtiTagMap* tag_map = env->tag_map_acquire();
       if (tag_map != NULL && !tag_map->is_empty()) {
         tag_map->do_weak_oops(is_alive, f);
       }
diff --git a/src/share/vm/runtime/arguments.cpp b/src/share/vm/runtime/arguments.cpp
index 43e31d2..2665b6b 100644
--- a/src/share/vm/runtime/arguments.cpp
+++ b/src/share/vm/runtime/arguments.cpp
@@ -556,8 +556,7 @@
 
   /* Scan the directory for jars/zips, appending them to path. */
   struct dirent *entry;
-  char *dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(directory), mtInternal);
-  while ((entry = os::readdir(dir, (dirent *) dbuf)) != NULL) {
+  while ((entry = os::readdir(dir)) != NULL) {
     const char* name = entry->d_name;
     const char* ext = name + strlen(name) - 4;
     bool isJarOrZip = ext > name &&
@@ -571,7 +570,6 @@
       FREE_C_HEAP_ARRAY(char, jarpath, mtInternal);
     }
   }
-  FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
   os::closedir(dir);
   return path;
 }
@@ -3485,14 +3483,12 @@
   if (dir == NULL) return false;
 
   struct dirent *entry;
-  char *dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(directory), mtInternal);
   bool hasJarFile = false;
-  while (!hasJarFile && (entry = os::readdir(dir, (dirent *) dbuf)) != NULL) {
+  while (!hasJarFile && (entry = os::readdir(dir)) != NULL) {
     const char* name = entry->d_name;
     const char* ext = name + strlen(name) - 4;
     hasJarFile = ext > name && (os::file_name_strcmp(ext, ".jar") == 0);
   }
-  FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
   os::closedir(dir);
   return hasJarFile ;
 }
@@ -3574,8 +3570,7 @@
   if (dir != NULL) {
     int num_ext_jars = 0;
     struct dirent *entry;
-    char *dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(extDir), mtInternal);
-    while ((entry = os::readdir(dir, (dirent *) dbuf)) != NULL) {
+    while ((entry = os::readdir(dir)) != NULL) {
       const char* name = entry->d_name;
       const char* ext = name + strlen(name) - 4;
       if (ext > name && (os::file_name_strcmp(ext, ".jar") == 0)) {
@@ -3594,7 +3589,6 @@
         }
       }
     }
-    FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
     os::closedir(dir);
     if (num_ext_jars > 0) {
       nonEmptyDirs += 1;
diff --git a/src/share/vm/runtime/java.cpp b/src/share/vm/runtime/java.cpp
index faec9ba..721f743 100644
--- a/src/share/vm/runtime/java.cpp
+++ b/src/share/vm/runtime/java.cpp
@@ -45,6 +45,7 @@
 #include "runtime/arguments.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/compilationPolicy.hpp"
+#include "runtime/deoptimization.hpp"
 #include "runtime/fprofiler.hpp"
 #include "runtime/init.hpp"
 #include "runtime/interfaceSupport.hpp"
diff --git a/src/share/vm/runtime/mutexLocker.cpp b/src/share/vm/runtime/mutexLocker.cpp
index f358c75..1f61967 100644
--- a/src/share/vm/runtime/mutexLocker.cpp
+++ b/src/share/vm/runtime/mutexLocker.cpp
@@ -125,6 +125,7 @@
 Mutex*   Management_lock              = NULL;
 Monitor* Service_lock                 = NULL;
 Monitor* PeriodicTask_lock            = NULL;
+Monitor* RedefineClasses_lock         = NULL;
 
 #ifdef INCLUDE_TRACE
 Mutex*   JfrStacktrace_lock           = NULL;
@@ -279,6 +280,7 @@
   def(ProfileVM_lock               , Monitor, special,   false); // used for profiling of the VMThread
   def(CompileThread_lock           , Monitor, nonleaf+5,   false );
   def(PeriodicTask_lock            , Monitor, nonleaf+5,   true);
+  def(RedefineClasses_lock         , Monitor, nonleaf+5,   true);
 
 #ifdef INCLUDE_TRACE
   def(JfrMsg_lock                  , Monitor, leaf,        true);
diff --git a/src/share/vm/runtime/mutexLocker.hpp b/src/share/vm/runtime/mutexLocker.hpp
index be86bac..138e30e 100644
--- a/src/share/vm/runtime/mutexLocker.hpp
+++ b/src/share/vm/runtime/mutexLocker.hpp
@@ -141,6 +141,7 @@
 extern Mutex*   Management_lock;                 // a lock used to serialize JVM management
 extern Monitor* Service_lock;                    // a lock used for service thread operation
 extern Monitor* PeriodicTask_lock;               // protects the periodic task structure
+extern Monitor* RedefineClasses_lock;            // locks classes from parallel redefinition
 
 #ifdef INCLUDE_TRACE
 extern Mutex*   JfrStacktrace_lock;              // used to guard access to the JFR stacktrace table
diff --git a/src/share/vm/runtime/os.hpp b/src/share/vm/runtime/os.hpp
index 931f4cb..14cd942 100644
--- a/src/share/vm/runtime/os.hpp
+++ b/src/share/vm/runtime/os.hpp
@@ -561,8 +561,7 @@
 
   // Reading directories.
   static DIR*           opendir(const char* dirname);
-  static int            readdir_buf_size(const char *path);
-  static struct dirent* readdir(DIR* dirp, dirent* dbuf);
+  static struct dirent* readdir(DIR* dirp);
   static int            closedir(DIR* dirp);
 
   // Dynamic library extension
diff --git a/src/share/vm/runtime/stubRoutines.cpp b/src/share/vm/runtime/stubRoutines.cpp
index eb30640..6b8f7e7 100644
--- a/src/share/vm/runtime/stubRoutines.cpp
+++ b/src/share/vm/runtime/stubRoutines.cpp
@@ -174,6 +174,9 @@
     }
     CodeBuffer buffer(_code1);
     StubGenerator_generate(&buffer, false);
+    // When new stubs are added, we need to make sure there is some space left
+    // to catch the situation where the size has to be increased again.
+    assert(buffer.insts_remaining() > 200, "increase code_size1");
   }
 }
 
@@ -226,6 +229,9 @@
     }
     CodeBuffer buffer(_code2);
     StubGenerator_generate(&buffer, true);
+    // When new stubs are added, we need to make sure there is some space left
+    // to catch the situation where the size has to be increased again.
+    assert(buffer.insts_remaining() > 200, "increase code_size2");
   }
 
 #ifdef ASSERT
diff --git a/src/share/vm/services/classLoadingService.cpp b/src/share/vm/services/classLoadingService.cpp
index 92a548b..fce2b27 100644
--- a/src/share/vm/services/classLoadingService.cpp
+++ b/src/share/vm/services/classLoadingService.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -69,7 +69,7 @@
       len = name->utf8_length();                    \
     }                                               \
     HOTSPOT_CLASS_##type( /* type = unloaded, loaded */ \
-      data, len, (clss)->class_loader(), (shared)); \
+      data, len, (void *)(clss)->class_loader(), (shared)); \
   }
 
 #endif /* USDT2 */
diff --git a/src/share/vm/utilities/taskqueue.hpp b/src/share/vm/utilities/taskqueue.hpp
index 8bdb38d..d2b39c9 100644
--- a/src/share/vm/utilities/taskqueue.hpp
+++ b/src/share/vm/utilities/taskqueue.hpp
@@ -29,6 +29,7 @@
 #include "memory/allocation.inline.hpp"
 #include "runtime/mutex.hpp"
 #include "runtime/orderAccess.inline.hpp"
+#include "utilities/globalDefinitions.hpp"
 #include "utilities/stack.hpp"
 
 // Simple TaskQueue stats that are collected by default in debug builds.
@@ -607,7 +608,9 @@
 private:
   int _n_threads;
   TaskQueueSetSuper* _queue_set;
+  char _pad_before[DEFAULT_CACHE_LINE_SIZE];
   int _offered_termination;
+  char _pad_after[DEFAULT_CACHE_LINE_SIZE];
 
 #ifdef TRACESPINNING
   static uint _total_yields;
diff --git a/test/Makefile b/test/Makefile
index e91844e..447619b 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -119,11 +119,11 @@
 
 # Root of all test results
 ifdef ALT_OUTPUTDIR
-  ABS_BUILD_ROOT = $(ALT_OUTPUTDIR)/$(PLATFORM)-$(ARCH)
+  ABS_BUILD_ROOT = $(ALT_OUTPUTDIR)
 else
   ABS_BUILD_ROOT = $(TEST_ROOT)/../build/$(PLATFORM)-$(ARCH)
 endif
-ABS_TEST_OUTPUT_DIR = $(ABS_BUILD_ROOT)/testoutput
+ABS_TEST_OUTPUT_DIR = $(ABS_BUILD_ROOT)/testoutput/$(UNIQUE_DIR)
 
 # Expect JPRT to set PRODUCT_HOME (the product or jdk in this case to test)
 ifndef PRODUCT_HOME
@@ -267,7 +267,7 @@
 # Only run automatic tests
 JTREG_BASIC_OPTIONS += -a
 # Report details on all failed or error tests, times too
-JTREG_BASIC_OPTIONS += -v:fail,error,time
+JTREG_BASIC_OPTIONS += -v:fail,error,summary
 # Retain all files for failing tests
 JTREG_BASIC_OPTIONS += -retain:fail,error
 # Ignore tests are not run and completely silent about it
diff --git a/test/TEST.groups b/test/TEST.groups
index 4075036..216e307 100644
--- a/test/TEST.groups
+++ b/test/TEST.groups
@@ -124,7 +124,6 @@
  -:needs_jre \
  -:needs_jdk
 
-
 # When called from top level the test suites use the hotspot_ prefix
 hotspot_wbapitest = \
   sanity/
@@ -147,6 +146,11 @@
   :hotspot_gc \
   :hotspot_runtime \
   :hotspot_serviceability
+
+# Right now tier1 runs all hotspot tests
+hotspot_tier1 = \
+  :jdk
+
 # Tests that require compact3 API's
 #
 needs_compact3 = \
diff --git a/test/compiler/c1/TestGotoIf.jasm b/test/compiler/c1/TestGotoIf.jasm
new file mode 100644
index 0000000..b5dc03a
--- /dev/null
+++ b/test/compiler/c1/TestGotoIf.jasm
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+public class compiler/c1/TestGotoIf version 52:0 {
+    public Field f1:"I";
+    public Field f2:"I";
+    public static Field i:"I";
+
+    Method "<init>":"()V" stack 1 locals 1 {
+        aload_0;
+        invokespecial Method java/lang/Object."<init>":"()V";
+        return;
+    }
+
+    public Method test1:"()I" stack 3 locals 1 {
+        aload_0;
+        getfield Field f1:"I";
+        aload_0;
+        getfield Field f2:"I";
+        iconst_1;
+        isub;
+        // Without the fix, this 'if' got eliminated by CEE
+        if_icmpgt Null;
+        iconst_1;
+      Return: stack_frame_type stack1;
+      stack_map int;
+        ireturn;
+      Null: stack_frame_type same;
+        iconst_0;
+        goto Return; // Backbranch (t_goto) with safepoint
+    }
+
+    public Method test2:"()I" stack 3 locals 1 {
+        aload_0;
+        getfield Field f1:"I";
+        aload_0;
+        getfield Field f2:"I";
+        iconst_1;
+        isub;
+        goto Skip;
+      Return: stack_frame_type full;
+      stack_map int;
+        ireturn;
+      Skip: stack_frame_type full;
+      stack_map int, int;
+        // Without the fix, this 'if' got eliminated by CEE
+        if_icmpgt Null;
+        iconst_1;
+        goto Return; // Backbranch (f_goto) with safepoint
+      Null: stack_frame_type full;
+      stack_map;
+        iconst_0;
+        goto Return; // Backbranch (t_goto) with safepoint
+    }
+
+    public Method test3:"()I" stack 3 locals 1 {
+        aload_0;
+        getfield Field f1:"I";
+        aload_0;
+        getfield Field f2:"I";
+        iconst_1;
+        isub;
+        goto Skip;
+      Return: stack_frame_type full;
+      stack_map int;
+        ireturn;
+      Null: stack_frame_type full;
+      stack_map;
+        iconst_0;
+        goto Return; // Backbranch (t_goto) with safepoint
+      Skip: stack_frame_type full;
+      stack_map int, int;
+        // This 'if' will be eliminated by CEE
+        if_icmpgt Null; // Backbranch (if) with safepoint
+        iconst_1;
+        goto Return; // Backbranch (f_goto) with safepoint
+    }
+
+    public Method test4:"()I" stack 3 locals 1 {
+        aload_0;
+        getfield Field f1:"I";
+        aload_0;
+        getfield Field f2:"I";
+        iconst_1;
+        isub;
+        goto Skip;
+      Null: stack_frame_type full;
+      stack_map;
+        iconst_0;
+      Return: stack_frame_type full;
+      stack_map int;
+        ireturn; 
+      Skip: stack_frame_type full;
+      stack_map int, int;
+        // This 'if' will be eliminated by CEE
+        if_icmpgt Null; // Backbranch (if) with safepoint
+        iconst_1;
+        goto Return; // Backbranch (f_goto) with safepoint
+    }
+
+    public Method test5:"()I" stack 3 locals 2 {
+        aload_0;
+        getfield Field f1:"I";
+        aload_0;
+        getfield Field f2:"I";
+        iconst_1;
+        isub;
+        goto Skip;
+      Null: stack_frame_type full;
+      stack_map;
+        iconst_0;
+        goto Return;
+      Skip: stack_frame_type full;
+      stack_map int, int;
+        // This 'if' will be eliminated by CEE
+        if_icmpgt Null; // Backbranch (if) with safepoint
+        iconst_1;
+      Return: stack_frame_type full;
+      stack_map int;
+        ireturn; 
+    }
+
+    public Method test6:"()I" stack 4 locals 1 {
+        getstatic Field i:"I";
+      Loop: stack_frame_type full;
+      stack_map int;
+        // Decrement i and exit loop if < 0
+        iconst_0;
+        getstatic Field i:"I";
+        iconst_1;
+        isub;
+        dup;
+        putstatic Field i:"I";
+        if_icmpgt Exit;
+
+        iconst_1;
+        // Without the fix, this 'if' got eliminated by CEE
+        if_icmpgt Null;
+        iconst_1;
+        goto Loop; // Backbranch (f_goto) with safepoint
+      Null: stack_frame_type same;
+        iconst_0;
+        goto Loop; // Backbranch (t_goto) with safepoint
+
+      Exit: stack_frame_type full;
+      stack_map int;
+        iconst_0;
+        ireturn; 
+    }
+}
diff --git a/test/compiler/c1/TestGotoIfMain.java b/test/compiler/c1/TestGotoIfMain.java
new file mode 100644
index 0000000..ac8b1b8
--- /dev/null
+++ b/test/compiler/c1/TestGotoIfMain.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8218721
+ * @compile TestGotoIf.jasm
+ * @run main/othervm -XX:TieredStopAtLevel=1 -Xcomp
+ *                   -XX:CompileCommand=compileonly,compiler.c1.TestGotoIf::test*
+ *                   compiler.c1.TestGotoIfMain
+ */
+
+package compiler.c1;
+
+public class TestGotoIfMain {
+    public static void main(String[] args) {
+        TestGotoIf test = new TestGotoIf();
+        test.i = 5;
+        test.test1();
+        test.test2();
+        test.test3();
+        test.test4();
+        test.test5();
+        test.test6();
+    }
+}
diff --git a/test/compiler/c2/Test8217359.java b/test/compiler/c2/Test8217359.java
new file mode 100644
index 0000000..ca0d2cc
--- /dev/null
+++ b/test/compiler/c2/Test8217359.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2019, Huawei Technologies Co. Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8217359
+ * @summary C2 compiler triggers SIGSEGV after transformation in ConvI2LNode::Ideal
+ *
+ * @run main/othervm -XX:-TieredCompilation
+ *      -XX:CompileCommand=compileonly,compiler.c2.Test8217359::test
+ *      compiler.c2.Test8217359
+ */
+
+package compiler.c2;
+
+public class Test8217359 {
+
+    public static int ival = 0;
+    public static long lsum = 0;
+    public static long lval = 0;
+
+    public static void test() {
+        short s = -25632;
+        float f = 0.512F, f1 = 2.556F;
+        int i6 = 32547, i7 = 9, i8 = -9, i9 = 36, i10 = -223;
+
+        for (i6 = 4; i6 < 182; i6++) {
+            i8 = 1;
+            while (++i8 < 17) {
+                f1 = 1;
+                do {
+                    i7 += (182 + (f1 * f1));
+                } while (++f1 < 1);
+
+                Test8217359.ival += (i8 | Test8217359.ival);
+            }
+        }
+
+        for (i9 = 9; i9 < 100; ++i9) {
+            i10 -= i6;
+            i10 >>= s;
+            i7 += (((i9 * i10) + i6) - Test8217359.lval);
+        }
+
+        lsum += i6 + i7 + i8;
+    }
+
+    public static void main(String[] args) {
+        for (int i = 0; i < 16000; i++) {
+            test();
+        }
+    }
+
+}
diff --git a/test/compiler/c2/TestIfWithDeadRegion.java b/test/compiler/c2/TestIfWithDeadRegion.java
new file mode 100644
index 0000000..1117908
--- /dev/null
+++ b/test/compiler/c2/TestIfWithDeadRegion.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8219807
+ * @summary Test IfNode::up_one_dom() with dead regions.
+ * @compile -XDstringConcat=inline TestIfWithDeadRegion.java
+ * @run main/othervm -XX:CompileCommand=compileonly,compiler.c2.TestIfWithDeadRegion::test
+ *                   compiler.c2.TestIfWithDeadRegion
+ */
+
+package compiler.c2;
+
+import java.util.function.Supplier;
+
+public class TestIfWithDeadRegion {
+
+    static String msg;
+
+    static String getString(String s, int i) {
+        String current = s + String.valueOf(i);
+        System.nanoTime();
+        return current;
+    }
+
+    static void test(Supplier<String> supplier) {
+        msg = supplier.get();
+    }
+
+    public static void main(String[] args) {
+        for (int i = 0; i < 20_000; ++i) {
+            test(() -> getString("Test1", 42));
+            test(() -> getString("Test2", 42));
+        }
+    }
+}
diff --git a/test/compiler/escapeAnalysis/TestGetClass.java b/test/compiler/escapeAnalysis/TestGetClass.java
new file mode 100644
index 0000000..7b2b587
--- /dev/null
+++ b/test/compiler/escapeAnalysis/TestGetClass.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8218201
+ * @summary BCEscapeAnalyzer assigns wrong escape state to getClass return value.
+ * @run main/othervm -XX:-TieredCompilation -Xcomp -XX:+UnlockDiagnosticVMOptions -XX:DisableIntrinsic=_getClass
+ *                   -XX:CompileCommand=quiet -XX:CompileCommand=compileonly,compiler.escapeAnalysis.TestGetClass::test
+ *                   -XX:+PrintCompilation compiler.escapeAnalysis.TestGetClass
+ */
+
+package compiler.escapeAnalysis;
+
+public class TestGetClass {
+    static Object obj = new Object();
+
+    public static boolean test() {
+        if (obj.getClass() == Object.class) {
+            synchronized (obj) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    public static void main(String[] args) {
+        if (!test()) {
+            throw new RuntimeException("Test failed");
+        }
+    }
+}
diff --git a/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java
index c5d21ed..3285071 100644
--- a/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java
+++ b/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java
@@ -36,7 +36,8 @@
     public GenericTestCaseForOtherCPU(String optionName) {
         // Execute the test case on any CPU except SPARC and X86
         super(optionName, new NotPredicate(new OrPredicate(Platform::isSparc,
-                new OrPredicate(Platform::isX64, Platform::isX86))));
+                new OrPredicate(Platform::isPPC,
+                new OrPredicate(Platform::isX64, Platform::isX86)))));
     }
 
     @Override
diff --git a/test/compiler/loopopts/superword/TestNegBaseOffset.java b/test/compiler/loopopts/superword/TestNegBaseOffset.java
new file mode 100644
index 0000000..4fe83ee
--- /dev/null
+++ b/test/compiler/loopopts/superword/TestNegBaseOffset.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8202948
+ * @summary Test skipping vector packs with negative base offset.
+ * @comment Test fails only with -Xcomp when profiling data is not present.
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions
+ *                   -Xcomp -XX:-TieredCompilation -XX:CICompilerCount=1
+ *                   -XX:CompileOnly=compiler/loopopts/superword/TestNegBaseOffset
+ *                   compiler.loopopts.superword.TestNegBaseOffset
+ */
+
+package compiler.loopopts.superword;
+
+public class TestNegBaseOffset {
+    public static final int N = 400;
+    public static int iFld=10;
+    public static int iArr[]=new int[N];
+
+    public static void mainTest() {
+        int i0=1, i2;
+        while (++i0 < 339) {
+            if ((i0 % 2) == 0) {
+                for (i2 = 2; i2 > i0; i2 -= 3) {
+                    iArr[i2 - 1] &= iFld;
+                }
+            }
+        }
+    }
+
+    public static void main(String[] strArr) {
+        for (int i = 0; i < 10; i++) {
+            mainTest();
+        }
+    }
+}
+
diff --git a/test/compiler/rangechecks/RangeCheckEliminationScaleNotOne.java b/test/compiler/rangechecks/RangeCheckEliminationScaleNotOne.java
new file mode 100644
index 0000000..50b268a
--- /dev/null
+++ b/test/compiler/rangechecks/RangeCheckEliminationScaleNotOne.java
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8215265
+ * @summary C2: range check elimination may allow illegal out of bound access
+ *
+ * @run main/othervm -XX:-TieredCompilation -XX:-BackgroundCompilation -XX:-UseOnStackReplacement -XX:-UseLoopPredicate RangeCheckEliminationScaleNotOne
+ *
+ */
+
+import java.util.Arrays;
+
+public class RangeCheckEliminationScaleNotOne {
+    public static void main(String[] args) {
+        {
+            int[] array = new int[199];
+            boolean[] flags = new boolean[100];
+            Arrays.fill(flags, true);
+            flags[0] = false;
+            flags[1] = false;
+            for (int i = 0; i < 20_000; i++) {
+                test1(100, array, 0, flags);
+            }
+            boolean ex = false;
+            try {
+                test1(100, array, -5, flags);
+            } catch (ArrayIndexOutOfBoundsException aie) {
+                ex = true;
+            }
+            if (!ex) {
+                throw new RuntimeException("no AIOOB exception");
+            }
+        }
+
+        {
+            int[] array = new int[199];
+            boolean[] flags = new boolean[100];
+            Arrays.fill(flags, true);
+            flags[0] = false;
+            flags[1] = false;
+            for (int i = 0; i < 20_000; i++) {
+                test2(100, array, 198, flags);
+            }
+            boolean ex = false;
+            try {
+                test2(100, array, 203, flags);
+            } catch (ArrayIndexOutOfBoundsException aie) {
+                ex = true;
+            }
+            if (!ex) {
+                throw new RuntimeException("no AIOOB exception");
+            }
+        }
+    }
+
+    private static int test1(int stop, int[] array, int offset, boolean[] flags) {
+        if (array == null) {}
+        int res = 0;
+        for (int i = 0; i < stop; i++) {
+            if (flags[i]) {
+                res += array[2 * i + offset];
+            }
+        }
+        return res;
+    }
+
+
+    private static int test2(int stop, int[] array, int offset, boolean[] flags) {
+        if (array == null) {}
+        int res = 0;
+        for (int i = 0; i < stop; i++) {
+            if (flags[i]) {
+                res += array[-2 * i + offset];
+            }
+        }
+        return res;
+    }
+}
diff --git a/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java
index 51ab609..bac4665 100644
--- a/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java
+++ b/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java
@@ -63,12 +63,20 @@
                     null);
 
     public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE
-            = new CPUSpecificPredicate("sparc.*", new String[] { "sha256" },
-                    null);
+            = new OrPredicate(new CPUSpecificPredicate("sparc.*",   new String[] { "sha256" },
+                                                       null),
+              new OrPredicate(new CPUSpecificPredicate("ppc64.*",   new String[] { "sha"    },
+                                                       null),
+                              new CPUSpecificPredicate("ppc64le.*", new String[] { "sha"    },
+                                                       null)));
 
     public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE
-            = new CPUSpecificPredicate("sparc.*", new String[] { "sha512" },
-                    null);
+            = new OrPredicate(new CPUSpecificPredicate("sparc.*",   new String[] { "sha512" },
+                                                       null),
+              new OrPredicate(new CPUSpecificPredicate("ppc64.*",   new String[] { "sha"    },
+                                                       null),
+                              new CPUSpecificPredicate("ppc64le.*", new String[] { "sha"    },
+                                                       null)));
 
     public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE
             = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE,
diff --git a/test/runtime/RedefineTests/RedefineDoubleDelete.java b/test/runtime/RedefineTests/RedefineDoubleDelete.java
new file mode 100644
index 0000000..1f4533f
--- /dev/null
+++ b/test/runtime/RedefineTests/RedefineDoubleDelete.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8178870
+ * @library /testlibrary
+ * @summary Redefine class with CFLH twice to test deleting the cached_class_file
+ * @build RedefineClassHelper
+ * @run main RedefineClassHelper
+ * @run main/othervm -javaagent:redefineagent.jar RedefineDoubleDelete
+ */
+
+public class RedefineDoubleDelete {
+    // Redefines the nested class B several times (see main) to exercise deletion of the cached_class_file.
+    // Incompatible replacement for B: it adds a data member (count1), so main expects redefinition to fail.
+    public static String newB =
+                "class RedefineDoubleDelete$B {" +
+                "   int count1 = 0;" +
+                "}";
+
+    public static String newerB =  // compatible replacement for B: faa() prints "baa" and returns 2
+                "class RedefineDoubleDelete$B { " +
+                "   int faa() { System.out.println(\"baa\"); return 2; }" +
+                "}";
+    // The ClassFileLoadHook in libRedefineDoubleDelete.c rewrites every "oo" byte pair of this class to "aa" ("foo" -> "faa").
+    // NOTE(review): the @run line only passes -javaagent:redefineagent.jar; confirm that native hook is actually loaded.
+    static class B {
+      int faa() { System.out.println("foo"); return 1; }
+    }
+
+    public static void main(String args[]) throws Exception {
+        // Baseline: the original B.faa() must return 1.
+        B b = new B();
+        int val = b.faa();
+        if (val != 1) {
+            throw new RuntimeException("return value wrong " + val);
+        }
+
+        // Redefine B twice with the incompatible newB to get cached_class_file in both B scratch classes
+        try {
+            RedefineClassHelper.redefineClass(B.class, newB);
+        } catch (java.lang.UnsupportedOperationException e) {
+            // expected: newB adds a data member; the failure is deliberately ignored
+        }
+        try {
+            RedefineClassHelper.redefineClass(B.class, newB);
+        } catch (java.lang.UnsupportedOperationException e) {
+            // expected again: the second attempt uses the same incompatible newB
+        }
+
+        // Do a full GC.
+        System.gc();
+
+        // Redefine with a compatible class; the existing instance b must then return 2 from faa()
+        RedefineClassHelper.redefineClass(B.class, newerB);
+        val = b.faa();
+        if (val != 2) {
+            throw new RuntimeException("return value wrong " + val);
+        }
+
+        // Do another full GC to clean things up.
+        System.gc();
+    }
+}
diff --git a/test/runtime/RedefineTests/libRedefineDoubleDelete.c b/test/runtime/RedefineTests/libRedefineDoubleDelete.c
new file mode 100644
index 0000000..d855e87
--- /dev/null
+++ b/test/runtime/RedefineTests/libRedefineDoubleDelete.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "jvmti.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef JNI_ENV_ARG
+/* Lets one GetEnv call site compile both as C (explicit env argument, (*x)->) and as C++ (x->). */
+#ifdef __cplusplus
+#define JNI_ENV_ARG(x, y) y
+#define JNI_ENV_PTR(x) x
+#else
+#define JNI_ENV_ARG(x,y) x, y
+#define JNI_ENV_PTR(x) (*x)
+#endif /* __cplusplus */
+
+#endif /* JNI_ENV_ARG */
+
+#define TranslateError(err) "JVMTI error" /* stub: ignores err and always expands to the same text */
+
+static jvmtiEnv *jvmti = NULL; /* filled in by GetEnv inside Agent_Initialize */
+
+static jint Agent_Initialize(JavaVM *jvm, char *options, void *reserved);
+
+JNIEXPORT
+jint JNICALL Agent_OnLoad(JavaVM *jvm, char *options, void *reserved) {
+    return Agent_Initialize(jvm, options, reserved); // all setup is shared with Agent_OnAttach
+}
+
+JNIEXPORT
+jint JNICALL Agent_OnAttach(JavaVM *jvm, char *options, void *reserved) {
+    return Agent_Initialize(jvm, options, reserved); // all setup is shared with Agent_OnLoad
+}
+
+JNIEXPORT
+jint JNICALL JNI_OnLoad(JavaVM *jvm, void *reserved) {
+    return JNI_VERSION_1_8; // only reports the JNI version; no other work is done here
+}
+
+
+static jint newClassDataLen = 0;            /* length of newClassData; set by getBytecodes */
+static unsigned char* newClassData = NULL;  /* patched class bytes; NULL until getBytecodes has run */
+
+static jint  /* returns JNI_OK, or JNI_ERR if the buffer could not be allocated */
+getBytecodes(jvmtiEnv *jvmti_env,
+             jint class_data_len, const unsigned char* class_data) {
+    int i;
+    jvmtiError res;  // Allocate reports a jvmtiError, not a JNI status code
+    // Copies class_data into a JVMTI-allocated buffer (newClassData/newClassDataLen), patching "oo" to "aa".
+    newClassDataLen = class_data_len;
+    res = (*jvmti_env)->Allocate(jvmti_env, newClassDataLen, &newClassData);
+    if (res != JVMTI_ERROR_NONE) {
+        printf("    Unable to allocate bytes\n");
+        return JNI_ERR;
+    }
+    for (i = 0; i < newClassDataLen; i++) {
+        newClassData[i] = class_data[i];
+        // Rewrite every adjacent "oo" pair to "aa"; detection reads the unmodified input bytes
+        if (i > 0 && class_data[i] == 'o' && class_data[i-1] == 'o') {
+            newClassData[i] = newClassData[i-1] = 'a';
+        }
+    }
+    printf("  ... copied bytecode: %d bytes\n", (int)newClassDataLen);
+    return JNI_OK;
+}
+
+
+static void JNICALL  /* ClassFileLoadHook callback: substitutes patched bytes for RedefineDoubleDelete$B once */
+Callback_ClassFileLoadHook(jvmtiEnv *jvmti_env, JNIEnv *env,
+                           jclass class_being_redefined,
+                           jobject loader, const char* name, jobject protection_domain,
+                           jint class_data_len, const unsigned char* class_data,
+                           jint *new_class_data_len, unsigned char** new_class_data) {
+    if (name != NULL && strcmp(name, "RedefineDoubleDelete$B") == 0) {  // every other class is left untouched
+        if (newClassData == NULL) {  // no patched copy has been produced yet
+            jint res = getBytecodes(jvmti_env, class_data_len, class_data);
+            if (res == JNI_ERR) {
+              printf(">>>    ClassFileLoadHook event: class name %s FAILED\n", name);
+              return;  // outputs are not written, so no replacement bytes are supplied
+            }
+            // Only the first CFLH event for this class hands back replacement bytes; later events just log below.
+            *new_class_data_len = newClassDataLen;
+            *new_class_data = newClassData;
+        }
+        printf(">>>    ClassFileLoadHook event: class name %s\n", name);
+    }
+}
+
+static  /* Shared setup for Agent_OnLoad and Agent_OnAttach; returns JNI_OK on success, JNI_ERR on any failure. */
+jint Agent_Initialize(JavaVM *jvm, char *options, void *reserved) {
+    jint res, size;
+    jvmtiCapabilities caps;
+    jvmtiEventCallbacks callbacks;
+    jvmtiError err;
+    // Obtain the JVMTI environment; it is stored in the file-level jvmti pointer.
+    res = JNI_ENV_PTR(jvm)->GetEnv(JNI_ENV_ARG(jvm, (void **) &jvmti),
+        JVMTI_VERSION_1_2);
+    if (res != JNI_OK || jvmti == NULL) {
+        printf("    Error: wrong result of a valid call to GetEnv!\n");
+        return JNI_ERR;
+    }
+    // Request the three capabilities named in the message below.
+    printf("Enabling following capabilities: can_generate_all_class_hook_events, "
+           "can_retransform_classes, can_redefine_classes");
+    memset(&caps, 0, sizeof(caps));
+    caps.can_generate_all_class_hook_events = 1;
+    caps.can_retransform_classes = 1;
+    caps.can_redefine_classes = 1;
+    printf("\n");  // terminates the "Enabling ..." line printed above
+
+    err = (*jvmti)->AddCapabilities(jvmti, &caps);
+    if (err != JVMTI_ERROR_NONE) {
+        printf("    Error in AddCapabilities: %s (%d)\n", TranslateError(err), err);
+        return JNI_ERR;
+    }
+    // Register Callback_ClassFileLoadHook as the only callback; all other slots stay zeroed.
+    size = (jint)sizeof(callbacks);
+
+    memset(&callbacks, 0, sizeof(callbacks));
+    callbacks.ClassFileLoadHook = Callback_ClassFileLoadHook;
+
+    err = (*jvmti)->SetEventCallbacks(jvmti, &callbacks, size);
+    if (err != JVMTI_ERROR_NONE) {
+        printf("    Error in SetEventCallbacks: %s (%d)\n", TranslateError(err), err);
+        return JNI_ERR;
+    }
+    // Turn on delivery of ClassFileLoadHook events (thread argument is NULL).
+    err = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE, JVMTI_EVENT_CLASS_FILE_LOAD_HOOK, NULL);
+    if (err != JVMTI_ERROR_NONE) {
+        printf("    Error in SetEventNotificationMode: %s (%d)\n", TranslateError(err), err);
+        return JNI_ERR;
+    }
+
+    return JNI_OK;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/test/runtime/VtableTests/VTableTest.java b/test/runtime/VtableTests/VTableTest.java
new file mode 100644
index 0000000..2ca32e4
--- /dev/null
+++ b/test/runtime/VtableTests/VTableTest.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8226798
+ * @summary Check that the vTable for class C gets set up without causing
+ *          an assertion failure.
+ * @compile pkg/A.java
+ * @run main VTableTest
+ */
+
+public class VTableTest {
+    // Intf supplies a public default m(); pkg.A declares its own m() with no access modifier.
+    interface Intf {
+        public default void m() { }
+        public default void unusedButNeededToReproduceIssue() { }
+    }
+
+    static class B extends pkg.A implements Intf {  // inherits from both and declares nothing itself
+    }
+
+    static class C extends B {  // the class whose vtable setup the @summary says is under test
+        public void m() { System.out.println("In C.m()"); }
+    }
+
+    public static void main(String[] args) {
+        new C().m();  // instantiating C and calling m() is the whole test; it prints "In C.m()"
+    }
+}
diff --git a/test/runtime/VtableTests/pkg/A.java b/test/runtime/VtableTests/pkg/A.java
new file mode 100644
index 0000000..a4b7f49
--- /dev/null
+++ b/test/runtime/VtableTests/pkg/A.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package pkg;
+
+public class A {
+    void m() { }  // no access modifier: package-private to pkg, while VTableTest.B extends A from outside pkg
+}