Merge tag 'go1.8' into master

Test: prebuilts/build-tools/build-prebuilts.sh
Test: m -j  (with linux build results)
Change-Id: Ibe903dfd1293d1e3da5a43757f9fbdb31a034711
diff --git a/VERSION b/VERSION
index 4fded12..854106e 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-go1.8rc2
\ No newline at end of file
+go1.8
\ No newline at end of file
diff --git a/doc/conduct.html b/doc/conduct.html
index 5b81681..c40b007 100644
--- a/doc/conduct.html
+++ b/doc/conduct.html
@@ -148,29 +148,26 @@
 
 <p>
 The Go spaces are not free speech venues; they are for discussion about Go.
-These spaces have moderators.
-The goal of the moderators is to facilitate civil discussion about Go.
+Each of these spaces have their own moderators.
 </p>
 
 <p>
 When using the official Go spaces you should act in the spirit of the “Gopher
 values”.
-If you conduct yourself in a way that is explicitly forbidden by the CoC,
-you will be warned and asked to stop.
-If you do not stop, you will be removed from our community spaces temporarily.
-Repeated, willful breaches of the CoC will result in a permanent ban.
+If a reported conflict cannot be resolved amicably, the CoC Working Group
+may make a recommendation to the relevant forum moderators.
 </p>
 
 <p>
-Moderators are held to a higher standard than other community members.
-If a moderator creates an inappropriate situation, they should expect less
-leeway than others, and should expect to be removed from their position if they
-cannot adhere to the CoC.
+CoC Working Group members and forum moderators are held to a higher standard than other community members.
+If a working group member or moderator creates an inappropriate situation, they
+should expect less leeway than others, and should expect to be removed from
+their position if they cannot adhere to the CoC.
 </p>
 
 <p>
-Complaints about moderator actions must be handled using the reporting process
-below.
+Complaints about working group member or moderator actions must be handled 
+using the reporting process below.
 </p>
 
 <h2 id="reporting">Reporting issues</h2>
@@ -185,8 +182,6 @@
 <ul>
 	<li>Aditya Mukerjee &lt;dev@chimeracoder.net&gt;
 	<li>Andrew Gerrand &lt;adg@golang.org&gt;
-	<li>Dave Cheney &lt;dave@cheney.net&gt;
-	<li>Jason Buberel &lt;jbuberel@google.com&gt;
 	<li>Peggy Li &lt;peggyli.224@gmail.com&gt;
 	<li>Sarah Adams &lt;sadams.codes@gmail.com&gt;
 	<li>Steve Francia &lt;steve.francia@gmail.com&gt;
@@ -201,13 +196,10 @@
 </p>
 
 <ul>
-<li>Mail <a href="mailto:conduct@golang.org">conduct@golang.org</a> or
-    <a href="https://golang.org/s/conduct-report">submit an anonymous report</a>.
+<li>Mail <a href="mailto:conduct@golang.org">conduct@golang.org</a>.
     <ul>
     <li>Your message will reach the Working Group.
     <li>Reports are confidential within the Working Group.
-    <li>Should you choose to remain anonymous then the Working Group cannot
-        notify you of the outcome of your report.
     <li>You may contact a member of the group directly if you do not feel
         comfortable contacting the group as a whole. That member will then raise
         the issue with the Working Group as a whole, preserving the privacy of the
@@ -229,11 +221,8 @@
 <li>The Working Group will reach a decision as to how to act. These may include:
     <ul>
     <li>Nothing.
-    <li>A request for a private or public apology.
-    <li>A private or public warning.
-    <li>An imposed vacation (for instance, asking someone to abstain for a week
-        from a mailing list or IRC).
-    <li>A permanent or temporary ban from some or all Go spaces.
+    <li>Passing the report along to the offender.
+    <li>A recommendation of action to the relevant forum moderators.
     </ul>
 <li>The Working Group will reach out to the original reporter to let them know
     the decision.
@@ -246,7 +235,6 @@
 conflicts in the most harmonious way possible.</b>
 We hope that in most cases issues may be resolved through polite discussion and
 mutual agreement.
-Bannings and other forceful measures are to be employed only as a last resort.
 </p>
 
 <p>
diff --git a/doc/devel/release.html b/doc/devel/release.html
index 51957df..d046149 100644
--- a/doc/devel/release.html
+++ b/doc/devel/release.html
@@ -30,6 +30,13 @@
 See the <a href="/security">security policy</a> for more details.
 </p>
 
+<h2 id="go1.8">go1.8 (released 2017/02/16)</h2>
+
+<p>
+Go 1.8 is a major release of Go.
+Read the <a href="/doc/go1.8">Go 1.8 Release Notes</a> for more information.
+</p>
+
 <h2 id="go1.7">go1.7 (released 2016/08/15)</h2>
 
 <p>
@@ -69,6 +76,13 @@
 1.7.4 milestone</a> on our issue tracker for details.
 </p>
 
+<p>
+go1.7.5 (released 2017/01/26) includes fixes to the compiler, runtime,
+and the <code>crypto/x509</code> and <code>time</code> packages.
+See the <a href="https://github.com/golang/go/issues?q=milestone%3AGo1.7.5">Go
+1.7.5 milestone</a> on our issue tracker for details.
+</p>
+
 <h2 id="go1.6">go1.6 (released 2016/02/17)</h2>
 
 <p>
diff --git a/doc/gccgo_install.html b/doc/gccgo_install.html
index ef27fd1..4f6a911 100644
--- a/doc/gccgo_install.html
+++ b/doc/gccgo_install.html
@@ -52,6 +52,19 @@
 should not be visible to Go programs.
 </p>
 
+<p>
+The GCC 6 releases include a complete implementation of the Go 1.6.1
+user libraries.  The Go 1.6 runtime is not fully merged, but that
+should not be visible to Go programs.
+</p>
+
+<p>
+The GCC 7 releases are expected to include a complete implementation
+of the Go 1.8 user libraries.  As with earlier releases, the Go 1.8
+runtime is not fully merged, but that should not be visible to Go
+programs.
+</p>
+
 <h2 id="Source_code">Source code</h2>
 
 <p>
@@ -160,23 +173,6 @@
 make install
 </pre>
 
-<h3 id="Ubuntu">A note on Ubuntu</h3>
-
-<p>
-Current versions of Ubuntu and versions of GCC before 4.8 disagree on
-where system libraries and header files are found.  This is not a
-gccgo issue.  When building older versions of GCC, setting these
-environment variables while configuring and building gccgo may fix the
-problem.
-</p>
-
-<pre>
-LIBRARY_PATH=/usr/lib/x86_64-linux-gnu
-C_INCLUDE_PATH=/usr/include/x86_64-linux-gnu
-CPLUS_INCLUDE_PATH=/usr/include/x86_64-linux-gnu
-export LIBRARY_PATH C_INCLUDE_PATH CPLUS_INCLUDE_PATH
-</pre>
-
 <h2 id="Using_gccgo">Using gccgo</h2>
 
 <p>
@@ -364,12 +360,15 @@
 <h3 id="Types">Types</h3>
 
 <p>
-Basic types map directly: an <code>int</code> in Go is an <code>int</code>
-in C, an <code>int32</code> is an <code>int32_t</code>,
-etc.  Go <code>byte</code> is equivalent to C <code>unsigned
-char</code>.
-Pointers in Go are pointers in C. A Go <code>struct</code> is the same as C
-<code>struct</code> with the same fields and types.
+Basic types map directly: an <code>int32</code> in Go is
+an <code>int32_t</code> in C, an <code>int64</code> is
+an <code>int64_t</code>, etc.
+The Go type <code>int</code> is an integer that is the same size as a
+pointer, and as such corresponds to the C type <code>intptr_t</code>.
+Go <code>byte</code> is equivalent to C <code>unsigned char</code>.
+Pointers in Go are pointers in C.
+A Go <code>struct</code> is the same as C <code>struct</code> with the
+same fields and types.
 </p>
 
 <p>
@@ -380,7 +379,7 @@
 <pre>
 struct __go_string {
   const unsigned char *__data;
-  int __length;
+  intptr_t __length;
 };
 </pre>
 
@@ -400,8 +399,8 @@
 <pre>
 struct __go_slice {
   void *__values;
-  int __count;
-  int __capacity;
+  intptr_t __count;
+  intptr_t __capacity;
 };
 </pre>
 
@@ -526,15 +525,3 @@
 guarantee that it will not change in the future. It is more useful as a
 starting point for real Go code than as a regular procedure.
 </p>
-
-<h2 id="RTEMS_Port">RTEMS Port</h2>
-<p>
-The gccgo compiler has been ported to <a href="http://www.rtems.com/">
-<code>RTEMS</code></a>. <code>RTEMS</code> is a real-time executive
-that provides a high performance environment for embedded applications
-on a range of processors and embedded hardware. The current gccgo
-port is for x86. The goal is to extend the port to most of the
-<a href="http://www.rtems.org/wiki/index.php/SupportedCPUs">
-architectures supported by <code>RTEMS</code></a>. For more information on the port,
-as well as instructions on how to install it, please see this
-<a href="http://www.rtems.org/wiki/index.php/GCCGoRTEMS"><code>RTEMS</code> Wiki page</a>.
diff --git a/doc/go1.8.html b/doc/go1.8.html
index 337f13d..cf4c669 100644
--- a/doc/go1.8.html
+++ b/doc/go1.8.html
@@ -15,12 +15,7 @@
 ul li { margin: 0.5em 0; }
 </style>
 
-<h2 id="introduction">DRAFT RELEASE NOTES - Introduction to Go 1.8</h2>
-
-<p><strong>
-Go 1.8 is not yet released. These are work-in-progress
-release notes. Go 1.8 is expected to be released in February 2017.
-</strong></p>
+<h2 id="introduction">Introduction to Go 1.8</h2>
 
 <p>
 The latest Go release, version 1.8, arrives six months after <a href="go1.7">Go 1.7</a>.
@@ -435,11 +430,11 @@
 <h3 id="plugin">Plugins</h3>
 
 <p>
-  Go now supports a “<code>plugin</code>” build mode for generating
-  plugins written in Go, and a
+  Go now provides early support for plugins with a “<code>plugin</code>”
+  build mode for generating plugins written in Go, and a
   new <a href="/pkg/plugin/"><code>plugin</code></a> package for
-  loading such plugins at run time. Plugin support is only currently
-  available on Linux.
+  loading such plugins at run time. Plugin support is currently only
+  available on Linux. Please report any issues.
 </p>
 
 <h2 id="runtime">Runtime</h2>
@@ -799,9 +794,9 @@
       hardware support for AES-GCM is present.
     </p>
 
-    <p> <!-- CL 27315 -->
+    <p> <!-- CL 27315, CL 35290 -->
       AES-128-CBC cipher suites with SHA-256 are also
-      now supported.
+      now supported, but disabled by default.
     </p>
 
   </dd>
@@ -859,11 +854,12 @@
     <p>
       The <a href="/pkg/database/sql#IsolationLevel"><code>IsolationLevel</code></a>
       can now be set when starting a transaction by setting the isolation level
-      on the <code>Context</code> then passing that <code>Context</code> to
-      <a href="/pkg/database/sql#DB.BeginContext"><code>DB.BeginContext</code></a>.
+      on <a href="/pkg/database/sql#TxOptions.Isolation"><code>TxOptions.Isolation</code></a> and passing
+      it to <a href="/pkg/database/sql#DB.BeginTx"><code>DB.BeginTx</code></a>.
       An error will be returned if an isolation level is selected that the driver
       does not support. A read-only attribute may also be set on the transaction
-      with <a href="/pkg/database/sql/#ReadOnlyContext"><code>ReadOnlyContext</code></a>.
+      by setting <a href="/pkg/database/sql/#TxOptions.ReadOnly"><code>TxOptions.ReadOnly</code></a>
+      to true.
     </p>
     <p>
       Queries now expose the SQL column type information for drivers that support it.
@@ -1645,6 +1641,17 @@
       and only the overall execution of the test binary would fail.
     </p>
 
+    <p><!-- CL 32455 -->
+      The signature of the
+      <a href="/pkg/testing/#MainStart"><code>MainStart</code></a>
+      function has changed, as allowed by the documentation. It is an
+      internal detail and not part of the Go 1 compatibility promise.
+      If you're not calling <code>MainStart</code> directly but see
+      errors, that likely means you set the
+      normally-empty <code>GOROOT</code> environment variable and it
+      doesn't match the version of your <code>go</code> command's binary.
+    </p>
+
   </dd>
 </dl>
 
diff --git a/doc/install-source.html b/doc/install-source.html
index 4bf0ba3..45c5bbb 100644
--- a/doc/install-source.html
+++ b/doc/install-source.html
@@ -147,6 +147,9 @@
 which contains the Go 1.4 source code plus accumulated fixes
 to keep the tools running on newer operating systems.
 (Go 1.4 was the last distribution in which the tool chain was written in C.)
+After unpacking the Go 1.4 source, <code>cd</code> to
+the <code>src</code> subdirectory and run <code>make.bash</code> (or,
+on Windows, <code>make.bat</code>).
 </p>
 
 <p>
@@ -218,7 +221,7 @@
 Change to the directory that will be its parent
 and make sure the <code>go</code> directory does not exist.
 Then clone the repository and check out the latest release tag
-(<code class="versionTag">go1.7.4</code>, for example):</p>
+(<code class="versionTag">go1.8</code>, for example):</p>
 
 <pre>
 $ git clone https://go.googlesource.com/go
@@ -406,7 +409,7 @@
 <a href="//groups.google.com/group/golang-announce">golang-announce</a>
 mailing list.
 Each announcement mentions the latest release tag, for instance,
-<code class="versionTag">go1.7.4</code>.
+<code class="versionTag">go1.8</code>.
 </p>
 
 <p>
diff --git a/misc/cgo/test/issue18146.go b/misc/cgo/test/issue18146.go
index ffb04e9..3c60046 100644
--- a/misc/cgo/test/issue18146.go
+++ b/misc/cgo/test/issue18146.go
@@ -73,7 +73,7 @@
 		}
 		runtime.GOMAXPROCS(threads)
 		argv := append(os.Args, "-test.run=NoSuchTestExists")
-		if err := syscall.Exec(os.Args[0], argv, nil); err != nil {
+		if err := syscall.Exec(os.Args[0], argv, os.Environ()); err != nil {
 			t.Fatal(err)
 		}
 	}
diff --git a/misc/cgo/test/sigaltstack.go b/misc/cgo/test/sigaltstack.go
index b16adc7..2b7a1ec 100644
--- a/misc/cgo/test/sigaltstack.go
+++ b/misc/cgo/test/sigaltstack.go
@@ -17,7 +17,7 @@
 static stack_t oss;
 static char signalStack[SIGSTKSZ];
 
-static void changeSignalStack() {
+static void changeSignalStack(void) {
 	stack_t ss;
 	memset(&ss, 0, sizeof ss);
 	ss.ss_sp = signalStack;
@@ -29,7 +29,7 @@
 	}
 }
 
-static void restoreSignalStack() {
+static void restoreSignalStack(void) {
 #if (defined(__x86_64__) || defined(__i386__)) && defined(__APPLE__)
 	// The Darwin C library enforces a minimum that the kernel does not.
 	// This is OK since we allocated this much space in mpreinit,
@@ -42,7 +42,7 @@
 	}
 }
 
-static int zero() {
+static int zero(void) {
 	return 0;
 }
 */
diff --git a/misc/cgo/testsanitizers/msan_shared.go b/misc/cgo/testsanitizers/msan_shared.go
new file mode 100644
index 0000000..966947c
--- /dev/null
+++ b/misc/cgo/testsanitizers/msan_shared.go
@@ -0,0 +1,12 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This program segfaulted during libpreinit when built with -msan:
+// http://golang.org/issue/18707
+
+package main
+
+import "C"
+
+func main() {}
diff --git a/misc/cgo/testsanitizers/test.bash b/misc/cgo/testsanitizers/test.bash
index dfc6d38..4da8502 100755
--- a/misc/cgo/testsanitizers/test.bash
+++ b/misc/cgo/testsanitizers/test.bash
@@ -68,6 +68,25 @@
 
 status=0
 
+testmsanshared() {
+  goos=$(go env GOOS)
+  suffix="-installsuffix testsanitizers"
+  libext="so"
+  if [ "$goos" == "darwin" ]; then
+	  libext="dylib"
+  fi
+  go build -msan -buildmode=c-shared $suffix -o ${TMPDIR}/libmsanshared.$libext msan_shared.go
+
+	echo 'int main() { return 0; }' > ${TMPDIR}/testmsanshared.c
+  $CC $(go env GOGCCFLAGS) -fsanitize=memory -o ${TMPDIR}/testmsanshared ${TMPDIR}/testmsanshared.c ${TMPDIR}/libmsanshared.$libext
+
+  if ! LD_LIBRARY_PATH=. ${TMPDIR}/testmsanshared; then
+    echo "FAIL: msan_shared"
+    status=1
+  fi
+  rm -f ${TMPDIR}/{testmsanshared,testmsanshared.c,libmsanshared.$libext}
+}
+
 if test "$msan" = "yes"; then
     if ! go build -msan std; then
 	echo "FAIL: build -msan std"
@@ -108,6 +127,8 @@
 	echo "FAIL: msan_fail"
 	status=1
     fi
+
+    testmsanshared
 fi
 
 if test "$tsan" = "yes"; then
diff --git a/misc/cgo/testshared/src/depBase/dep.go b/misc/cgo/testshared/src/depBase/dep.go
index a518b4e..9f86710 100644
--- a/misc/cgo/testshared/src/depBase/dep.go
+++ b/misc/cgo/testshared/src/depBase/dep.go
@@ -5,6 +5,8 @@
 	"reflect"
 )
 
+var SlicePtr interface{} = &[]int{}
+
 var V int = 1
 
 var HasMask []string = []string{"hi"}
diff --git a/misc/cgo/testshared/src/exe/exe.go b/misc/cgo/testshared/src/exe/exe.go
index 4337271..84302a8 100644
--- a/misc/cgo/testshared/src/exe/exe.go
+++ b/misc/cgo/testshared/src/exe/exe.go
@@ -19,6 +19,8 @@
 	return nil
 }
 
+var slicePtr interface{} = &[]int{}
+
 func main() {
 	defer depBase.ImplementedInAsm()
 	// This code below causes various go.itab.* symbols to be generated in
@@ -32,4 +34,11 @@
 	if reflect.TypeOf(F).Out(0) != reflect.TypeOf(c) {
 		panic("bad reflection results, see golang.org/issue/18252")
 	}
+
+	sp := reflect.New(reflect.TypeOf(slicePtr).Elem())
+	s := sp.Interface()
+
+	if reflect.TypeOf(s) != reflect.TypeOf(slicePtr) {
+		panic("bad reflection results, see golang.org/issue/18729")
+	}
 }
diff --git a/src/cmd/compile/internal/gc/sinit.go b/src/cmd/compile/internal/gc/sinit.go
index 6b3c426..416e2b2 100644
--- a/src/cmd/compile/internal/gc/sinit.go
+++ b/src/cmd/compile/internal/gc/sinit.go
@@ -585,7 +585,7 @@
 }
 
 func (n *Node) isSimpleName() bool {
-	return n.Op == ONAME && n.Addable && n.Class != PAUTOHEAP
+	return n.Op == ONAME && n.Addable && n.Class != PAUTOHEAP && n.Class != PEXTERN
 }
 
 func litas(l *Node, r *Node, init *Nodes) {
diff --git a/src/cmd/compile/internal/gc/util.go b/src/cmd/compile/internal/gc/util.go
index bb5cede..c62bd00 100644
--- a/src/cmd/compile/internal/gc/util.go
+++ b/src/cmd/compile/internal/gc/util.go
@@ -57,8 +57,13 @@
 			Fatalf("%v", err)
 		}
 		atExit(func() {
-			runtime.GC() // profile all outstanding allocations
-			if err := pprof.WriteHeapProfile(f); err != nil {
+			// Profile all outstanding allocations.
+			runtime.GC()
+			// compilebench parses the memory profile to extract memstats,
+			// which are only written in the legacy pprof format.
+			// See golang.org/issue/18641 and runtime/pprof/pprof.go:writeHeap.
+			const writeLegacyFormat = 1
+			if err := pprof.Lookup("heap").WriteTo(f, writeLegacyFormat); err != nil {
 				Fatalf("%v", err)
 			}
 		})
diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go
index e2d3c28..a9dafa8 100644
--- a/src/cmd/compile/internal/s390x/ssa.go
+++ b/src/cmd/compile/internal/s390x/ssa.go
@@ -424,7 +424,7 @@
 		p.To.Type = obj.TYPE_MEM
 		p.To.Reg = v.Args[0].Reg()
 		gc.AddAux2(&p.To, v, sc.Off())
-	case ssa.OpCopy, ssa.OpS390XMOVDconvert:
+	case ssa.OpCopy, ssa.OpS390XMOVDconvert, ssa.OpS390XMOVDreg:
 		if v.Type.IsMemory() {
 			return
 		}
@@ -433,6 +433,11 @@
 		if x != y {
 			opregreg(moveByType(v.Type), y, x)
 		}
+	case ssa.OpS390XMOVDnop:
+		if v.Reg() != v.Args[0].Reg() {
+			v.Fatalf("input[0] and output not in same register %s", v.LongString())
+		}
+		// nothing to do
 	case ssa.OpLoadReg:
 		if v.Type.IsFlags() {
 			v.Fatalf("load flags not implemented: %v", v.LongString())
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index c36b6f7..940c060 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -529,109 +529,147 @@
 // can be encoded in the instructions
 // since this rewriting takes place before stack allocation, the offset to SP is unknown,
 // so don't do it for args and locals with unaligned offset
-(MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVBload [off1+off2] {sym} ptr mem)
-(MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVBUload [off1+off2] {sym} ptr mem)
+(MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+	(MOVBload [off1+off2] {sym} ptr mem)
+(MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+	(MOVBUload [off1+off2] {sym} ptr mem)
 (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem)
-	&& (off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(MOVHload [off1+off2] {sym} ptr mem)
 (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem)
-	&& (off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(MOVHUload [off1+off2] {sym} ptr mem)
 (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
-	&& (off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(MOVWload [off1+off2] {sym} ptr mem)
 (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
-	&& (off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(MOVWUload [off1+off2] {sym} ptr mem)
 (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
-	&& (off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(MOVDload [off1+off2] {sym} ptr mem)
 (FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem)
-	&& (off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(FMOVSload [off1+off2] {sym} ptr mem)
 (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
-	&& (off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(FMOVDload [off1+off2] {sym} ptr mem)
 
-(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (MOVBstore [off1+off2] {sym} ptr val mem)
+(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2) ->
+	(MOVBstore [off1+off2] {sym} ptr val mem)
 (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-	&& (off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(MOVHstore [off1+off2] {sym} ptr val mem)
 (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-	&& (off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(MOVWstore [off1+off2] {sym} ptr val mem)
 (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-	&& (off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(MOVDstore [off1+off2] {sym} ptr val mem)
 (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-	&& (off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(FMOVSstore [off1+off2] {sym} ptr val mem)
 (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-	&& (off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(FMOVDstore [off1+off2] {sym} ptr val mem)
-(MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVBstorezero [off1+off2] {sym} ptr mem)
+(MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+	(MOVBstorezero [off1+off2] {sym} ptr mem)
 (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-	&& (off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(MOVHstorezero [off1+off2] {sym} ptr mem)
 (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-	&& (off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(MOVWstorezero [off1+off2] {sym} ptr mem)
 (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-	&& (off1+off2)%2==8 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym) ->
+	&& is32Bit(off1+off2)
+	&& ((off1+off2)%2==8 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)) ->
 	(MOVDstorezero [off1+off2] {sym} ptr mem)
 
-(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2) ->
 	(MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
-(MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+(MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2) ->
 	(MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 
-(MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
+(MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2) ->
 	(MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
-(MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+(MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2) ->
 	(MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
+	&& is32Bit(off1+off2)
 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1)) ->
 	(MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index 0e0f1f9..cad753e 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -338,9 +338,9 @@
 (Geq32F x y) -> (FGreaterEqual (FCMPU x y))
 (Geq64F x y) -> (FGreaterEqual (FCMPU x y))
 
-(Geq8U x y)  -> (GreaterEqual (CMPU (ZeroExt8to32 x) (ZeroExt8to32 y)))
-(Geq16U x y) -> (GreaterEqual (CMPU (ZeroExt16to32 x) (ZeroExt16to32 y)))
-(Geq32U x y) -> (GreaterEqual (CMPU x y))
+(Geq8U x y)  -> (GreaterEqual (CMPWU (ZeroExt8to32 x) (ZeroExt8to32 y)))
+(Geq16U x y) -> (GreaterEqual (CMPWU (ZeroExt16to32 x) (ZeroExt16to32 y)))
+(Geq32U x y) -> (GreaterEqual (CMPWU x y))
 (Geq64U x y) -> (GreaterEqual (CMPU x y))
 
 // Absorb pseudo-ops into blocks.
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index 3e0533a..c26515c 100644
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -312,9 +312,12 @@
 
 // Lowering loads
 (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVDload ptr mem)
-(Load <t> ptr mem) && is32BitInt(t) -> (MOVWZload ptr mem)
-(Load <t> ptr mem) && is16BitInt(t) -> (MOVHZload ptr mem)
-(Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) -> (MOVBZload ptr mem)
+(Load <t> ptr mem) && is32BitInt(t) && isSigned(t) -> (MOVWload ptr mem)
+(Load <t> ptr mem) && is32BitInt(t) && !isSigned(t) -> (MOVWZload ptr mem)
+(Load <t> ptr mem) && is16BitInt(t) && isSigned(t) -> (MOVHload ptr mem)
+(Load <t> ptr mem) && is16BitInt(t) && !isSigned(t) -> (MOVHZload ptr mem)
+(Load <t> ptr mem) && is8BitInt(t) && isSigned(t) -> (MOVBload ptr mem)
+(Load <t> ptr mem) && (t.IsBoolean() || (is8BitInt(t) && !isSigned(t))) -> (MOVBZload ptr mem)
 (Load <t> ptr mem) && is32BitFloat(t) -> (FMOVSload ptr mem)
 (Load <t> ptr mem) && is64BitFloat(t) -> (FMOVDload ptr mem)
 
@@ -445,16 +448,20 @@
 // ***************************
 // TODO: Should the optimizations be a separate pass?
 
+// if a register move has only 1 use, just use the same register without emitting instruction
+// MOVDnop doesn't emit instruction, only for ensuring the type.
+(MOVDreg x) && x.Uses == 1 -> (MOVDnop x)
+
 // Fold sign extensions into conditional moves of constants.
 // Designed to remove the MOVBZreg inserted by the If lowering.
-(MOVBZreg x:(MOVDLT (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
-(MOVBZreg x:(MOVDLE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
-(MOVBZreg x:(MOVDGT (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
-(MOVBZreg x:(MOVDGE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
-(MOVBZreg x:(MOVDEQ (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
-(MOVBZreg x:(MOVDNE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
-(MOVBZreg x:(MOVDGTnoinv (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
-(MOVBZreg x:(MOVDGEnoinv (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> x
+(MOVBZreg x:(MOVDLT (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
+(MOVBZreg x:(MOVDLE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
+(MOVBZreg x:(MOVDGT (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
+(MOVBZreg x:(MOVDGE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
+(MOVBZreg x:(MOVDEQ (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
+(MOVBZreg x:(MOVDNE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
+(MOVBZreg x:(MOVDGTnoinv (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
+(MOVBZreg x:(MOVDGEnoinv (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
 
 // Fold boolean tests into blocks.
 (NE (CMPWconst [0] (MOVDLT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (LT cmp yes no)
@@ -572,46 +579,46 @@
 (MOVDNE x y (InvertFlags cmp)) -> (MOVDNE x y cmp)
 
 // don't extend after proper load
-(MOVBreg x:(MOVBload _ _)) -> x
-(MOVBZreg x:(MOVBZload _ _)) -> x
-(MOVHreg x:(MOVBload _ _)) -> x
-(MOVHreg x:(MOVBZload _ _)) -> x
-(MOVHreg x:(MOVHload _ _)) -> x
-(MOVHZreg x:(MOVBZload _ _)) -> x
-(MOVHZreg x:(MOVHZload _ _)) -> x
-(MOVWreg x:(MOVBload _ _)) -> x
-(MOVWreg x:(MOVBZload _ _)) -> x
-(MOVWreg x:(MOVHload _ _)) -> x
-(MOVWreg x:(MOVHZload _ _)) -> x
-(MOVWreg x:(MOVWload _ _)) -> x
-(MOVWZreg x:(MOVBZload _ _)) -> x
-(MOVWZreg x:(MOVHZload _ _)) -> x
-(MOVWZreg x:(MOVWZload _ _)) -> x
+(MOVBreg x:(MOVBload _ _)) -> (MOVDreg x)
+(MOVBZreg x:(MOVBZload _ _)) -> (MOVDreg x)
+(MOVHreg x:(MOVBload _ _)) -> (MOVDreg x)
+(MOVHreg x:(MOVBZload _ _)) -> (MOVDreg x)
+(MOVHreg x:(MOVHload _ _)) -> (MOVDreg x)
+(MOVHZreg x:(MOVBZload _ _)) -> (MOVDreg x)
+(MOVHZreg x:(MOVHZload _ _)) -> (MOVDreg x)
+(MOVWreg x:(MOVBload _ _)) -> (MOVDreg x)
+(MOVWreg x:(MOVBZload _ _)) -> (MOVDreg x)
+(MOVWreg x:(MOVHload _ _)) -> (MOVDreg x)
+(MOVWreg x:(MOVHZload _ _)) -> (MOVDreg x)
+(MOVWreg x:(MOVWload _ _)) -> (MOVDreg x)
+(MOVWZreg x:(MOVBZload _ _)) -> (MOVDreg x)
+(MOVWZreg x:(MOVHZload _ _)) -> (MOVDreg x)
+(MOVWZreg x:(MOVWZload _ _)) -> (MOVDreg x)
 
 // don't extend if argument is already extended
-(MOVBreg x:(Arg <t>)) && is8BitInt(t) && isSigned(t) -> x
-(MOVBZreg x:(Arg <t>)) && is8BitInt(t) && !isSigned(t) -> x
-(MOVHreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && isSigned(t) -> x
-(MOVHZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && !isSigned(t) -> x
-(MOVWreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t) -> x
-(MOVWZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t) -> x
+(MOVBreg x:(Arg <t>)) && is8BitInt(t) && isSigned(t) -> (MOVDreg x)
+(MOVBZreg x:(Arg <t>)) && is8BitInt(t) && !isSigned(t) -> (MOVDreg x)
+(MOVHreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && isSigned(t) -> (MOVDreg x)
+(MOVHZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && !isSigned(t) -> (MOVDreg x)
+(MOVWreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t) -> (MOVDreg x)
+(MOVWZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t) -> (MOVDreg x)
 
 // fold double extensions
-(MOVBreg x:(MOVBreg _)) -> x
-(MOVBZreg x:(MOVBZreg _)) -> x
-(MOVHreg x:(MOVBreg _)) -> x
-(MOVHreg x:(MOVBZreg _)) -> x
-(MOVHreg x:(MOVHreg _)) -> x
-(MOVHZreg x:(MOVBZreg _)) -> x
-(MOVHZreg x:(MOVHZreg _)) -> x
-(MOVWreg x:(MOVBreg _)) -> x
-(MOVWreg x:(MOVBZreg _)) -> x
-(MOVWreg x:(MOVHreg _)) -> x
-(MOVWreg x:(MOVHreg _)) -> x
-(MOVWreg x:(MOVWreg _)) -> x
-(MOVWZreg x:(MOVBZreg _)) -> x
-(MOVWZreg x:(MOVHZreg _)) -> x
-(MOVWZreg x:(MOVWZreg _)) -> x
+(MOVBreg x:(MOVBreg _)) -> (MOVDreg x)
+(MOVBZreg x:(MOVBZreg _)) -> (MOVDreg x)
+(MOVHreg x:(MOVBreg _)) -> (MOVDreg x)
+(MOVHreg x:(MOVBZreg _)) -> (MOVDreg x)
+(MOVHreg x:(MOVHreg _)) -> (MOVDreg x)
+(MOVHZreg x:(MOVBZreg _)) -> (MOVDreg x)
+(MOVHZreg x:(MOVHZreg _)) -> (MOVDreg x)
+(MOVWreg x:(MOVBreg _)) -> (MOVDreg x)
+(MOVWreg x:(MOVBZreg _)) -> (MOVDreg x)
+(MOVWreg x:(MOVHreg _)) -> (MOVDreg x)
+(MOVWreg x:(MOVHreg _)) -> (MOVDreg x)
+(MOVWreg x:(MOVWreg _)) -> (MOVDreg x)
+(MOVWZreg x:(MOVBZreg _)) -> (MOVDreg x)
+(MOVWZreg x:(MOVHZreg _)) -> (MOVDreg x)
+(MOVWZreg x:(MOVWZreg _)) -> (MOVDreg x)
 
 // fold extensions into constants
 (MOVBreg (MOVDconst [c])) -> (MOVDconst [int64(int8(c))])
@@ -641,10 +648,10 @@
 (MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
 
 // replace load from same location as preceding store with copy
-(MOVBZload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
-(MOVHZload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
-(MOVWZload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
-(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
+(MOVBZload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDreg x)
+(MOVHZload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDreg x)
+(MOVWZload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDreg x)
+(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDreg x)
 
 // Don't extend before storing
 (MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
@@ -885,9 +892,9 @@
 (MOVDEQ y _ (FlagLT)) -> y
 (MOVDEQ y _ (FlagGT)) -> y
 
-(MOVDNE _ y (FlagEQ)) -> y
-(MOVDNE x _ (FlagLT)) -> x
-(MOVDNE x _ (FlagGT)) -> x
+(MOVDNE y _ (FlagEQ)) -> y
+(MOVDNE _ x (FlagLT)) -> x
+(MOVDNE _ x (FlagGT)) -> x
 
 (MOVDLT y _ (FlagEQ)) -> y
 (MOVDLT _ x (FlagLT)) -> x
diff --git a/src/cmd/compile/internal/ssa/gen/S390XOps.go b/src/cmd/compile/internal/ssa/gen/S390XOps.go
index 7a25c26..29383f6 100644
--- a/src/cmd/compile/internal/ssa/gen/S390XOps.go
+++ b/src/cmd/compile/internal/ssa/gen/S390XOps.go
@@ -311,6 +311,9 @@
 		{name: "MOVHZreg", argLength: 1, reg: gp11sp, asm: "MOVHZ", typ: "UInt64"}, // zero extend arg0 from int16 to int64
 		{name: "MOVWreg", argLength: 1, reg: gp11sp, asm: "MOVW", typ: "Int64"},    // sign extend arg0 from int32 to int64
 		{name: "MOVWZreg", argLength: 1, reg: gp11sp, asm: "MOVWZ", typ: "UInt64"}, // zero extend arg0 from int32 to int64
+		{name: "MOVDreg", argLength: 1, reg: gp11sp, asm: "MOVD"},                  // move from arg0
+
+		{name: "MOVDnop", argLength: 1, reg: gp11, resultInArg0: true}, // nop, return arg0 in same register
 
 		{name: "MOVDconst", reg: gp01, asm: "MOVD", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint
 
diff --git a/src/cmd/compile/internal/ssa/nilcheck.go b/src/cmd/compile/internal/ssa/nilcheck.go
index 9f58db6..0a34cd1 100644
--- a/src/cmd/compile/internal/ssa/nilcheck.go
+++ b/src/cmd/compile/internal/ssa/nilcheck.go
@@ -82,7 +82,7 @@
 				}
 			}
 
-			// Next, process values in the block.
+			// Next, eliminate any redundant nil checks in this block.
 			i := 0
 			for _, v := range b.Values {
 				b.Values[i] = v
@@ -105,13 +105,10 @@
 							f.Config.Warnl(v.Line, "removed nil check")
 						}
 						v.reset(OpUnknown)
+						// TODO: f.freeValue(v)
 						i--
 						continue
 					}
-					// Record the fact that we know ptr is non nil, and remember to
-					// undo that information when this dominator subtree is done.
-					nonNilValues[ptr.ID] = true
-					work = append(work, bp{op: ClearPtr, ptr: ptr})
 				}
 			}
 			for j := i; j < len(b.Values); j++ {
@@ -119,6 +116,21 @@
 			}
 			b.Values = b.Values[:i]
 
+			// Finally, find redundant nil checks for subsequent blocks.
+			// Note that we can't add these until the loop above is done, as the
+			// values in the block are not ordered in any way when this pass runs.
+			// This was the cause of issue #18725.
+			for _, v := range b.Values {
+				if v.Op != OpNilCheck {
+					continue
+				}
+				ptr := v.Args[0]
+				// Record the fact that we know ptr is non nil, and remember to
+				// undo that information when this dominator subtree is done.
+				nonNilValues[ptr.ID] = true
+				work = append(work, bp{op: ClearPtr, ptr: ptr})
+			}
+
 			// Add all dominated blocks to the work list.
 			for w := sdom[node.block.ID].child; w != nil; w = sdom[w.ID].sibling {
 				work = append(work, bp{op: Work, block: w})
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index a63c5b9..9d11d03 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1473,6 +1473,8 @@
 	OpS390XMOVHZreg
 	OpS390XMOVWreg
 	OpS390XMOVWZreg
+	OpS390XMOVDreg
+	OpS390XMOVDnop
 	OpS390XMOVDconst
 	OpS390XCFDBRA
 	OpS390XCGDBRA
@@ -18571,6 +18573,32 @@
 		},
 	},
 	{
+		name:   "MOVDreg",
+		argLen: 1,
+		asm:    s390x.AMOVD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 54271}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
+			},
+			outputs: []outputInfo{
+				{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+			},
+		},
+	},
+	{
+		name:         "MOVDnop",
+		argLen:       1,
+		resultInArg0: true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+			},
+			outputs: []outputInfo{
+				{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+			},
+		},
+	},
+	{
 		name:              "MOVDconst",
 		auxType:           auxInt64,
 		argLen:            0,
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index dd5aa28..862c645 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -2625,7 +2625,7 @@
 	b := v.Block
 	_ = b
 	// match: (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
-	// cond: (off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (FMOVDload [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -2637,7 +2637,7 @@
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64FMOVDload)
@@ -2648,7 +2648,7 @@
 		return true
 	}
 	// match: (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -2661,7 +2661,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64FMOVDload)
@@ -2677,7 +2677,7 @@
 	b := v.Block
 	_ = b
 	// match: (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-	// cond: (off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (FMOVDstore [off1+off2] {sym} ptr val mem)
 	for {
 		off1 := v.AuxInt
@@ -2690,7 +2690,7 @@
 		ptr := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64FMOVDstore)
@@ -2702,7 +2702,7 @@
 		return true
 	}
 	// match: (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 	for {
 		off1 := v.AuxInt
@@ -2716,7 +2716,7 @@
 		ptr := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64FMOVDstore)
@@ -2733,7 +2733,7 @@
 	b := v.Block
 	_ = b
 	// match: (FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem)
-	// cond: (off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (FMOVSload [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -2745,7 +2745,7 @@
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64FMOVSload)
@@ -2756,7 +2756,7 @@
 		return true
 	}
 	// match: (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -2769,7 +2769,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64FMOVSload)
@@ -2785,7 +2785,7 @@
 	b := v.Block
 	_ = b
 	// match: (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-	// cond: (off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (FMOVSstore [off1+off2] {sym} ptr val mem)
 	for {
 		off1 := v.AuxInt
@@ -2798,7 +2798,7 @@
 		ptr := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64FMOVSstore)
@@ -2810,7 +2810,7 @@
 		return true
 	}
 	// match: (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 	for {
 		off1 := v.AuxInt
@@ -2824,7 +2824,7 @@
 		ptr := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64FMOVSstore)
@@ -3511,7 +3511,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem)
-	// cond:
+	// cond: is32Bit(off1+off2)
 	// result: (MOVBUload [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -3523,6 +3523,9 @@
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
+		if !(is32Bit(off1 + off2)) {
+			break
+		}
 		v.reset(OpARM64MOVBUload)
 		v.AuxInt = off1 + off2
 		v.Aux = sym
@@ -3531,7 +3534,7 @@
 		return true
 	}
 	// match: (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1,sym2)
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2)
 	// result: (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -3544,7 +3547,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2)) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
 			break
 		}
 		v.reset(OpARM64MOVBUload)
@@ -3623,7 +3626,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem)
-	// cond:
+	// cond: is32Bit(off1+off2)
 	// result: (MOVBload [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -3635,6 +3638,9 @@
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
+		if !(is32Bit(off1 + off2)) {
+			break
+		}
 		v.reset(OpARM64MOVBload)
 		v.AuxInt = off1 + off2
 		v.Aux = sym
@@ -3643,7 +3649,7 @@
 		return true
 	}
 	// match: (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1,sym2)
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2)
 	// result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -3656,7 +3662,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2)) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
 			break
 		}
 		v.reset(OpARM64MOVBload)
@@ -3735,7 +3741,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-	// cond:
+	// cond: is32Bit(off1+off2)
 	// result: (MOVBstore [off1+off2] {sym} ptr val mem)
 	for {
 		off1 := v.AuxInt
@@ -3748,6 +3754,9 @@
 		ptr := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
+		if !(is32Bit(off1 + off2)) {
+			break
+		}
 		v.reset(OpARM64MOVBstore)
 		v.AuxInt = off1 + off2
 		v.Aux = sym
@@ -3757,7 +3766,7 @@
 		return true
 	}
 	// match: (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-	// cond: canMergeSym(sym1,sym2)
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2)
 	// result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 	for {
 		off1 := v.AuxInt
@@ -3771,7 +3780,7 @@
 		ptr := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !(canMergeSym(sym1, sym2)) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
 			break
 		}
 		v.reset(OpARM64MOVBstore)
@@ -3936,7 +3945,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-	// cond:
+	// cond: is32Bit(off1+off2)
 	// result: (MOVBstorezero [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -3948,6 +3957,9 @@
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
+		if !(is32Bit(off1 + off2)) {
+			break
+		}
 		v.reset(OpARM64MOVBstorezero)
 		v.AuxInt = off1 + off2
 		v.Aux = sym
@@ -3956,7 +3968,7 @@
 		return true
 	}
 	// match: (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1,sym2)
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2)
 	// result: (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -3969,7 +3981,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2)) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) {
 			break
 		}
 		v.reset(OpARM64MOVBstorezero)
@@ -3985,7 +3997,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
-	// cond: (off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (MOVDload [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -3997,7 +4009,7 @@
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64MOVDload)
@@ -4008,7 +4020,7 @@
 		return true
 	}
 	// match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -4021,7 +4033,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64MOVDload)
@@ -4088,7 +4100,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-	// cond: (off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (MOVDstore [off1+off2] {sym} ptr val mem)
 	for {
 		off1 := v.AuxInt
@@ -4101,7 +4113,7 @@
 		ptr := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64MOVDstore)
@@ -4113,7 +4125,7 @@
 		return true
 	}
 	// match: (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 	for {
 		off1 := v.AuxInt
@@ -4127,7 +4139,7 @@
 		ptr := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64MOVDstore)
@@ -4166,7 +4178,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-	// cond: (off1+off2)%2==8 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%2==8 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (MOVDstorezero [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -4178,7 +4190,7 @@
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !((off1+off2)%2 == 8 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%2 == 8 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64MOVDstorezero)
@@ -4189,7 +4201,7 @@
 		return true
 	}
 	// match: (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%8==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -4202,7 +4214,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%8 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64MOVDstorezero)
@@ -4218,7 +4230,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem)
-	// cond: (off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (MOVHUload [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -4230,7 +4242,7 @@
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64MOVHUload)
@@ -4241,7 +4253,7 @@
 		return true
 	}
 	// match: (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -4254,7 +4266,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64MOVHUload)
@@ -4357,7 +4369,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem)
-	// cond: (off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (MOVHload [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -4369,7 +4381,7 @@
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64MOVHload)
@@ -4380,7 +4392,7 @@
 		return true
 	}
 	// match: (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -4393,7 +4405,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64MOVHload)
@@ -4520,7 +4532,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-	// cond: (off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (MOVHstore [off1+off2] {sym} ptr val mem)
 	for {
 		off1 := v.AuxInt
@@ -4533,7 +4545,7 @@
 		ptr := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64MOVHstore)
@@ -4545,7 +4557,7 @@
 		return true
 	}
 	// match: (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 	for {
 		off1 := v.AuxInt
@@ -4559,7 +4571,7 @@
 		ptr := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64MOVHstore)
@@ -4682,7 +4694,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-	// cond: (off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (MOVHstorezero [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -4694,7 +4706,7 @@
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64MOVHstorezero)
@@ -4705,7 +4717,7 @@
 		return true
 	}
 	// match: (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%2==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -4718,7 +4730,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%2 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64MOVHstorezero)
@@ -4734,7 +4746,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem)
-	// cond: (off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (MOVWUload [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -4746,7 +4758,7 @@
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64MOVWUload)
@@ -4757,7 +4769,7 @@
 		return true
 	}
 	// match: (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -4770,7 +4782,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64MOVWUload)
@@ -4897,7 +4909,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem)
-	// cond: (off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (MOVWload [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -4909,7 +4921,7 @@
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64MOVWload)
@@ -4920,7 +4932,7 @@
 		return true
 	}
 	// match: (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -4933,7 +4945,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64MOVWload)
@@ -5108,7 +5120,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem)
-	// cond: (off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (MOVWstore [off1+off2] {sym} ptr val mem)
 	for {
 		off1 := v.AuxInt
@@ -5121,7 +5133,7 @@
 		ptr := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64MOVWstore)
@@ -5133,7 +5145,7 @@
 		return true
 	}
 	// match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 	for {
 		off1 := v.AuxInt
@@ -5147,7 +5159,7 @@
 		ptr := v_0.Args[0]
 		val := v.Args[1]
 		mem := v.Args[2]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64MOVWstore)
@@ -5228,7 +5240,7 @@
 	b := v.Block
 	_ = b
 	// match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
-	// cond: (off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym)
+	// cond: is32Bit(off1+off2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym) && !isAuto(sym))
 	// result: (MOVWstorezero [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -5240,7 +5252,7 @@
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym)) {
+		if !(is32Bit(off1+off2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym) && !isAuto(sym))) {
 			break
 		}
 		v.reset(OpARM64MOVWstorezero)
@@ -5251,7 +5263,7 @@
 		return true
 	}
 	// match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
-	// cond: canMergeSym(sym1,sym2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
+	// cond: canMergeSym(sym1,sym2) 	&& is32Bit(off1+off2) 	&& ((off1+off2)%4==0 || off1+off2<256 && off1+off2>-256 && !isArg(sym1) && !isAuto(sym1))
 	// result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
@@ -5264,7 +5276,7 @@
 		sym2 := v_0.Aux
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		if !(canMergeSym(sym1, sym2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
+		if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2) && ((off1+off2)%4 == 0 || off1+off2 < 256 && off1+off2 > -256 && !isArg(sym1) && !isAuto(sym1))) {
 			break
 		}
 		v.reset(OpARM64MOVWstorezero)
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
index 8c8373b..031459c 100644
--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
+++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
@@ -1543,12 +1543,12 @@
 	_ = b
 	// match: (Geq16U x y)
 	// cond:
-	// result: (GreaterEqual (CMPU (ZeroExt16to32 x) (ZeroExt16to32 y)))
+	// result: (GreaterEqual (CMPWU (ZeroExt16to32 x) (ZeroExt16to32 y)))
 	for {
 		x := v.Args[0]
 		y := v.Args[1]
 		v.reset(OpPPC64GreaterEqual)
-		v0 := b.NewValue0(v.Line, OpPPC64CMPU, TypeFlags)
+		v0 := b.NewValue0(v.Line, OpPPC64CMPWU, TypeFlags)
 		v1 := b.NewValue0(v.Line, OpZeroExt16to32, config.fe.TypeUInt32())
 		v1.AddArg(x)
 		v0.AddArg(v1)
@@ -1598,12 +1598,12 @@
 	_ = b
 	// match: (Geq32U x y)
 	// cond:
-	// result: (GreaterEqual (CMPU x y))
+	// result: (GreaterEqual (CMPWU x y))
 	for {
 		x := v.Args[0]
 		y := v.Args[1]
 		v.reset(OpPPC64GreaterEqual)
-		v0 := b.NewValue0(v.Line, OpPPC64CMPU, TypeFlags)
+		v0 := b.NewValue0(v.Line, OpPPC64CMPWU, TypeFlags)
 		v0.AddArg(x)
 		v0.AddArg(y)
 		v.AddArg(v0)
@@ -1687,12 +1687,12 @@
 	_ = b
 	// match: (Geq8U x y)
 	// cond:
-	// result: (GreaterEqual (CMPU (ZeroExt8to32 x) (ZeroExt8to32 y)))
+	// result: (GreaterEqual (CMPWU (ZeroExt8to32 x) (ZeroExt8to32 y)))
 	for {
 		x := v.Args[0]
 		y := v.Args[1]
 		v.reset(OpPPC64GreaterEqual)
-		v0 := b.NewValue0(v.Line, OpPPC64CMPU, TypeFlags)
+		v0 := b.NewValue0(v.Line, OpPPC64CMPWU, TypeFlags)
 		v1 := b.NewValue0(v.Line, OpZeroExt8to32, config.fe.TypeUInt32())
 		v1.AddArg(x)
 		v0.AddArg(v1)
diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go
index 7d023bc..0425ced 100644
--- a/src/cmd/compile/internal/ssa/rewriteS390X.go
+++ b/src/cmd/compile/internal/ssa/rewriteS390X.go
@@ -524,6 +524,8 @@
 		return rewriteValueS390X_OpS390XMOVDload(v, config)
 	case OpS390XMOVDloadidx:
 		return rewriteValueS390X_OpS390XMOVDloadidx(v, config)
+	case OpS390XMOVDreg:
+		return rewriteValueS390X_OpS390XMOVDreg(v, config)
 	case OpS390XMOVDstore:
 		return rewriteValueS390X_OpS390XMOVDstore(v, config)
 	case OpS390XMOVDstoreconst:
@@ -3236,13 +3238,28 @@
 		return true
 	}
 	// match: (Load <t> ptr mem)
-	// cond: is32BitInt(t)
+	// cond: is32BitInt(t) && isSigned(t)
+	// result: (MOVWload ptr mem)
+	for {
+		t := v.Type
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		if !(is32BitInt(t) && isSigned(t)) {
+			break
+		}
+		v.reset(OpS390XMOVWload)
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (Load <t> ptr mem)
+	// cond: is32BitInt(t) && !isSigned(t)
 	// result: (MOVWZload ptr mem)
 	for {
 		t := v.Type
 		ptr := v.Args[0]
 		mem := v.Args[1]
-		if !(is32BitInt(t)) {
+		if !(is32BitInt(t) && !isSigned(t)) {
 			break
 		}
 		v.reset(OpS390XMOVWZload)
@@ -3251,13 +3268,28 @@
 		return true
 	}
 	// match: (Load <t> ptr mem)
-	// cond: is16BitInt(t)
+	// cond: is16BitInt(t) && isSigned(t)
+	// result: (MOVHload ptr mem)
+	for {
+		t := v.Type
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		if !(is16BitInt(t) && isSigned(t)) {
+			break
+		}
+		v.reset(OpS390XMOVHload)
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (Load <t> ptr mem)
+	// cond: is16BitInt(t) && !isSigned(t)
 	// result: (MOVHZload ptr mem)
 	for {
 		t := v.Type
 		ptr := v.Args[0]
 		mem := v.Args[1]
-		if !(is16BitInt(t)) {
+		if !(is16BitInt(t) && !isSigned(t)) {
 			break
 		}
 		v.reset(OpS390XMOVHZload)
@@ -3266,13 +3298,28 @@
 		return true
 	}
 	// match: (Load <t> ptr mem)
-	// cond: (t.IsBoolean() || is8BitInt(t))
+	// cond: is8BitInt(t) && isSigned(t)
+	// result: (MOVBload ptr mem)
+	for {
+		t := v.Type
+		ptr := v.Args[0]
+		mem := v.Args[1]
+		if !(is8BitInt(t) && isSigned(t)) {
+			break
+		}
+		v.reset(OpS390XMOVBload)
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (Load <t> ptr mem)
+	// cond: (t.IsBoolean() || (is8BitInt(t) && !isSigned(t)))
 	// result: (MOVBZload ptr mem)
 	for {
 		t := v.Type
 		ptr := v.Args[0]
 		mem := v.Args[1]
-		if !(t.IsBoolean() || is8BitInt(t)) {
+		if !(t.IsBoolean() || (is8BitInt(t) && !isSigned(t))) {
 			break
 		}
 		v.reset(OpS390XMOVBZload)
@@ -7802,7 +7849,7 @@
 	_ = b
 	// match: (MOVBZload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
 	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		off := v.AuxInt
 		sym := v.Aux
@@ -7818,8 +7865,7 @@
 		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
@@ -7976,7 +8022,7 @@
 	_ = b
 	// match: (MOVBZreg x:(MOVDLT (MOVDconst [c]) (MOVDconst [d]) _))
 	// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVDLT {
@@ -7995,14 +8041,13 @@
 		if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVBZreg x:(MOVDLE (MOVDconst [c]) (MOVDconst [d]) _))
 	// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVDLE {
@@ -8021,14 +8066,13 @@
 		if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVBZreg x:(MOVDGT (MOVDconst [c]) (MOVDconst [d]) _))
 	// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVDGT {
@@ -8047,14 +8091,13 @@
 		if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVBZreg x:(MOVDGE (MOVDconst [c]) (MOVDconst [d]) _))
 	// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVDGE {
@@ -8073,14 +8116,13 @@
 		if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVBZreg x:(MOVDEQ (MOVDconst [c]) (MOVDconst [d]) _))
 	// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVDEQ {
@@ -8099,14 +8141,13 @@
 		if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVBZreg x:(MOVDNE (MOVDconst [c]) (MOVDconst [d]) _))
 	// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVDNE {
@@ -8125,14 +8166,13 @@
 		if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVBZreg x:(MOVDGTnoinv (MOVDconst [c]) (MOVDconst [d]) _))
 	// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVDGTnoinv {
@@ -8151,14 +8191,13 @@
 		if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVBZreg x:(MOVDGEnoinv (MOVDconst [c]) (MOVDconst [d]) _))
 	// cond: int64(uint8(c)) == c && int64(uint8(d)) == d
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVDGEnoinv {
@@ -8177,27 +8216,25 @@
 		if !(int64(uint8(c)) == c && int64(uint8(d)) == d) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVBZreg x:(MOVBZload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBZload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVBZreg x:(Arg <t>))
 	// cond: is8BitInt(t) && !isSigned(t)
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpArg {
@@ -8207,21 +8244,19 @@
 		if !(is8BitInt(t) && !isSigned(t)) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVBZreg x:(MOVBZreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBZreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
@@ -8349,20 +8384,19 @@
 	_ = b
 	// match: (MOVBreg x:(MOVBload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVBreg x:(Arg <t>))
 	// cond: is8BitInt(t) && isSigned(t)
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpArg {
@@ -8372,21 +8406,19 @@
 		if !(is8BitInt(t) && isSigned(t)) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVBreg x:(MOVBreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
@@ -9847,11 +9879,11 @@
 		v.AddArg(cmp)
 		return true
 	}
-	// match: (MOVDNE _ y (FlagEQ))
+	// match: (MOVDNE y _ (FlagEQ))
 	// cond:
 	// result: y
 	for {
-		y := v.Args[1]
+		y := v.Args[0]
 		v_2 := v.Args[2]
 		if v_2.Op != OpS390XFlagEQ {
 			break
@@ -9861,11 +9893,11 @@
 		v.AddArg(y)
 		return true
 	}
-	// match: (MOVDNE x _ (FlagLT))
+	// match: (MOVDNE _ x (FlagLT))
 	// cond:
 	// result: x
 	for {
-		x := v.Args[0]
+		x := v.Args[1]
 		v_2 := v.Args[2]
 		if v_2.Op != OpS390XFlagLT {
 			break
@@ -9875,11 +9907,11 @@
 		v.AddArg(x)
 		return true
 	}
-	// match: (MOVDNE x _ (FlagGT))
+	// match: (MOVDNE _ x (FlagGT))
 	// cond:
 	// result: x
 	for {
-		x := v.Args[0]
+		x := v.Args[1]
 		v_2 := v.Args[2]
 		if v_2.Op != OpS390XFlagGT {
 			break
@@ -9995,7 +10027,7 @@
 	_ = b
 	// match: (MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _))
 	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		off := v.AuxInt
 		sym := v.Aux
@@ -10011,8 +10043,7 @@
 		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
@@ -10164,6 +10195,23 @@
 	}
 	return false
 }
+func rewriteValueS390X_OpS390XMOVDreg(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (MOVDreg x)
+	// cond: x.Uses == 1
+	// result: (MOVDnop x)
+	for {
+		x := v.Args[0]
+		if !(x.Uses == 1) {
+			break
+		}
+		v.reset(OpS390XMOVDnop)
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
 func rewriteValueS390X_OpS390XMOVDstore(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
@@ -10912,7 +10960,7 @@
 	_ = b
 	// match: (MOVHZload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _))
 	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		off := v.AuxInt
 		sym := v.Aux
@@ -10928,8 +10976,7 @@
 		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
@@ -11086,33 +11133,31 @@
 	_ = b
 	// match: (MOVHZreg x:(MOVBZload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBZload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVHZreg x:(MOVHZload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVHZload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVHZreg x:(Arg <t>))
 	// cond: (is8BitInt(t) || is16BitInt(t)) && !isSigned(t)
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpArg {
@@ -11122,34 +11167,31 @@
 		if !((is8BitInt(t) || is16BitInt(t)) && !isSigned(t)) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVHZreg x:(MOVBZreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBZreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVHZreg x:(MOVHZreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVHZreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
@@ -11277,46 +11319,43 @@
 	_ = b
 	// match: (MOVHreg x:(MOVBload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVHreg x:(MOVBZload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBZload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVHreg x:(MOVHload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVHload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVHreg x:(Arg <t>))
 	// cond: (is8BitInt(t) || is16BitInt(t)) && isSigned(t)
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpArg {
@@ -11326,47 +11365,43 @@
 		if !((is8BitInt(t) || is16BitInt(t)) && isSigned(t)) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVHreg x:(MOVBreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVHreg x:(MOVBZreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBZreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVHreg x:(MOVHreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVHreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
@@ -12310,7 +12345,7 @@
 	_ = b
 	// match: (MOVWZload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _))
 	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		off := v.AuxInt
 		sym := v.Aux
@@ -12326,8 +12361,7 @@
 		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
@@ -12484,46 +12518,43 @@
 	_ = b
 	// match: (MOVWZreg x:(MOVBZload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBZload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWZreg x:(MOVHZload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVHZload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWZreg x:(MOVWZload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVWZload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWZreg x:(Arg <t>))
 	// cond: (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t)
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpArg {
@@ -12533,47 +12564,43 @@
 		if !((is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t)) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWZreg x:(MOVBZreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBZreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWZreg x:(MOVHZreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVHZreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWZreg x:(MOVWZreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVWZreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
@@ -12701,72 +12728,67 @@
 	_ = b
 	// match: (MOVWreg x:(MOVBload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWreg x:(MOVBZload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBZload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWreg x:(MOVHload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVHload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWreg x:(MOVHZload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVHZload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWreg x:(MOVWload _ _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVWload {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWreg x:(Arg <t>))
 	// cond: (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t)
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpArg {
@@ -12776,73 +12798,67 @@
 		if !((is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t)) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWreg x:(MOVBreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWreg x:(MOVBZreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVBZreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWreg x:(MOVHreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVHreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWreg x:(MOVHreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVHreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MOVWreg x:(MOVWreg _))
 	// cond:
-	// result: x
+	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
 		if x.Op != OpS390XMOVWreg {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpS390XMOVDreg)
 		v.AddArg(x)
 		return true
 	}
diff --git a/src/cmd/compile/internal/syntax/parser.go b/src/cmd/compile/internal/syntax/parser.go
index 121dfb7..e45ca05 100644
--- a/src/cmd/compile/internal/syntax/parser.go
+++ b/src/cmd/compile/internal/syntax/parser.go
@@ -1634,6 +1634,8 @@
 	return body
 }
 
+var dummyCond = &Name{Value: "false"}
+
 func (p *parser) header(forStmt bool) (init SimpleStmt, cond Expr, post SimpleStmt) {
 	if p.tok == _Lbrace {
 		return
@@ -1680,7 +1682,8 @@
 	case *ExprStmt:
 		cond = s.X
 	default:
-		p.error("invalid condition, tag, or type switch guard")
+		p.syntax_error(fmt.Sprintf("%s used as value", String(s)))
+		cond = dummyCond // avoid follow-up error for if statements
 	}
 
 	p.xnest = outer
diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go
index 7d5f79f..c51dcea 100644
--- a/src/cmd/dist/test.go
+++ b/src/cmd/dist/test.go
@@ -15,7 +15,6 @@
 	"os/exec"
 	"path/filepath"
 	"regexp"
-	"runtime"
 	"strconv"
 	"strings"
 	"sync"
@@ -354,7 +353,7 @@
 
 	// This test needs its stdout/stderr to be terminals, so we don't run it from cmd/go's tests.
 	// See issue 18153.
-	if runtime.GOOS == "linux" {
+	if t.goos == "linux" {
 		t.tests = append(t.tests, distTest{
 			name:    "cmd_go_test_terminal",
 			heading: "cmd/go terminal test",
@@ -568,7 +567,7 @@
 		if t.gohostos == "linux" && t.goarch == "amd64" {
 			t.registerTest("testasan", "../misc/cgo/testasan", "go", "run", "main.go")
 		}
-		if t.gohostos == "linux" && t.goarch == "amd64" {
+		if t.goos == "linux" && t.goarch == "amd64" {
 			t.registerTest("testsanitizers", "../misc/cgo/testsanitizers", "./test.bash")
 		}
 		if t.hasBash() && t.goos != "android" && !t.iOS() && t.gohostos != "windows" {
diff --git a/src/cmd/go/alldocs.go b/src/cmd/go/alldocs.go
index e93fd6e..3d5dd2b 100644
--- a/src/cmd/go/alldocs.go
+++ b/src/cmd/go/alldocs.go
@@ -17,7 +17,7 @@
 // 	clean       remove object files
 // 	doc         show documentation for package or symbol
 // 	env         print Go environment information
-// 	bug         print information for bug reports
+// 	bug         start a bug report
 // 	fix         run go tool fix on packages
 // 	fmt         run gofmt on package sources
 // 	generate    generate Go files by processing source
@@ -324,15 +324,14 @@
 // each named variable on its own line.
 //
 //
-// Print information for bug reports
+// Start a bug report
 //
 // Usage:
 //
 // 	go bug
 //
-// Bug prints information that helps file effective bug reports.
-//
-// Bugs may be reported at https://golang.org/issue/new.
+// Bug opens the default browser and starts a new bug report.
+// The report includes useful system information.
 //
 //
 // Run go tool fix on packages
diff --git a/src/cmd/go/get.go b/src/cmd/go/get.go
index 1d7677c..6fb4235 100644
--- a/src/cmd/go/get.go
+++ b/src/cmd/go/get.go
@@ -428,7 +428,7 @@
 			return fmt.Errorf("cannot download, $GOPATH not set. For more details see: 'go help gopath'")
 		}
 		// Guard against people setting GOPATH=$GOROOT.
-		if list[0] == goroot {
+		if filepath.Clean(list[0]) == filepath.Clean(goroot) {
 			return fmt.Errorf("cannot download, $GOPATH must not be set to $GOROOT. For more details see: 'go help gopath'")
 		}
 		if _, err := os.Stat(filepath.Join(list[0], "src/cmd/go/alldocs.go")); err == nil {
diff --git a/src/cmd/go/go_test.go b/src/cmd/go/go_test.go
index 5727eb0..56de65c 100644
--- a/src/cmd/go/go_test.go
+++ b/src/cmd/go/go_test.go
@@ -1683,173 +1683,111 @@
 	}
 }
 
-// Test go env missing GOPATH shows default.
-func TestMissingGOPATHEnvShowsDefault(t *testing.T) {
+func TestDefaultGOPATH(t *testing.T) {
 	tg := testgo(t)
 	defer tg.cleanup()
 	tg.parallel()
-	tg.setenv("GOPATH", "")
+	tg.tempDir("home/go")
+	tg.setenv(homeEnvName(), tg.path("home"))
+
 	tg.run("env", "GOPATH")
+	tg.grepStdout(regexp.QuoteMeta(tg.path("home/go")), "want GOPATH=$HOME/go")
 
-	want := filepath.Join(os.Getenv(homeEnvName()), "go")
-	got := strings.TrimSpace(tg.getStdout())
-	if got != want {
-		t.Errorf("got %q; want %q", got, want)
-	}
+	tg.setenv("GOROOT", tg.path("home/go"))
+	tg.run("env", "GOPATH")
+	tg.grepStdoutNot(".", "want unset GOPATH because GOROOT=$HOME/go")
+
+	tg.setenv("GOROOT", tg.path("home/go")+"/")
+	tg.run("env", "GOPATH")
+	tg.grepStdoutNot(".", "want unset GOPATH because GOROOT=$HOME/go/")
 }
 
-// Test go get missing GOPATH causes go get to warn if directory doesn't exist.
-func TestMissingGOPATHGetWarnsIfNotExists(t *testing.T) {
+func TestDefaultGOPATHGet(t *testing.T) {
 	testenv.MustHaveExternalNetwork(t)
 
-	if _, err := exec.LookPath("git"); err != nil {
-		t.Skip("skipping because git binary not found")
-	}
-
 	tg := testgo(t)
 	defer tg.cleanup()
-
-	// setenv variables for test and defer deleting temporary home directory.
 	tg.setenv("GOPATH", "")
-	tmp, err := ioutil.TempDir("", "")
-	if err != nil {
-		t.Fatalf("could not create tmp home: %v", err)
-	}
-	defer os.RemoveAll(tmp)
-	tg.setenv(homeEnvName(), tmp)
+	tg.tempDir("home")
+	tg.setenv(homeEnvName(), tg.path("home"))
 
+	// warn for creating directory
 	tg.run("get", "-v", "github.com/golang/example/hello")
+	tg.grepStderr("created GOPATH="+regexp.QuoteMeta(tg.path("home/go"))+"; see 'go help gopath'", "did not create GOPATH")
 
-	want := fmt.Sprintf("created GOPATH=%s; see 'go help gopath'", filepath.Join(tmp, "go"))
-	got := strings.TrimSpace(tg.getStderr())
-	if !strings.Contains(got, want) {
-		t.Errorf("got %q; want %q", got, want)
-	}
-}
-
-// Test go get missing GOPATH causes no warning if directory exists.
-func TestMissingGOPATHGetDoesntWarnIfExists(t *testing.T) {
-	testenv.MustHaveExternalNetwork(t)
-
-	if _, err := exec.LookPath("git"); err != nil {
-		t.Skip("skipping because git binary not found")
-	}
-
-	tg := testgo(t)
-	defer tg.cleanup()
-
-	// setenv variables for test and defer resetting them.
-	tg.setenv("GOPATH", "")
-	tmp, err := ioutil.TempDir("", "")
-	if err != nil {
-		t.Fatalf("could not create tmp home: %v", err)
-	}
-	defer os.RemoveAll(tmp)
-	if err := os.Mkdir(filepath.Join(tmp, "go"), 0777); err != nil {
-		t.Fatalf("could not create $HOME/go: %v", err)
-	}
-
-	tg.setenv(homeEnvName(), tmp)
-
+	// no warning if directory already exists
+	tg.must(os.RemoveAll(tg.path("home/go")))
+	tg.tempDir("home/go")
 	tg.run("get", "github.com/golang/example/hello")
+	tg.grepStderrNot(".", "expected no output on standard error")
 
-	got := strings.TrimSpace(tg.getStderr())
-	if got != "" {
-		t.Errorf("got %q; wants empty", got)
-	}
+	// error if $HOME/go is a file
+	tg.must(os.RemoveAll(tg.path("home/go")))
+	tg.tempFile("home/go", "")
+	tg.runFail("get", "github.com/golang/example/hello")
+	tg.grepStderr(`mkdir .*[/\\]go: .*(not a directory|cannot find the path)`, "expected error because $HOME/go is a file")
 }
 
-// Test go get missing GOPATH fails if pointed file is not a directory.
-func TestMissingGOPATHGetFailsIfItsNotDirectory(t *testing.T) {
-	testenv.MustHaveExternalNetwork(t)
-
+func TestDefaultGOPATHPrintedSearchList(t *testing.T) {
 	tg := testgo(t)
 	defer tg.cleanup()
-
-	// setenv variables for test and defer resetting them.
 	tg.setenv("GOPATH", "")
-	tmp, err := ioutil.TempDir("", "")
-	if err != nil {
-		t.Fatalf("could not create tmp home: %v", err)
-	}
-	defer os.RemoveAll(tmp)
+	tg.tempDir("home")
+	tg.setenv(homeEnvName(), tg.path("home"))
 
-	path := filepath.Join(tmp, "go")
-	if err := ioutil.WriteFile(path, nil, 0777); err != nil {
-		t.Fatalf("could not create GOPATH at %s: %v", path, err)
-	}
-	tg.setenv(homeEnvName(), tmp)
-
-	const pkg = "github.com/golang/example/hello"
-	tg.runFail("get", pkg)
-
-	msg := "not a directory"
-	if runtime.GOOS == "windows" {
-		msg = "The system cannot find the path specified."
-	}
-	want := fmt.Sprintf("package %s: mkdir %s: %s", pkg, filepath.Join(tmp, "go"), msg)
-	got := strings.TrimSpace(tg.getStderr())
-	if got != want {
-		t.Errorf("got %q; wants %q", got, want)
-	}
-}
-
-// Test go install of missing package when missing GOPATH fails and shows default GOPATH.
-func TestMissingGOPATHInstallMissingPackageFailsAndShowsDefault(t *testing.T) {
-	tg := testgo(t)
-	defer tg.cleanup()
-
-	// setenv variables for test and defer resetting them.
-	tg.setenv("GOPATH", "")
-	tmp, err := ioutil.TempDir("", "")
-	if err != nil {
-		t.Fatalf("could not create tmp home: %v", err)
-	}
-	defer os.RemoveAll(tmp)
-	if err := os.Mkdir(filepath.Join(tmp, "go"), 0777); err != nil {
-		t.Fatalf("could not create $HOME/go: %v", err)
-	}
-	tg.setenv(homeEnvName(), tmp)
-
-	const pkg = "github.com/golang/example/hello"
-	tg.runFail("install", pkg)
-
-	pkgPath := filepath.Join(strings.Split(pkg, "/")...)
-	want := fmt.Sprintf("can't load package: package %s: cannot find package \"%s\" in any of:", pkg, pkg) +
-		fmt.Sprintf("\n\t%s (from $GOROOT)", filepath.Join(runtime.GOROOT(), "src", pkgPath)) +
-		fmt.Sprintf("\n\t%s (from $GOPATH)", filepath.Join(tmp, "go", "src", pkgPath))
-
-	got := strings.TrimSpace(tg.getStderr())
-	if got != want {
-		t.Errorf("got %q; wants %q", got, want)
-	}
+	tg.runFail("install", "github.com/golang/example/hello")
+	tg.grepStderr(regexp.QuoteMeta(tg.path("home/go/src/github.com/golang/example/hello"))+`.*from \$GOPATH`, "expected default GOPATH")
 }
 
 // Issue 4186.  go get cannot be used to download packages to $GOROOT.
 // Test that without GOPATH set, go get should fail.
-func TestWithoutGOPATHGoGetFails(t *testing.T) {
+func TestGoGetIntoGOROOT(t *testing.T) {
 	testenv.MustHaveExternalNetwork(t)
 
 	tg := testgo(t)
 	defer tg.cleanup()
 	tg.parallel()
 	tg.tempDir("src")
-	tg.setenv("GOPATH", "")
-	tg.setenv("GOROOT", tg.path("."))
-	tg.runFail("get", "-d", "golang.org/x/codereview/cmd/hgpatch")
-}
 
-// Test that with GOPATH=$GOROOT, go get should fail.
-func TestWithGOPATHEqualsGOROOTGoGetFails(t *testing.T) {
-	testenv.MustHaveExternalNetwork(t)
-
-	tg := testgo(t)
-	defer tg.cleanup()
-	tg.parallel()
-	tg.tempDir("src")
+	// Fails because GOROOT=GOPATH
 	tg.setenv("GOPATH", tg.path("."))
 	tg.setenv("GOROOT", tg.path("."))
-	tg.runFail("get", "-d", "golang.org/x/codereview/cmd/hgpatch")
+	tg.runFail("get", "-d", "github.com/golang/example/hello")
+	tg.grepStderr("warning: GOPATH set to GOROOT", "go should detect GOPATH=GOROOT")
+	tg.grepStderr(`\$GOPATH must not be set to \$GOROOT`, "go should detect GOPATH=GOROOT")
+
+	// Fails because GOROOT=GOPATH after cleaning.
+	tg.setenv("GOPATH", tg.path(".")+"/")
+	tg.setenv("GOROOT", tg.path("."))
+	tg.runFail("get", "-d", "github.com/golang/example/hello")
+	tg.grepStderr("warning: GOPATH set to GOROOT", "go should detect GOPATH=GOROOT")
+	tg.grepStderr(`\$GOPATH must not be set to \$GOROOT`, "go should detect GOPATH=GOROOT")
+
+	tg.setenv("GOPATH", tg.path("."))
+	tg.setenv("GOROOT", tg.path(".")+"/")
+	tg.runFail("get", "-d", "github.com/golang/example/hello")
+	tg.grepStderr("warning: GOPATH set to GOROOT", "go should detect GOPATH=GOROOT")
+	tg.grepStderr(`\$GOPATH must not be set to \$GOROOT`, "go should detect GOPATH=GOROOT")
+
+	// Fails because GOROOT=$HOME/go so default GOPATH unset.
+	tg.tempDir("home/go")
+	tg.setenv(homeEnvName(), tg.path("home"))
+	tg.setenv("GOPATH", "")
+	tg.setenv("GOROOT", tg.path("home/go"))
+	tg.runFail("get", "-d", "github.com/golang/example/hello")
+	tg.grepStderr(`\$GOPATH not set`, "expected GOPATH not set")
+
+	tg.setenv(homeEnvName(), tg.path("home")+"/")
+	tg.setenv("GOPATH", "")
+	tg.setenv("GOROOT", tg.path("home/go"))
+	tg.runFail("get", "-d", "github.com/golang/example/hello")
+	tg.grepStderr(`\$GOPATH not set`, "expected GOPATH not set")
+
+	tg.setenv(homeEnvName(), tg.path("home"))
+	tg.setenv("GOPATH", "")
+	tg.setenv("GOROOT", tg.path("home/go")+"/")
+	tg.runFail("get", "-d", "github.com/golang/example/hello")
+	tg.grepStderr(`\$GOPATH not set`, "expected GOPATH not set")
 }
 
 func TestLdflagsArgumentsWithSpacesIssue3941(t *testing.T) {
@@ -3744,6 +3682,13 @@
 	tg.grepBoth(okPattern, "go test did not say ok")
 }
 
+// Issue 18845
+func TestBenchTimeout(t *testing.T) {
+	tg := testgo(t)
+	defer tg.cleanup()
+	tg.run("test", "-bench", ".", "-timeout", "750ms", "testdata/timeoutbench_test.go")
+}
+
 func TestLinkXImportPathEscape(t *testing.T) {
 	// golang.org/issue/16710
 	tg := testgo(t)
@@ -3787,3 +3732,26 @@
 	tg.setenv("GOPATH", tg.path("go"))
 	tg.run("build", "p")
 }
+
+// Issue 18778.
+func TestDotDotDotOutsideGOPATH(t *testing.T) {
+	tg := testgo(t)
+	defer tg.cleanup()
+
+	tg.tempFile("pkgs/a.go", `package x`)
+	tg.tempFile("pkgs/a_test.go", `package x_test
+import "testing"
+func TestX(t *testing.T) {}`)
+
+	tg.tempFile("pkgs/a/a.go", `package a`)
+	tg.tempFile("pkgs/a/a_test.go", `package a_test
+import "testing"
+func TestA(t *testing.T) {}`)
+
+	tg.cd(tg.path("pkgs"))
+	tg.run("build", "./...")
+	tg.run("test", "./...")
+	tg.run("list", "./...")
+	tg.grepStdout("pkgs$", "expected package not listed")
+	tg.grepStdout("pkgs/a", "expected package not listed")
+}
diff --git a/src/cmd/go/main.go b/src/cmd/go/main.go
index 07fc4e2..d80ff2d 100644
--- a/src/cmd/go/main.go
+++ b/src/cmd/go/main.go
@@ -136,7 +136,7 @@
 	// Diagnose common mistake: GOPATH==GOROOT.
 	// This setting is equivalent to not setting GOPATH at all,
 	// which is not what most people want when they do it.
-	if gopath := buildContext.GOPATH; gopath == runtime.GOROOT() {
+	if gopath := buildContext.GOPATH; filepath.Clean(gopath) == filepath.Clean(runtime.GOROOT()) {
 		fmt.Fprintf(os.Stderr, "warning: GOPATH set to GOROOT (%s) has no effect\n", gopath)
 	} else {
 		for _, p := range filepath.SplitList(gopath) {
diff --git a/src/cmd/go/pkg.go b/src/cmd/go/pkg.go
index d69fa51..e40f942 100644
--- a/src/cmd/go/pkg.go
+++ b/src/cmd/go/pkg.go
@@ -429,7 +429,7 @@
 func cleanImport(path string) string {
 	orig := path
 	path = pathpkg.Clean(path)
-	if strings.HasPrefix(orig, "./") && path != ".." && path != "." && !strings.HasPrefix(path, "../") {
+	if strings.HasPrefix(orig, "./") && path != ".." && !strings.HasPrefix(path, "../") {
 		path = "./" + path
 	}
 	return path
diff --git a/src/cmd/go/testdata/timeoutbench_test.go b/src/cmd/go/testdata/timeoutbench_test.go
new file mode 100644
index 0000000..57a8888
--- /dev/null
+++ b/src/cmd/go/testdata/timeoutbench_test.go
@@ -0,0 +1,10 @@
+package timeoutbench_test
+
+import (
+	"testing"
+	"time"
+)
+
+func BenchmarkSleep1s(b *testing.B) {
+	time.Sleep(1 * time.Second)
+}
diff --git a/src/cmd/link/internal/ld/config.go b/src/cmd/link/internal/ld/config.go
index 7d00ff1..2656c24 100644
--- a/src/cmd/link/internal/ld/config.go
+++ b/src/cmd/link/internal/ld/config.go
@@ -238,6 +238,8 @@
 				Linkmode = LinkExternal
 			} else if iscgo && externalobj {
 				Linkmode = LinkExternal
+			} else if Buildmode == BuildmodePIE {
+				Linkmode = LinkExternal // https://golang.org/issue/18968
 			} else {
 				Linkmode = LinkInternal
 			}
diff --git a/src/cmd/link/internal/ld/dwarf.go b/src/cmd/link/internal/ld/dwarf.go
index 61d3e4f..22d2c54 100644
--- a/src/cmd/link/internal/ld/dwarf.go
+++ b/src/cmd/link/internal/ld/dwarf.go
@@ -1080,7 +1080,7 @@
 		epcs = s
 
 		dsym := ctxt.Syms.Lookup(dwarf.InfoPrefix+s.Name, int(s.Version))
-		dsym.Attr |= AttrHidden
+		dsym.Attr |= AttrHidden | AttrReachable
 		dsym.Type = obj.SDWARFINFO
 		for _, r := range dsym.R {
 			if r.Type == obj.R_DWARFREF && r.Sym.Size == 0 {
diff --git a/src/crypto/x509/root_linux.go b/src/crypto/x509/root_linux.go
index 38dd72d..aa1785e 100644
--- a/src/crypto/x509/root_linux.go
+++ b/src/crypto/x509/root_linux.go
@@ -7,8 +7,8 @@
 // Possible certificate files; stop after finding one.
 var certFiles = []string{
 	"/etc/ssl/certs/ca-certificates.crt",                // Debian/Ubuntu/Gentoo etc.
-	"/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", // CentOS/RHEL 7
 	"/etc/pki/tls/certs/ca-bundle.crt",                  // Fedora/RHEL 6
 	"/etc/ssl/ca-bundle.pem",                            // OpenSUSE
 	"/etc/pki/tls/cacert.pem",                           // OpenELEC
+	"/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", // CentOS/RHEL 7
 }
diff --git a/src/database/sql/ctxutil.go b/src/database/sql/ctxutil.go
index 1071446..bd652b5 100644
--- a/src/database/sql/ctxutil.go
+++ b/src/database/sql/ctxutil.go
@@ -35,15 +35,12 @@
 		return nil, err
 	}
 
-	resi, err := execer.Exec(query, dargs)
-	if err == nil {
-		select {
-		default:
-		case <-ctx.Done():
-			return resi, ctx.Err()
-		}
+	select {
+	default:
+	case <-ctx.Done():
+		return nil, ctx.Err()
 	}
-	return resi, err
+	return execer.Exec(query, dargs)
 }
 
 func ctxDriverQuery(ctx context.Context, queryer driver.Queryer, query string, nvdargs []driver.NamedValue) (driver.Rows, error) {
@@ -56,16 +53,12 @@
 		return nil, err
 	}
 
-	rowsi, err := queryer.Query(query, dargs)
-	if err == nil {
-		select {
-		default:
-		case <-ctx.Done():
-			rowsi.Close()
-			return nil, ctx.Err()
-		}
+	select {
+	default:
+	case <-ctx.Done():
+		return nil, ctx.Err()
 	}
-	return rowsi, err
+	return queryer.Query(query, dargs)
 }
 
 func ctxDriverStmtExec(ctx context.Context, si driver.Stmt, nvdargs []driver.NamedValue) (driver.Result, error) {
@@ -77,15 +70,12 @@
 		return nil, err
 	}
 
-	resi, err := si.Exec(dargs)
-	if err == nil {
-		select {
-		default:
-		case <-ctx.Done():
-			return resi, ctx.Err()
-		}
+	select {
+	default:
+	case <-ctx.Done():
+		return nil, ctx.Err()
 	}
-	return resi, err
+	return si.Exec(dargs)
 }
 
 func ctxDriverStmtQuery(ctx context.Context, si driver.Stmt, nvdargs []driver.NamedValue) (driver.Rows, error) {
@@ -97,16 +87,12 @@
 		return nil, err
 	}
 
-	rowsi, err := si.Query(dargs)
-	if err == nil {
-		select {
-		default:
-		case <-ctx.Done():
-			rowsi.Close()
-			return nil, ctx.Err()
-		}
+	select {
+	default:
+	case <-ctx.Done():
+		return nil, ctx.Err()
 	}
-	return rowsi, err
+	return si.Query(dargs)
 }
 
 var errLevelNotSupported = errors.New("sql: selected isolation level is not supported")
diff --git a/src/database/sql/sql.go b/src/database/sql/sql.go
index 0fa7c34..c016681 100644
--- a/src/database/sql/sql.go
+++ b/src/database/sql/sql.go
@@ -305,8 +305,9 @@
 
 	mu           sync.Mutex // protects following fields
 	freeConn     []*driverConn
-	connRequests []chan connRequest
-	numOpen      int // number of opened and pending open connections
+	connRequests map[uint64]chan connRequest
+	nextRequest  uint64 // Next key to use in connRequests.
+	numOpen      int    // number of opened and pending open connections
 	// Used to signal the need for new connections
 	// a goroutine running connectionOpener() reads on this chan and
 	// maybeOpenNewConnections sends on the chan (one send per needed connection)
@@ -572,10 +573,11 @@
 		return nil, fmt.Errorf("sql: unknown driver %q (forgotten import?)", driverName)
 	}
 	db := &DB{
-		driver:   driveri,
-		dsn:      dataSourceName,
-		openerCh: make(chan struct{}, connectionRequestQueueSize),
-		lastPut:  make(map[*driverConn]string),
+		driver:       driveri,
+		dsn:          dataSourceName,
+		openerCh:     make(chan struct{}, connectionRequestQueueSize),
+		lastPut:      make(map[*driverConn]string),
+		connRequests: make(map[uint64]chan connRequest),
 	}
 	go db.connectionOpener()
 	return db, nil
@@ -881,6 +883,14 @@
 
 var errDBClosed = errors.New("sql: database is closed")
 
+// nextRequestKeyLocked returns the next connection request key.
+// It is assumed that nextRequest will not overflow.
+func (db *DB) nextRequestKeyLocked() uint64 {
+	next := db.nextRequest
+	db.nextRequest++
+	return next
+}
+
 // conn returns a newly-opened or cached *driverConn.
 func (db *DB) conn(ctx context.Context, strategy connReuseStrategy) (*driverConn, error) {
 	db.mu.Lock()
@@ -918,12 +928,25 @@
 		// Make the connRequest channel. It's buffered so that the
 		// connectionOpener doesn't block while waiting for the req to be read.
 		req := make(chan connRequest, 1)
-		db.connRequests = append(db.connRequests, req)
+		reqKey := db.nextRequestKeyLocked()
+		db.connRequests[reqKey] = req
 		db.mu.Unlock()
 
 		// Timeout the connection request with the context.
 		select {
 		case <-ctx.Done():
+			// Remove the connection request and ensure no value has been sent
+			// on it after removing.
+			db.mu.Lock()
+			delete(db.connRequests, reqKey)
+			db.mu.Unlock()
+			select {
+			default:
+			case ret, ok := <-req:
+				if ok {
+					db.putConn(ret.conn, ret.err)
+				}
+			}
 			return nil, ctx.Err()
 		case ret, ok := <-req:
 			if !ok {
@@ -1044,12 +1067,12 @@
 		return false
 	}
 	if c := len(db.connRequests); c > 0 {
-		req := db.connRequests[0]
-		// This copy is O(n) but in practice faster than a linked list.
-		// TODO: consider compacting it down less often and
-		// moving the base instead?
-		copy(db.connRequests, db.connRequests[1:])
-		db.connRequests = db.connRequests[:c-1]
+		var req chan connRequest
+		var reqKey uint64
+		for reqKey, req = range db.connRequests {
+			break
+		}
+		delete(db.connRequests, reqKey) // Remove from pending requests.
 		if err == nil {
 			dc.inUse = true
 		}
@@ -1357,16 +1380,7 @@
 		cancel: cancel,
 		ctx:    ctx,
 	}
-	go func(tx *Tx) {
-		select {
-		case <-tx.ctx.Done():
-			if !tx.isDone() {
-				// Discard and close the connection used to ensure the transaction
-				// is closed and the resources are released.
-				tx.rollback(true)
-			}
-		}
-	}(tx)
+	go tx.awaitDone()
 	return tx, nil
 }
 
@@ -1388,6 +1402,11 @@
 type Tx struct {
 	db *DB
 
+	// closemu prevents the transaction from closing while there
+	// is an active query. It is held for read during queries
+	// and exclusively during close.
+	closemu sync.RWMutex
+
 	// dc is owned exclusively until Commit or Rollback, at which point
 	// it's returned with putConn.
 	dc  *driverConn
@@ -1413,6 +1432,20 @@
 	ctx context.Context
 }
 
+// awaitDone blocks until the context in Tx is canceled and rolls back
+// the transaction if it's not already done.
+func (tx *Tx) awaitDone() {
+	// Wait for either the transaction to be committed or rolled
+	// back, or for the associated context to be closed.
+	<-tx.ctx.Done()
+
+	// Discard and close the connection used to ensure the
+	// transaction is closed and the resources are released.  This
+	// rollback does nothing if the transaction has already been
+	// committed or rolled back.
+	tx.rollback(true)
+}
+
 func (tx *Tx) isDone() bool {
 	return atomic.LoadInt32(&tx.done) != 0
 }
@@ -1424,16 +1457,31 @@
 // close returns the connection to the pool and
 // must only be called by Tx.rollback or Tx.Commit.
 func (tx *Tx) close(err error) {
+	tx.closemu.Lock()
+	defer tx.closemu.Unlock()
+
 	tx.db.putConn(tx.dc, err)
 	tx.cancel()
 	tx.dc = nil
 	tx.txi = nil
 }
 
+// hookTxGrabConn specifies an optional hook to be called on
+// a successful call to (*Tx).grabConn. For tests.
+var hookTxGrabConn func()
+
 func (tx *Tx) grabConn(ctx context.Context) (*driverConn, error) {
+	select {
+	default:
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	}
 	if tx.isDone() {
 		return nil, ErrTxDone
 	}
+	if hookTxGrabConn != nil { // test hook
+		hookTxGrabConn()
+	}
 	return tx.dc, nil
 }
 
@@ -1503,6 +1551,9 @@
 // for the execution of the returned statement. The returned statement
 // will run in the transaction context.
 func (tx *Tx) PrepareContext(ctx context.Context, query string) (*Stmt, error) {
+	tx.closemu.RLock()
+	defer tx.closemu.RUnlock()
+
 	// TODO(bradfitz): We could be more efficient here and either
 	// provide a method to take an existing Stmt (created on
 	// perhaps a different Conn), and re-create it on this Conn if
@@ -1567,6 +1618,9 @@
 // The returned statement operates within the transaction and will be closed
 // when the transaction has been committed or rolled back.
 func (tx *Tx) StmtContext(ctx context.Context, stmt *Stmt) *Stmt {
+	tx.closemu.RLock()
+	defer tx.closemu.RUnlock()
+
 	// TODO(bradfitz): optimize this. Currently this re-prepares
 	// each time. This is fine for now to illustrate the API but
 	// we should really cache already-prepared statements
@@ -1618,6 +1672,9 @@
 // ExecContext executes a query that doesn't return rows.
 // For example: an INSERT and UPDATE.
 func (tx *Tx) ExecContext(ctx context.Context, query string, args ...interface{}) (Result, error) {
+	tx.closemu.RLock()
+	defer tx.closemu.RUnlock()
+
 	dc, err := tx.grabConn(ctx)
 	if err != nil {
 		return nil, err
@@ -1661,6 +1718,9 @@
 
 // QueryContext executes a query that returns rows, typically a SELECT.
 func (tx *Tx) QueryContext(ctx context.Context, query string, args ...interface{}) (*Rows, error) {
+	tx.closemu.RLock()
+	defer tx.closemu.RUnlock()
+
 	dc, err := tx.grabConn(ctx)
 	if err != nil {
 		return nil, err
@@ -2034,29 +2094,32 @@
 	dc          *driverConn // owned; must call releaseConn when closed to release
 	releaseConn func(error)
 	rowsi       driver.Rows
+	cancel      func()      // called when Rows is closed, may be nil.
+	closeStmt   *driverStmt // if non-nil, statement to Close on close
 
-	// closed value is 1 when the Rows is closed.
-	// Use atomic operations on value when checking value.
-	closed    int32
-	ctxClose  chan struct{} // closed when Rows is closed, may be null.
-	lastcols  []driver.Value
-	lasterr   error       // non-nil only if closed is true
-	closeStmt *driverStmt // if non-nil, statement to Close on close
+	// closemu prevents Rows from closing while there
+	// is an active streaming result. It is held for read during non-close operations
+	// and exclusively during close.
+	//
+	// closemu guards lasterr and closed.
+	closemu sync.RWMutex
+	closed  bool
+	lasterr error // non-nil only if closed is true
+
+	// lastcols is only used in Scan, Next, and NextResultSet which are expected
+	// not not be called concurrently.
+	lastcols []driver.Value
 }
 
 func (rs *Rows) initContextClose(ctx context.Context) {
-	if ctx.Done() == context.Background().Done() {
-		return
-	}
+	ctx, rs.cancel = context.WithCancel(ctx)
+	go rs.awaitDone(ctx)
+}
 
-	rs.ctxClose = make(chan struct{})
-	go func() {
-		select {
-		case <-ctx.Done():
-			rs.Close()
-		case <-rs.ctxClose:
-		}
-	}()
+// awaitDone blocks until the rows are closed or the context canceled.
+func (rs *Rows) awaitDone(ctx context.Context) {
+	<-ctx.Done()
+	rs.close(ctx.Err())
 }
 
 // Next prepares the next result row for reading with the Scan method. It
@@ -2066,8 +2129,19 @@
 //
 // Every call to Scan, even the first one, must be preceded by a call to Next.
 func (rs *Rows) Next() bool {
-	if rs.isClosed() {
-		return false
+	var doClose, ok bool
+	withLock(rs.closemu.RLocker(), func() {
+		doClose, ok = rs.nextLocked()
+	})
+	if doClose {
+		rs.Close()
+	}
+	return ok
+}
+
+func (rs *Rows) nextLocked() (doClose, ok bool) {
+	if rs.closed {
+		return false, false
 	}
 	if rs.lastcols == nil {
 		rs.lastcols = make([]driver.Value, len(rs.rowsi.Columns()))
@@ -2076,23 +2150,21 @@
 	if rs.lasterr != nil {
 		// Close the connection if there is a driver error.
 		if rs.lasterr != io.EOF {
-			rs.Close()
-			return false
+			return true, false
 		}
 		nextResultSet, ok := rs.rowsi.(driver.RowsNextResultSet)
 		if !ok {
-			rs.Close()
-			return false
+			return true, false
 		}
 		// The driver is at the end of the current result set.
 		// Test to see if there is another result set after the current one.
 		// Only close Rows if there is no further result sets to read.
 		if !nextResultSet.HasNextResultSet() {
-			rs.Close()
+			doClose = true
 		}
-		return false
+		return doClose, false
 	}
-	return true
+	return false, true
 }
 
 // NextResultSet prepares the next result set for reading. It returns true if
@@ -2104,18 +2176,28 @@
 // scanning. If there are further result sets they may not have rows in the result
 // set.
 func (rs *Rows) NextResultSet() bool {
-	if rs.isClosed() {
+	var doClose bool
+	defer func() {
+		if doClose {
+			rs.Close()
+		}
+	}()
+	rs.closemu.RLock()
+	defer rs.closemu.RUnlock()
+
+	if rs.closed {
 		return false
 	}
+
 	rs.lastcols = nil
 	nextResultSet, ok := rs.rowsi.(driver.RowsNextResultSet)
 	if !ok {
-		rs.Close()
+		doClose = true
 		return false
 	}
 	rs.lasterr = nextResultSet.NextResultSet()
 	if rs.lasterr != nil {
-		rs.Close()
+		doClose = true
 		return false
 	}
 	return true
@@ -2124,6 +2206,8 @@
 // Err returns the error, if any, that was encountered during iteration.
 // Err may be called after an explicit or implicit Close.
 func (rs *Rows) Err() error {
+	rs.closemu.RLock()
+	defer rs.closemu.RUnlock()
 	if rs.lasterr == io.EOF {
 		return nil
 	}
@@ -2134,7 +2218,9 @@
 // Columns returns an error if the rows are closed, or if the rows
 // are from QueryRow and there was a deferred error.
 func (rs *Rows) Columns() ([]string, error) {
-	if rs.isClosed() {
+	rs.closemu.RLock()
+	defer rs.closemu.RUnlock()
+	if rs.closed {
 		return nil, errors.New("sql: Rows are closed")
 	}
 	if rs.rowsi == nil {
@@ -2146,7 +2232,9 @@
 // ColumnTypes returns column information such as column type, length,
 // and nullable. Some information may not be available from some drivers.
 func (rs *Rows) ColumnTypes() ([]*ColumnType, error) {
-	if rs.isClosed() {
+	rs.closemu.RLock()
+	defer rs.closemu.RUnlock()
+	if rs.closed {
 		return nil, errors.New("sql: Rows are closed")
 	}
 	if rs.rowsi == nil {
@@ -2296,9 +2384,13 @@
 // For scanning into *bool, the source may be true, false, 1, 0, or
 // string inputs parseable by strconv.ParseBool.
 func (rs *Rows) Scan(dest ...interface{}) error {
-	if rs.isClosed() {
+	rs.closemu.RLock()
+	if rs.closed {
+		rs.closemu.RUnlock()
 		return errors.New("sql: Rows are closed")
 	}
+	rs.closemu.RUnlock()
+
 	if rs.lastcols == nil {
 		return errors.New("sql: Scan called without calling Next")
 	}
@@ -2314,27 +2406,39 @@
 	return nil
 }
 
-var rowsCloseHook func(*Rows, *error)
-
-func (rs *Rows) isClosed() bool {
-	return atomic.LoadInt32(&rs.closed) != 0
-}
+// rowsCloseHook returns a function so tests may install the
+// hook throug a test only mutex.
+var rowsCloseHook = func() func(*Rows, *error) { return nil }
 
 // Close closes the Rows, preventing further enumeration. If Next is called
 // and returns false and there are no further result sets,
 // the Rows are closed automatically and it will suffice to check the
 // result of Err. Close is idempotent and does not affect the result of Err.
 func (rs *Rows) Close() error {
-	if !atomic.CompareAndSwapInt32(&rs.closed, 0, 1) {
+	return rs.close(nil)
+}
+
+func (rs *Rows) close(err error) error {
+	rs.closemu.Lock()
+	defer rs.closemu.Unlock()
+
+	if rs.closed {
 		return nil
 	}
-	if rs.ctxClose != nil {
-		close(rs.ctxClose)
+	rs.closed = true
+
+	if rs.lasterr == nil {
+		rs.lasterr = err
 	}
-	err := rs.rowsi.Close()
-	if fn := rowsCloseHook; fn != nil {
+
+	err = rs.rowsi.Close()
+	if fn := rowsCloseHook(); fn != nil {
 		fn(rs, &err)
 	}
+	if rs.cancel != nil {
+		rs.cancel()
+	}
+
 	if rs.closeStmt != nil {
 		rs.closeStmt.Close()
 	}
diff --git a/src/database/sql/sql_test.go b/src/database/sql/sql_test.go
index 63e1292..450e5f1 100644
--- a/src/database/sql/sql_test.go
+++ b/src/database/sql/sql_test.go
@@ -14,6 +14,7 @@
 	"runtime"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"testing"
 	"time"
 )
@@ -152,8 +153,13 @@
 	if err != nil {
 		t.Fatalf("error closing DB: %v", err)
 	}
-	if count := db.numOpenConns(); count != 0 {
-		t.Fatalf("%d connections still open after closing DB", count)
+
+	var numOpen int
+	if !waitCondition(5*time.Second, 5*time.Millisecond, func() bool {
+		numOpen = db.numOpenConns()
+		return numOpen == 0
+	}) {
+		t.Fatalf("%d connections still open after closing DB", numOpen)
 	}
 }
 
@@ -275,6 +281,7 @@
 	}
 }
 
+// TestQueryContext tests canceling the context while scanning the rows.
 func TestQueryContext(t *testing.T) {
 	db := newTestDB(t, "people")
 	defer closeDB(t, db)
@@ -296,7 +303,7 @@
 	for rows.Next() {
 		if index == 2 {
 			cancel()
-			time.Sleep(10 * time.Millisecond)
+			waitForRowsClose(t, rows, 5*time.Second)
 		}
 		var r row
 		err = rows.Scan(&r.age, &r.name)
@@ -312,9 +319,13 @@
 		got = append(got, r)
 		index++
 	}
-	err = rows.Err()
-	if err != nil {
-		t.Fatalf("Err: %v", err)
+	select {
+	case <-ctx.Done():
+		if err := ctx.Err(); err != context.Canceled {
+			t.Fatalf("context err = %v; want context.Canceled")
+		}
+	default:
+		t.Fatalf("context err = nil; want context.Canceled")
 	}
 	want := []row{
 		{age: 1, name: "Alice"},
@@ -326,9 +337,8 @@
 
 	// And verify that the final rows.Next() call, which hit EOF,
 	// also closed the rows connection.
-	if n := db.numFreeConns(); n != 1 {
-		t.Fatalf("free conns after query hitting EOF = %d; want 1", n)
-	}
+	waitForRowsClose(t, rows, 5*time.Second)
+	waitForFree(t, db, 5*time.Second, 1)
 	if prepares := numPrepares(t, db) - prepares0; prepares != 1 {
 		t.Errorf("executed %d Prepare statements; want 1", prepares)
 	}
@@ -345,12 +355,39 @@
 	return false
 }
 
+// waitForFree checks db.numFreeConns until either it equals want or
+// the maxWait time elapses.
+func waitForFree(t *testing.T, db *DB, maxWait time.Duration, want int) {
+	var numFree int
+	if !waitCondition(maxWait, 5*time.Millisecond, func() bool {
+		numFree = db.numFreeConns()
+		return numFree == want
+	}) {
+		t.Fatalf("free conns after hitting EOF = %d; want %d", numFree, want)
+	}
+}
+
+func waitForRowsClose(t *testing.T, rows *Rows, maxWait time.Duration) {
+	if !waitCondition(maxWait, 5*time.Millisecond, func() bool {
+		rows.closemu.RLock()
+		defer rows.closemu.RUnlock()
+		return rows.closed
+	}) {
+		t.Fatal("failed to close rows")
+	}
+}
+
+// TestQueryContextWait ensures that rows and all internal statements are closed when
+// a query context is closed during execution.
 func TestQueryContextWait(t *testing.T) {
 	db := newTestDB(t, "people")
 	defer closeDB(t, db)
 	prepares0 := numPrepares(t, db)
 
-	ctx, _ := context.WithTimeout(context.Background(), time.Millisecond*15)
+	// TODO(kardianos): convert this from using a timeout to using an explicit
+	// cancel when the query signals that is is "executing" the query.
+	ctx, cancel := context.WithTimeout(context.Background(), 300*time.Millisecond)
+	defer cancel()
 
 	// This will trigger the *fakeConn.Prepare method which will take time
 	// performing the query. The ctxDriverPrepare func will check the context
@@ -361,14 +398,17 @@
 	}
 
 	// Verify closed rows connection after error condition.
-	if n := db.numFreeConns(); n != 1 {
-		t.Fatalf("free conns after query hitting EOF = %d; want 1", n)
-	}
+	waitForFree(t, db, 5*time.Second, 1)
 	if prepares := numPrepares(t, db) - prepares0; prepares != 1 {
-		t.Errorf("executed %d Prepare statements; want 1", prepares)
+		// TODO(kardianos): if the context timeouts before the db.QueryContext
+		// executes this check may fail. After adjusting how the context
+		// is canceled above revert this back to a Fatal error.
+		t.Logf("executed %d Prepare statements; want 1", prepares)
 	}
 }
 
+// TestTxContextWait tests the transaction behavior when the tx context is canceled
+// during execution of the query.
 func TestTxContextWait(t *testing.T) {
 	db := newTestDB(t, "people")
 	defer closeDB(t, db)
@@ -377,6 +417,10 @@
 
 	tx, err := db.BeginTx(ctx, nil)
 	if err != nil {
+		// Guard against the context being canceled before BeginTx completes.
+		if err == context.DeadlineExceeded {
+			t.Skip("tx context canceled prior to first use")
+		}
 		t.Fatal(err)
 	}
 
@@ -388,19 +432,7 @@
 		t.Fatalf("expected QueryContext to error with context deadline exceeded but returned %v", err)
 	}
 
-	var numFree int
-	if !waitCondition(5*time.Second, 5*time.Millisecond, func() bool {
-		numFree = db.numFreeConns()
-		return numFree == 0
-	}) {
-		t.Fatalf("free conns after hitting EOF = %d; want 0", numFree)
-	}
-
-	// Ensure the dropped connection allows more connections to be made.
-	// Checked on DB Close.
-	waitCondition(5*time.Second, 5*time.Millisecond, func() bool {
-		return db.numOpenConns() == 0
-	})
+	waitForFree(t, db, 5*time.Second, 0)
 }
 
 func TestMultiResultSetQuery(t *testing.T) {
@@ -471,9 +503,7 @@
 
 	// And verify that the final rows.Next() call, which hit EOF,
 	// also closed the rows connection.
-	if n := db.numFreeConns(); n != 1 {
-		t.Fatalf("free conns after query hitting EOF = %d; want 1", n)
-	}
+	waitForFree(t, db, 5*time.Second, 1)
 	if prepares := numPrepares(t, db) - prepares0; prepares != 1 {
 		t.Errorf("executed %d Prepare statements; want 1", prepares)
 	}
@@ -526,6 +556,63 @@
 	}
 }
 
+func TestPoolExhaustOnCancel(t *testing.T) {
+	if testing.Short() {
+		t.Skip("long test")
+	}
+	db := newTestDB(t, "people")
+	defer closeDB(t, db)
+
+	max := 3
+
+	db.SetMaxOpenConns(max)
+
+	// First saturate the connection pool.
+	// Then start new requests for a connection that is cancelled after it is requested.
+
+	var saturate, saturateDone sync.WaitGroup
+	saturate.Add(max)
+	saturateDone.Add(max)
+
+	for i := 0; i < max; i++ {
+		go func() {
+			saturate.Done()
+			rows, err := db.Query("WAIT|500ms|SELECT|people|name,photo|")
+			if err != nil {
+				t.Fatalf("Query: %v", err)
+			}
+			rows.Close()
+			saturateDone.Done()
+		}()
+	}
+
+	saturate.Wait()
+
+	// Now cancel the request while it is waiting.
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second*2)
+	defer cancel()
+
+	for i := 0; i < max; i++ {
+		ctxReq, cancelReq := context.WithCancel(ctx)
+		go func() {
+			time.Sleep(time.Millisecond * 100)
+			cancelReq()
+		}()
+		err := db.PingContext(ctxReq)
+		if err != context.Canceled {
+			t.Fatalf("PingContext (Exhaust): %v", err)
+		}
+	}
+
+	saturateDone.Wait()
+
+	// Now try to open a normal connection.
+	err := db.PingContext(ctx)
+	if err != nil {
+		t.Fatalf("PingContext (Normal): %v", err)
+	}
+}
+
 func TestByteOwnership(t *testing.T) {
 	db := newTestDB(t, "people")
 	defer closeDB(t, db)
@@ -1135,6 +1222,24 @@
 	}
 }
 
+var atomicRowsCloseHook atomic.Value // of func(*Rows, *error)
+
+func init() {
+	rowsCloseHook = func() func(*Rows, *error) {
+		fn, _ := atomicRowsCloseHook.Load().(func(*Rows, *error))
+		return fn
+	}
+}
+
+func setRowsCloseHook(fn func(*Rows, *error)) {
+	if fn == nil {
+		// Can't change an atomic.Value back to nil, so set it to this
+		// no-op func instead.
+		fn = func(*Rows, *error) {}
+	}
+	atomicRowsCloseHook.Store(fn)
+}
+
 // Test issue 6651
 func TestIssue6651(t *testing.T) {
 	db := newTestDB(t, "people")
@@ -1147,6 +1252,7 @@
 		return fmt.Errorf(want)
 	}
 	defer func() { rowsCursorNextHook = nil }()
+
 	err := db.QueryRow("SELECT|people|name|").Scan(&v)
 	if err == nil || err.Error() != want {
 		t.Errorf("error = %q; want %q", err, want)
@@ -1154,10 +1260,10 @@
 	rowsCursorNextHook = nil
 
 	want = "error in rows.Close"
-	rowsCloseHook = func(rows *Rows, err *error) {
+	setRowsCloseHook(func(rows *Rows, err *error) {
 		*err = fmt.Errorf(want)
-	}
-	defer func() { rowsCloseHook = nil }()
+	})
+	defer setRowsCloseHook(nil)
 	err = db.QueryRow("SELECT|people|name|").Scan(&v)
 	if err == nil || err.Error() != want {
 		t.Errorf("error = %q; want %q", err, want)
@@ -1830,7 +1936,9 @@
 		db.dumpDeps(t)
 	}
 
-	if len(stmt.css) > nquery {
+	if !waitCondition(5*time.Second, 5*time.Millisecond, func() bool {
+		return len(stmt.css) <= nquery
+	}) {
 		t.Errorf("len(stmt.css) = %d; want <= %d", len(stmt.css), nquery)
 	}
 
@@ -2576,10 +2684,10 @@
 	if err != nil {
 		t.Fatal(err)
 	}
-	rowsCloseHook = func(rows *Rows, err *error) {
+	setRowsCloseHook(func(rows *Rows, err *error) {
 		*err = driver.ErrBadConn
-	}
-	defer func() { rowsCloseHook = nil }()
+	})
+	defer setRowsCloseHook(nil)
 	for i := 0; i < 10; i++ {
 		rows, err := stmt.Query()
 		if err != nil {
@@ -2642,7 +2750,10 @@
 			if err != nil {
 				return
 			}
-			rows, err := tx.QueryContext(ctx, "WAIT|"+qwait+"|SELECT|people|name|")
+			// This is expected to give a cancel error many, but not all the time.
+			// Test failure will happen with a panic or other race condition being
+			// reported.
+			rows, _ := tx.QueryContext(ctx, "WAIT|"+qwait+"|SELECT|people|name|")
 			if rows != nil {
 				rows.Close()
 			}
@@ -2652,7 +2763,50 @@
 		}()
 	}
 	wg.Wait()
-	time.Sleep(milliWait * 3 * time.Millisecond)
+}
+
+// TestIssue18719 closes the context right before use. The sql.driverConn
+// will nil out the ci on close in a lock, but if another process uses it right after
+// it will panic with on the nil ref.
+//
+// See https://golang.org/cl/35550 .
+func TestIssue18719(t *testing.T) {
+	db := newTestDB(t, "people")
+	defer closeDB(t, db)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	tx, err := db.BeginTx(ctx, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	hookTxGrabConn = func() {
+		cancel()
+
+		// Wait for the context to cancel and tx to rollback.
+		for tx.isDone() == false {
+			time.Sleep(time.Millisecond * 3)
+		}
+	}
+	defer func() { hookTxGrabConn = nil }()
+
+	// This call will grab the connection and cancel the context
+	// after it has done so. Code after must deal with the canceled state.
+	rows, err := tx.QueryContext(ctx, "SELECT|people|name|")
+	if err != nil {
+		rows.Close()
+		t.Fatalf("expected error %v but got %v", nil, err)
+	}
+
+	// Rows may be ignored because it will be closed when the context is canceled.
+
+	// Do not explicitly rollback. The rollback will happen from the
+	// canceled context.
+
+	cancel()
+	waitForRowsClose(t, rows, 5*time.Second)
 }
 
 func TestConcurrency(t *testing.T) {
diff --git a/src/encoding/xml/marshal.go b/src/encoding/xml/marshal.go
index 1176f5d..4c6ba8c 100644
--- a/src/encoding/xml/marshal.go
+++ b/src/encoding/xml/marshal.go
@@ -775,6 +775,20 @@
 
 var ddBytes = []byte("--")
 
+// indirect drills into interfaces and pointers, returning the pointed-at value.
+// If it encounters a nil interface or pointer, indirect returns that nil value.
+// This can turn into an infinite loop given a cyclic chain,
+// but it matches the Go 1 behavior.
+func indirect(vf reflect.Value) reflect.Value {
+	for vf.Kind() == reflect.Interface || vf.Kind() == reflect.Ptr {
+		if vf.IsNil() {
+			return vf
+		}
+		vf = vf.Elem()
+	}
+	return vf
+}
+
 func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error {
 	s := parentStack{p: p}
 	for i := range tinfo.fields {
@@ -816,17 +830,9 @@
 					continue
 				}
 			}
-			// Drill into interfaces and pointers.
-			// This can turn into an infinite loop given a cyclic chain,
-			// but it matches the Go 1 behavior.
-			for vf.Kind() == reflect.Interface || vf.Kind() == reflect.Ptr {
-				if vf.IsNil() {
-					return nil
-				}
-				vf = vf.Elem()
-			}
 
 			var scratch [64]byte
+			vf = indirect(vf)
 			switch vf.Kind() {
 			case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
 				if err := emit(p, strconv.AppendInt(scratch[:0], vf.Int(), 10)); err != nil {
@@ -861,6 +867,7 @@
 			if err := s.trim(finfo.parents); err != nil {
 				return err
 			}
+			vf = indirect(vf)
 			k := vf.Kind()
 			if !(k == reflect.String || k == reflect.Slice && vf.Type().Elem().Kind() == reflect.Uint8) {
 				return fmt.Errorf("xml: bad type for comment field of %s", val.Type())
@@ -901,6 +908,7 @@
 			continue
 
 		case fInnerXml:
+			vf = indirect(vf)
 			iface := vf.Interface()
 			switch raw := iface.(type) {
 			case []byte:
diff --git a/src/encoding/xml/marshal_test.go b/src/encoding/xml/marshal_test.go
index d79b99a..5ec7ece 100644
--- a/src/encoding/xml/marshal_test.go
+++ b/src/encoding/xml/marshal_test.go
@@ -386,6 +386,140 @@
 	return &x
 }
 
+func stringptr(x string) *string {
+	return &x
+}
+
+type T1 struct{}
+type T2 struct{}
+type T3 struct{}
+
+type IndirComment struct {
+	T1      T1
+	Comment *string `xml:",comment"`
+	T2      T2
+}
+
+type DirectComment struct {
+	T1      T1
+	Comment string `xml:",comment"`
+	T2      T2
+}
+
+type IfaceComment struct {
+	T1      T1
+	Comment interface{} `xml:",comment"`
+	T2      T2
+}
+
+type IndirChardata struct {
+	T1       T1
+	Chardata *string `xml:",chardata"`
+	T2       T2
+}
+
+type DirectChardata struct {
+	T1       T1
+	Chardata string `xml:",chardata"`
+	T2       T2
+}
+
+type IfaceChardata struct {
+	T1       T1
+	Chardata interface{} `xml:",chardata"`
+	T2       T2
+}
+
+type IndirCDATA struct {
+	T1    T1
+	CDATA *string `xml:",cdata"`
+	T2    T2
+}
+
+type DirectCDATA struct {
+	T1    T1
+	CDATA string `xml:",cdata"`
+	T2    T2
+}
+
+type IfaceCDATA struct {
+	T1    T1
+	CDATA interface{} `xml:",cdata"`
+	T2    T2
+}
+
+type IndirInnerXML struct {
+	T1       T1
+	InnerXML *string `xml:",innerxml"`
+	T2       T2
+}
+
+type DirectInnerXML struct {
+	T1       T1
+	InnerXML string `xml:",innerxml"`
+	T2       T2
+}
+
+type IfaceInnerXML struct {
+	T1       T1
+	InnerXML interface{} `xml:",innerxml"`
+	T2       T2
+}
+
+type IndirElement struct {
+	T1      T1
+	Element *string
+	T2      T2
+}
+
+type DirectElement struct {
+	T1      T1
+	Element string
+	T2      T2
+}
+
+type IfaceElement struct {
+	T1      T1
+	Element interface{}
+	T2      T2
+}
+
+type IndirOmitEmpty struct {
+	T1        T1
+	OmitEmpty *string `xml:",omitempty"`
+	T2        T2
+}
+
+type DirectOmitEmpty struct {
+	T1        T1
+	OmitEmpty string `xml:",omitempty"`
+	T2        T2
+}
+
+type IfaceOmitEmpty struct {
+	T1        T1
+	OmitEmpty interface{} `xml:",omitempty"`
+	T2        T2
+}
+
+type IndirAny struct {
+	T1  T1
+	Any *string `xml:",any"`
+	T2  T2
+}
+
+type DirectAny struct {
+	T1  T1
+	Any string `xml:",any"`
+	T2  T2
+}
+
+type IfaceAny struct {
+	T1  T1
+	Any interface{} `xml:",any"`
+	T2  T2
+}
+
 var (
 	nameAttr     = "Sarah"
 	ageAttr      = uint(12)
@@ -398,10 +532,12 @@
 // please try to make them two-way as well to ensure that
 // marshaling and unmarshaling are as symmetrical as feasible.
 var marshalTests = []struct {
-	Value         interface{}
-	ExpectXML     string
-	MarshalOnly   bool
-	UnmarshalOnly bool
+	Value          interface{}
+	ExpectXML      string
+	MarshalOnly    bool
+	MarshalError   string
+	UnmarshalOnly  bool
+	UnmarshalError string
 }{
 	// Test nil marshals to nothing
 	{Value: nil, ExpectXML: ``, MarshalOnly: true},
@@ -1133,6 +1269,382 @@
 		ExpectXML: `<NestedAndCData><A><B></B><B></B></A><![CDATA[test]]></NestedAndCData>`,
 		Value:     &NestedAndCData{AB: make([]string, 2), CDATA: "test"},
 	},
+	// Test pointer indirection in various kinds of fields.
+	// https://golang.org/issue/19063
+	{
+		ExpectXML:   `<IndirComment><T1></T1><!--hi--><T2></T2></IndirComment>`,
+		Value:       &IndirComment{Comment: stringptr("hi")},
+		MarshalOnly: true,
+	},
+	{
+		ExpectXML:   `<IndirComment><T1></T1><T2></T2></IndirComment>`,
+		Value:       &IndirComment{Comment: stringptr("")},
+		MarshalOnly: true,
+	},
+	{
+		ExpectXML:    `<IndirComment><T1></T1><T2></T2></IndirComment>`,
+		Value:        &IndirComment{Comment: nil},
+		MarshalError: "xml: bad type for comment field of xml.IndirComment",
+	},
+	{
+		ExpectXML:     `<IndirComment><T1></T1><!--hi--><T2></T2></IndirComment>`,
+		Value:         &IndirComment{Comment: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML:   `<IfaceComment><T1></T1><!--hi--><T2></T2></IfaceComment>`,
+		Value:       &IfaceComment{Comment: "hi"},
+		MarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<IfaceComment><T1></T1><!--hi--><T2></T2></IfaceComment>`,
+		Value:         &IfaceComment{Comment: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML:    `<IfaceComment><T1></T1><T2></T2></IfaceComment>`,
+		Value:        &IfaceComment{Comment: nil},
+		MarshalError: "xml: bad type for comment field of xml.IfaceComment",
+	},
+	{
+		ExpectXML:     `<IfaceComment><T1></T1><T2></T2></IfaceComment>`,
+		Value:         &IfaceComment{Comment: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML: `<DirectComment><T1></T1><!--hi--><T2></T2></DirectComment>`,
+		Value:     &DirectComment{Comment: string("hi")},
+	},
+	{
+		ExpectXML: `<DirectComment><T1></T1><T2></T2></DirectComment>`,
+		Value:     &DirectComment{Comment: string("")},
+	},
+	{
+		ExpectXML: `<IndirChardata><T1></T1>hi<T2></T2></IndirChardata>`,
+		Value:     &IndirChardata{Chardata: stringptr("hi")},
+	},
+	{
+		ExpectXML:     `<IndirChardata><T1></T1><![CDATA[hi]]><T2></T2></IndirChardata>`,
+		Value:         &IndirChardata{Chardata: stringptr("hi")},
+		UnmarshalOnly: true, // marshals without CDATA
+	},
+	{
+		ExpectXML: `<IndirChardata><T1></T1><T2></T2></IndirChardata>`,
+		Value:     &IndirChardata{Chardata: stringptr("")},
+	},
+	{
+		ExpectXML:   `<IndirChardata><T1></T1><T2></T2></IndirChardata>`,
+		Value:       &IndirChardata{Chardata: nil},
+		MarshalOnly: true, // unmarshal leaves Chardata=stringptr("")
+	},
+	{
+		ExpectXML:      `<IfaceChardata><T1></T1>hi<T2></T2></IfaceChardata>`,
+		Value:          &IfaceChardata{Chardata: string("hi")},
+		UnmarshalError: "cannot unmarshal into interface {}",
+	},
+	{
+		ExpectXML:      `<IfaceChardata><T1></T1><![CDATA[hi]]><T2></T2></IfaceChardata>`,
+		Value:          &IfaceChardata{Chardata: string("hi")},
+		UnmarshalOnly:  true, // marshals without CDATA
+		UnmarshalError: "cannot unmarshal into interface {}",
+	},
+	{
+		ExpectXML:      `<IfaceChardata><T1></T1><T2></T2></IfaceChardata>`,
+		Value:          &IfaceChardata{Chardata: string("")},
+		UnmarshalError: "cannot unmarshal into interface {}",
+	},
+	{
+		ExpectXML:      `<IfaceChardata><T1></T1><T2></T2></IfaceChardata>`,
+		Value:          &IfaceChardata{Chardata: nil},
+		UnmarshalError: "cannot unmarshal into interface {}",
+	},
+	{
+		ExpectXML: `<DirectChardata><T1></T1>hi<T2></T2></DirectChardata>`,
+		Value:     &DirectChardata{Chardata: string("hi")},
+	},
+	{
+		ExpectXML:     `<DirectChardata><T1></T1><![CDATA[hi]]><T2></T2></DirectChardata>`,
+		Value:         &DirectChardata{Chardata: string("hi")},
+		UnmarshalOnly: true, // marshals without CDATA
+	},
+	{
+		ExpectXML: `<DirectChardata><T1></T1><T2></T2></DirectChardata>`,
+		Value:     &DirectChardata{Chardata: string("")},
+	},
+	{
+		ExpectXML: `<IndirCDATA><T1></T1><![CDATA[hi]]><T2></T2></IndirCDATA>`,
+		Value:     &IndirCDATA{CDATA: stringptr("hi")},
+	},
+	{
+		ExpectXML:     `<IndirCDATA><T1></T1>hi<T2></T2></IndirCDATA>`,
+		Value:         &IndirCDATA{CDATA: stringptr("hi")},
+		UnmarshalOnly: true, // marshals with CDATA
+	},
+	{
+		ExpectXML: `<IndirCDATA><T1></T1><T2></T2></IndirCDATA>`,
+		Value:     &IndirCDATA{CDATA: stringptr("")},
+	},
+	{
+		ExpectXML:   `<IndirCDATA><T1></T1><T2></T2></IndirCDATA>`,
+		Value:       &IndirCDATA{CDATA: nil},
+		MarshalOnly: true, // unmarshal leaves CDATA=stringptr("")
+	},
+	{
+		ExpectXML:      `<IfaceCDATA><T1></T1><![CDATA[hi]]><T2></T2></IfaceCDATA>`,
+		Value:          &IfaceCDATA{CDATA: string("hi")},
+		UnmarshalError: "cannot unmarshal into interface {}",
+	},
+	{
+		ExpectXML:      `<IfaceCDATA><T1></T1>hi<T2></T2></IfaceCDATA>`,
+		Value:          &IfaceCDATA{CDATA: string("hi")},
+		UnmarshalOnly:  true, // marshals with CDATA
+		UnmarshalError: "cannot unmarshal into interface {}",
+	},
+	{
+		ExpectXML:      `<IfaceCDATA><T1></T1><T2></T2></IfaceCDATA>`,
+		Value:          &IfaceCDATA{CDATA: string("")},
+		UnmarshalError: "cannot unmarshal into interface {}",
+	},
+	{
+		ExpectXML:      `<IfaceCDATA><T1></T1><T2></T2></IfaceCDATA>`,
+		Value:          &IfaceCDATA{CDATA: nil},
+		UnmarshalError: "cannot unmarshal into interface {}",
+	},
+	{
+		ExpectXML: `<DirectCDATA><T1></T1><![CDATA[hi]]><T2></T2></DirectCDATA>`,
+		Value:     &DirectCDATA{CDATA: string("hi")},
+	},
+	{
+		ExpectXML:     `<DirectCDATA><T1></T1>hi<T2></T2></DirectCDATA>`,
+		Value:         &DirectCDATA{CDATA: string("hi")},
+		UnmarshalOnly: true, // marshals with CDATA
+	},
+	{
+		ExpectXML: `<DirectCDATA><T1></T1><T2></T2></DirectCDATA>`,
+		Value:     &DirectCDATA{CDATA: string("")},
+	},
+	{
+		ExpectXML:   `<IndirInnerXML><T1></T1><hi/><T2></T2></IndirInnerXML>`,
+		Value:       &IndirInnerXML{InnerXML: stringptr("<hi/>")},
+		MarshalOnly: true,
+	},
+	{
+		ExpectXML:   `<IndirInnerXML><T1></T1><T2></T2></IndirInnerXML>`,
+		Value:       &IndirInnerXML{InnerXML: stringptr("")},
+		MarshalOnly: true,
+	},
+	{
+		ExpectXML: `<IndirInnerXML><T1></T1><T2></T2></IndirInnerXML>`,
+		Value:     &IndirInnerXML{InnerXML: nil},
+	},
+	{
+		ExpectXML:     `<IndirInnerXML><T1></T1><hi/><T2></T2></IndirInnerXML>`,
+		Value:         &IndirInnerXML{InnerXML: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML:   `<IfaceInnerXML><T1></T1><hi/><T2></T2></IfaceInnerXML>`,
+		Value:       &IfaceInnerXML{InnerXML: "<hi/>"},
+		MarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<IfaceInnerXML><T1></T1><hi/><T2></T2></IfaceInnerXML>`,
+		Value:         &IfaceInnerXML{InnerXML: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML: `<IfaceInnerXML><T1></T1><T2></T2></IfaceInnerXML>`,
+		Value:     &IfaceInnerXML{InnerXML: nil},
+	},
+	{
+		ExpectXML:     `<IfaceInnerXML><T1></T1><T2></T2></IfaceInnerXML>`,
+		Value:         &IfaceInnerXML{InnerXML: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML:   `<DirectInnerXML><T1></T1><hi/><T2></T2></DirectInnerXML>`,
+		Value:       &DirectInnerXML{InnerXML: string("<hi/>")},
+		MarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<DirectInnerXML><T1></T1><hi/><T2></T2></DirectInnerXML>`,
+		Value:         &DirectInnerXML{InnerXML: string("<T1></T1><hi/><T2></T2>")},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML:   `<DirectInnerXML><T1></T1><T2></T2></DirectInnerXML>`,
+		Value:       &DirectInnerXML{InnerXML: string("")},
+		MarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<DirectInnerXML><T1></T1><T2></T2></DirectInnerXML>`,
+		Value:         &DirectInnerXML{InnerXML: string("<T1></T1><T2></T2>")},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML: `<IndirElement><T1></T1><Element>hi</Element><T2></T2></IndirElement>`,
+		Value:     &IndirElement{Element: stringptr("hi")},
+	},
+	{
+		ExpectXML: `<IndirElement><T1></T1><Element></Element><T2></T2></IndirElement>`,
+		Value:     &IndirElement{Element: stringptr("")},
+	},
+	{
+		ExpectXML: `<IndirElement><T1></T1><T2></T2></IndirElement>`,
+		Value:     &IndirElement{Element: nil},
+	},
+	{
+		ExpectXML:   `<IfaceElement><T1></T1><Element>hi</Element><T2></T2></IfaceElement>`,
+		Value:       &IfaceElement{Element: "hi"},
+		MarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<IfaceElement><T1></T1><Element>hi</Element><T2></T2></IfaceElement>`,
+		Value:         &IfaceElement{Element: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML: `<IfaceElement><T1></T1><T2></T2></IfaceElement>`,
+		Value:     &IfaceElement{Element: nil},
+	},
+	{
+		ExpectXML:     `<IfaceElement><T1></T1><T2></T2></IfaceElement>`,
+		Value:         &IfaceElement{Element: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML: `<DirectElement><T1></T1><Element>hi</Element><T2></T2></DirectElement>`,
+		Value:     &DirectElement{Element: string("hi")},
+	},
+	{
+		ExpectXML: `<DirectElement><T1></T1><Element></Element><T2></T2></DirectElement>`,
+		Value:     &DirectElement{Element: string("")},
+	},
+	{
+		ExpectXML: `<IndirOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></IndirOmitEmpty>`,
+		Value:     &IndirOmitEmpty{OmitEmpty: stringptr("hi")},
+	},
+	{
+		// Note: Changed in Go 1.8 to include <OmitEmpty> element (because x.OmitEmpty != nil).
+		ExpectXML:   `<IndirOmitEmpty><T1></T1><OmitEmpty></OmitEmpty><T2></T2></IndirOmitEmpty>`,
+		Value:       &IndirOmitEmpty{OmitEmpty: stringptr("")},
+		MarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<IndirOmitEmpty><T1></T1><OmitEmpty></OmitEmpty><T2></T2></IndirOmitEmpty>`,
+		Value:         &IndirOmitEmpty{OmitEmpty: stringptr("")},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML: `<IndirOmitEmpty><T1></T1><T2></T2></IndirOmitEmpty>`,
+		Value:     &IndirOmitEmpty{OmitEmpty: nil},
+	},
+	{
+		ExpectXML:   `<IfaceOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></IfaceOmitEmpty>`,
+		Value:       &IfaceOmitEmpty{OmitEmpty: "hi"},
+		MarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<IfaceOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></IfaceOmitEmpty>`,
+		Value:         &IfaceOmitEmpty{OmitEmpty: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML: `<IfaceOmitEmpty><T1></T1><T2></T2></IfaceOmitEmpty>`,
+		Value:     &IfaceOmitEmpty{OmitEmpty: nil},
+	},
+	{
+		ExpectXML:     `<IfaceOmitEmpty><T1></T1><T2></T2></IfaceOmitEmpty>`,
+		Value:         &IfaceOmitEmpty{OmitEmpty: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML: `<DirectOmitEmpty><T1></T1><OmitEmpty>hi</OmitEmpty><T2></T2></DirectOmitEmpty>`,
+		Value:     &DirectOmitEmpty{OmitEmpty: string("hi")},
+	},
+	{
+		ExpectXML: `<DirectOmitEmpty><T1></T1><T2></T2></DirectOmitEmpty>`,
+		Value:     &DirectOmitEmpty{OmitEmpty: string("")},
+	},
+	{
+		ExpectXML: `<IndirAny><T1></T1><Any>hi</Any><T2></T2></IndirAny>`,
+		Value:     &IndirAny{Any: stringptr("hi")},
+	},
+	{
+		ExpectXML: `<IndirAny><T1></T1><Any></Any><T2></T2></IndirAny>`,
+		Value:     &IndirAny{Any: stringptr("")},
+	},
+	{
+		ExpectXML: `<IndirAny><T1></T1><T2></T2></IndirAny>`,
+		Value:     &IndirAny{Any: nil},
+	},
+	{
+		ExpectXML:   `<IfaceAny><T1></T1><Any>hi</Any><T2></T2></IfaceAny>`,
+		Value:       &IfaceAny{Any: "hi"},
+		MarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<IfaceAny><T1></T1><Any>hi</Any><T2></T2></IfaceAny>`,
+		Value:         &IfaceAny{Any: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML: `<IfaceAny><T1></T1><T2></T2></IfaceAny>`,
+		Value:     &IfaceAny{Any: nil},
+	},
+	{
+		ExpectXML:     `<IfaceAny><T1></T1><T2></T2></IfaceAny>`,
+		Value:         &IfaceAny{Any: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML: `<DirectAny><T1></T1><Any>hi</Any><T2></T2></DirectAny>`,
+		Value:     &DirectAny{Any: string("hi")},
+	},
+	{
+		ExpectXML: `<DirectAny><T1></T1><Any></Any><T2></T2></DirectAny>`,
+		Value:     &DirectAny{Any: string("")},
+	},
+	{
+		ExpectXML:     `<IndirFoo><T1></T1><Foo>hi</Foo><T2></T2></IndirFoo>`,
+		Value:         &IndirAny{Any: stringptr("hi")},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<IndirFoo><T1></T1><Foo></Foo><T2></T2></IndirFoo>`,
+		Value:         &IndirAny{Any: stringptr("")},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<IndirFoo><T1></T1><T2></T2></IndirFoo>`,
+		Value:         &IndirAny{Any: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<IfaceFoo><T1></T1><Foo>hi</Foo><T2></T2></IfaceFoo>`,
+		Value:         &IfaceAny{Any: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<IfaceFoo><T1></T1><T2></T2></IfaceFoo>`,
+		Value:         &IfaceAny{Any: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<IfaceFoo><T1></T1><T2></T2></IfaceFoo>`,
+		Value:         &IfaceAny{Any: nil},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<DirectFoo><T1></T1><Foo>hi</Foo><T2></T2></DirectFoo>`,
+		Value:         &DirectAny{Any: string("hi")},
+		UnmarshalOnly: true,
+	},
+	{
+		ExpectXML:     `<DirectFoo><T1></T1><Foo></Foo><T2></T2></DirectFoo>`,
+		Value:         &DirectAny{Any: string("")},
+		UnmarshalOnly: true,
+	},
 }
 
 func TestMarshal(t *testing.T) {
@@ -1142,7 +1654,17 @@
 		}
 		data, err := Marshal(test.Value)
 		if err != nil {
-			t.Errorf("#%d: marshal(%#v): %s", idx, test.Value, err)
+			if test.MarshalError == "" {
+				t.Errorf("#%d: marshal(%#v): %s", idx, test.Value, err)
+				continue
+			}
+			if !strings.Contains(err.Error(), test.MarshalError) {
+				t.Errorf("#%d: marshal(%#v): %s, want %q", idx, test.Value, err, test.MarshalError)
+			}
+			continue
+		}
+		if test.MarshalError != "" {
+			t.Errorf("#%d: Marshal succeeded, want error %q", idx, test.MarshalError)
 			continue
 		}
 		if got, want := string(data), test.ExpectXML; got != want {
@@ -1268,8 +1790,16 @@
 		}
 
 		if err != nil {
-			t.Errorf("#%d: unexpected error: %#v", i, err)
-		} else if got, want := dest, test.Value; !reflect.DeepEqual(got, want) {
+			if test.UnmarshalError == "" {
+				t.Errorf("#%d: unmarshal(%#v): %s", i, test.ExpectXML, err)
+				continue
+			}
+			if !strings.Contains(err.Error(), test.UnmarshalError) {
+				t.Errorf("#%d: unmarshal(%#v): %s, want %q", i, test.ExpectXML, err, test.UnmarshalError)
+			}
+			continue
+		}
+		if got, want := dest, test.Value; !reflect.DeepEqual(got, want) {
 			t.Errorf("#%d: unmarshal(%q):\nhave %#v\nwant %#v", i, test.ExpectXML, got, want)
 		}
 	}
diff --git a/src/go/build/build.go b/src/go/build/build.go
index da12d50..f11bc0c 100644
--- a/src/go/build/build.go
+++ b/src/go/build/build.go
@@ -266,7 +266,7 @@
 	}
 	if home := os.Getenv(env); home != "" {
 		def := filepath.Join(home, "go")
-		if def == runtime.GOROOT() {
+		if filepath.Clean(def) == filepath.Clean(runtime.GOROOT()) {
 			// Don't set the default GOPATH to GOROOT,
 			// as that will trigger warnings from the go tool.
 			return ""
diff --git a/src/go/printer/nodes.go b/src/go/printer/nodes.go
index 11f26d4..ea43286 100644
--- a/src/go/printer/nodes.go
+++ b/src/go/printer/nodes.go
@@ -733,7 +733,7 @@
 
 	case *ast.FuncLit:
 		p.expr(x.Type)
-		p.adjBlock(p.distanceFrom(x.Type.Pos()), blank, x.Body)
+		p.funcBody(p.distanceFrom(x.Type.Pos()), blank, x.Body)
 
 	case *ast.ParenExpr:
 		if _, hasParens := x.X.(*ast.ParenExpr); hasParens {
@@ -825,6 +825,7 @@
 		if x.Type != nil {
 			p.expr1(x.Type, token.HighestPrec, depth)
 		}
+		p.level++
 		p.print(x.Lbrace, token.LBRACE)
 		p.exprList(x.Lbrace, x.Elts, 1, commaTerm, x.Rbrace)
 		// do not insert extra line break following a /*-style comment
@@ -837,6 +838,7 @@
 			mode |= noExtraBlank
 		}
 		p.print(mode, x.Rbrace, token.RBRACE, mode)
+		p.level--
 
 	case *ast.Ellipsis:
 		p.print(token.ELLIPSIS)
@@ -1557,18 +1559,23 @@
 	return bodySize
 }
 
-// adjBlock prints an "adjacent" block (e.g., a for-loop or function body) following
-// a header (e.g., a for-loop control clause or function signature) of given headerSize.
+// funcBody prints a function body following a function header of given headerSize.
 // If the header's and block's size are "small enough" and the block is "simple enough",
 // the block is printed on the current line, without line breaks, spaced from the header
 // by sep. Otherwise the block's opening "{" is printed on the current line, followed by
 // lines for the block's statements and its closing "}".
 //
-func (p *printer) adjBlock(headerSize int, sep whiteSpace, b *ast.BlockStmt) {
+func (p *printer) funcBody(headerSize int, sep whiteSpace, b *ast.BlockStmt) {
 	if b == nil {
 		return
 	}
 
+	// save/restore composite literal nesting level
+	defer func(level int) {
+		p.level = level
+	}(p.level)
+	p.level = 0
+
 	const maxSize = 100
 	if headerSize+p.bodySize(b, maxSize) <= maxSize {
 		p.print(sep, b.Lbrace, token.LBRACE)
@@ -1613,7 +1620,7 @@
 	}
 	p.expr(d.Name)
 	p.signature(d.Type.Params, d.Type.Results)
-	p.adjBlock(p.distanceFrom(d.Pos()), vtab, d.Body)
+	p.funcBody(p.distanceFrom(d.Pos()), vtab, d.Body)
 }
 
 func (p *printer) decl(decl ast.Decl) {
diff --git a/src/go/printer/printer.go b/src/go/printer/printer.go
index eabf23e..be61dad 100644
--- a/src/go/printer/printer.go
+++ b/src/go/printer/printer.go
@@ -58,6 +58,7 @@
 	// Current state
 	output      []byte       // raw printer result
 	indent      int          // current indentation
+	level       int          // level == 0: outside composite literal; level > 0: inside composite literal
 	mode        pmode        // current printer mode
 	impliedSemi bool         // if set, a linebreak implies a semicolon
 	lastTok     token.Token  // last token printed (token.ILLEGAL if it's whitespace)
@@ -744,15 +745,19 @@
 		// follows on the same line but is not a comma, and not a "closing"
 		// token immediately following its corresponding "opening" token,
 		// add an extra separator unless explicitly disabled. Use a blank
-		// as separator unless we have pending linebreaks and they are not
-		// disabled, in which case we want a linebreak (issue 15137).
+		// as separator unless we have pending linebreaks, they are not
+		// disabled, and we are outside a composite literal, in which case
+		// we want a linebreak (issue 15137).
+		// TODO(gri) This has become overly complicated. We should be able
+		// to track whether we're inside an expression or statement and
+		// use that information to decide more directly.
 		needsLinebreak := false
 		if p.mode&noExtraBlank == 0 &&
 			last.Text[1] == '*' && p.lineFor(last.Pos()) == next.Line &&
 			tok != token.COMMA &&
 			(tok != token.RPAREN || p.prevOpen == token.LPAREN) &&
 			(tok != token.RBRACK || p.prevOpen == token.LBRACK) {
-			if p.containsLinebreak() && p.mode&noExtraLinebreak == 0 {
+			if p.containsLinebreak() && p.mode&noExtraLinebreak == 0 && p.level == 0 {
 				needsLinebreak = true
 			} else {
 				p.writeByte(' ', 1)
diff --git a/src/go/printer/testdata/comments2.golden b/src/go/printer/testdata/comments2.golden
index 7676a26..8b3a94d 100644
--- a/src/go/printer/testdata/comments2.golden
+++ b/src/go/printer/testdata/comments2.golden
@@ -103,3 +103,62 @@
 	mask := uint64(1)<<c - 1		// Allocation mask
 	used := atomic.LoadUint64(&h.used)	// Current allocations
 }
+
+// Test cases for issue 18782
+var _ = [][]int{
+	/*       a, b, c, d, e */
+	/* a */ {0, 0, 0, 0, 0},
+	/* b */ {0, 5, 4, 4, 4},
+	/* c */ {0, 4, 5, 4, 4},
+	/* d */ {0, 4, 4, 5, 4},
+	/* e */ {0, 4, 4, 4, 5},
+}
+
+var _ = T{ /* a */ 0}
+
+var _ = T{ /* a */ /* b */ 0}
+
+var _ = T{	/* a */	/* b */
+	/* c */ 0,
+}
+
+var _ = T{	/* a */	/* b */
+	/* c */
+	/* d */ 0,
+}
+
+var _ = T{
+	/* a */
+	/* b */ 0,
+}
+
+var _ = T{ /* a */ {}}
+
+var _ = T{ /* a */ /* b */ {}}
+
+var _ = T{	/* a */	/* b */
+	/* c */ {},
+}
+
+var _ = T{	/* a */	/* b */
+	/* c */
+	/* d */ {},
+}
+
+var _ = T{
+	/* a */
+	/* b */ {},
+}
+
+var _ = []T{
+	func() {
+		var _ = [][]int{
+			/*       a, b, c, d, e */
+			/* a */ {0, 0, 0, 0, 0},
+			/* b */ {0, 5, 4, 4, 4},
+			/* c */ {0, 4, 5, 4, 4},
+			/* d */ {0, 4, 4, 5, 4},
+			/* e */ {0, 4, 4, 4, 5},
+		}
+	},
+}
diff --git a/src/go/printer/testdata/comments2.input b/src/go/printer/testdata/comments2.input
index 4a055c8..8d38c41 100644
--- a/src/go/printer/testdata/comments2.input
+++ b/src/go/printer/testdata/comments2.input
@@ -103,3 +103,66 @@
    mask := uint64(1)<<c - 1 // Allocation mask
    used := atomic.LoadUint64(&h.used) // Current allocations
 }
+
+// Test cases for issue 18782
+var _ = [][]int{
+   /*       a, b, c, d, e */
+   /* a */ {0, 0, 0, 0, 0},
+   /* b */ {0, 5, 4, 4, 4},
+   /* c */ {0, 4, 5, 4, 4},
+   /* d */ {0, 4, 4, 5, 4},
+   /* e */ {0, 4, 4, 4, 5},
+}
+
+var _ = T{ /* a */ 0,
+}
+
+var _ = T{ /* a */ /* b */ 0,
+}
+
+var _ = T{ /* a */ /* b */
+   /* c */ 0,
+}
+
+var _ = T{ /* a */ /* b */
+   /* c */
+   /* d */ 0,
+}
+
+var _ = T{
+   /* a */
+   /* b */ 0,
+}
+
+var _ = T{ /* a */ {},
+}
+
+var _ = T{ /* a */ /* b */ {},
+}
+
+var _ = T{ /* a */ /* b */
+   /* c */ {},
+}
+
+var _ = T{ /* a */ /* b */
+   /* c */
+   /* d */ {},
+}
+
+var _ = T{
+   /* a */
+   /* b */ {},
+}
+
+var _ = []T{
+   func() {
+      var _ = [][]int{
+         /*       a, b, c, d, e */
+         /* a */ {0, 0, 0, 0, 0},
+         /* b */ {0, 5, 4, 4, 4},
+         /* c */ {0, 4, 5, 4, 4},
+         /* d */ {0, 4, 4, 5, 4},
+         /* e */ {0, 4, 4, 4, 5},
+      }
+   },
+}
diff --git a/src/net/http/client.go b/src/net/http/client.go
index d368bae..0005538 100644
--- a/src/net/http/client.go
+++ b/src/net/http/client.go
@@ -413,11 +413,12 @@
 
 // redirectBehavior describes what should happen when the
 // client encounters a 3xx status code from the server
-func redirectBehavior(reqMethod string, resp *Response, ireq *Request) (redirectMethod string, shouldRedirect bool) {
+func redirectBehavior(reqMethod string, resp *Response, ireq *Request) (redirectMethod string, shouldRedirect, includeBody bool) {
 	switch resp.StatusCode {
 	case 301, 302, 303:
 		redirectMethod = reqMethod
 		shouldRedirect = true
+		includeBody = false
 
 		// RFC 2616 allowed automatic redirection only with GET and
 		// HEAD requests. RFC 7231 lifts this restriction, but we still
@@ -429,6 +430,7 @@
 	case 307, 308:
 		redirectMethod = reqMethod
 		shouldRedirect = true
+		includeBody = true
 
 		// Treat 307 and 308 specially, since they're new in
 		// Go 1.8, and they also require re-sending the request body.
@@ -449,7 +451,7 @@
 			shouldRedirect = false
 		}
 	}
-	return redirectMethod, shouldRedirect
+	return redirectMethod, shouldRedirect, includeBody
 }
 
 // Do sends an HTTP request and returns an HTTP response, following
@@ -492,11 +494,14 @@
 	}
 
 	var (
-		deadline       = c.deadline()
-		reqs           []*Request
-		resp           *Response
-		copyHeaders    = c.makeHeadersCopier(req)
+		deadline    = c.deadline()
+		reqs        []*Request
+		resp        *Response
+		copyHeaders = c.makeHeadersCopier(req)
+
+		// Redirect behavior:
 		redirectMethod string
+		includeBody    bool
 	)
 	uerr := func(err error) error {
 		req.closeBody()
@@ -534,7 +539,7 @@
 				Cancel:   ireq.Cancel,
 				ctx:      ireq.ctx,
 			}
-			if ireq.GetBody != nil {
+			if includeBody && ireq.GetBody != nil {
 				req.Body, err = ireq.GetBody()
 				if err != nil {
 					return nil, uerr(err)
@@ -598,7 +603,7 @@
 		}
 
 		var shouldRedirect bool
-		redirectMethod, shouldRedirect = redirectBehavior(req.Method, resp, reqs[0])
+		redirectMethod, shouldRedirect, includeBody = redirectBehavior(req.Method, resp, reqs[0])
 		if !shouldRedirect {
 			return resp, nil
 		}
diff --git a/src/net/http/client_test.go b/src/net/http/client_test.go
index eaf2cdc..4f674dd 100644
--- a/src/net/http/client_test.go
+++ b/src/net/http/client_test.go
@@ -360,25 +360,25 @@
 	wantSegments := []string{
 		`POST / "first"`,
 		`POST /?code=301&next=302 "c301"`,
-		`GET /?code=302 "c301"`,
-		`GET / "c301"`,
+		`GET /?code=302 ""`,
+		`GET / ""`,
 		`POST /?code=302&next=302 "c302"`,
-		`GET /?code=302 "c302"`,
-		`GET / "c302"`,
+		`GET /?code=302 ""`,
+		`GET / ""`,
 		`POST /?code=303&next=301 "c303wc301"`,
-		`GET /?code=301 "c303wc301"`,
-		`GET / "c303wc301"`,
+		`GET /?code=301 ""`,
+		`GET / ""`,
 		`POST /?code=304 "c304"`,
 		`POST /?code=305 "c305"`,
 		`POST /?code=307&next=303,308,302 "c307"`,
 		`POST /?code=303&next=308,302 "c307"`,
-		`GET /?code=308&next=302 "c307"`,
+		`GET /?code=308&next=302 ""`,
 		`GET /?code=302 "c307"`,
-		`GET / "c307"`,
+		`GET / ""`,
 		`POST /?code=308&next=302,301 "c308"`,
 		`POST /?code=302&next=301 "c308"`,
-		`GET /?code=301 "c308"`,
-		`GET / "c308"`,
+		`GET /?code=301 ""`,
+		`GET / ""`,
 		`POST /?code=404 "c404"`,
 	}
 	want := strings.Join(wantSegments, "\n")
@@ -399,20 +399,20 @@
 	wantSegments := []string{
 		`DELETE / "first"`,
 		`DELETE /?code=301&next=302,308 "c301"`,
-		`GET /?code=302&next=308 "c301"`,
-		`GET /?code=308 "c301"`,
+		`GET /?code=302&next=308 ""`,
+		`GET /?code=308 ""`,
 		`GET / "c301"`,
 		`DELETE /?code=302&next=302 "c302"`,
-		`GET /?code=302 "c302"`,
-		`GET / "c302"`,
+		`GET /?code=302 ""`,
+		`GET / ""`,
 		`DELETE /?code=303 "c303"`,
-		`GET / "c303"`,
+		`GET / ""`,
 		`DELETE /?code=307&next=301,308,303,302,304 "c307"`,
 		`DELETE /?code=301&next=308,303,302,304 "c307"`,
-		`GET /?code=308&next=303,302,304 "c307"`,
+		`GET /?code=308&next=303,302,304 ""`,
 		`GET /?code=303&next=302,304 "c307"`,
-		`GET /?code=302&next=304 "c307"`,
-		`GET /?code=304 "c307"`,
+		`GET /?code=302&next=304 ""`,
+		`GET /?code=304 ""`,
 		`DELETE /?code=308&next=307 "c308"`,
 		`DELETE /?code=307 "c308"`,
 		`DELETE / "c308"`,
@@ -432,7 +432,11 @@
 	ts = httptest.NewServer(HandlerFunc(func(w ResponseWriter, r *Request) {
 		log.Lock()
 		slurp, _ := ioutil.ReadAll(r.Body)
-		fmt.Fprintf(&log.Buffer, "%s %s %q\n", r.Method, r.RequestURI, slurp)
+		fmt.Fprintf(&log.Buffer, "%s %s %q", r.Method, r.RequestURI, slurp)
+		if cl := r.Header.Get("Content-Length"); r.Method == "GET" && len(slurp) == 0 && (r.ContentLength != 0 || cl != "") {
+			fmt.Fprintf(&log.Buffer, " (but with body=%T, content-length = %v, %q)", r.Body, r.ContentLength, cl)
+		}
+		log.WriteByte('\n')
 		log.Unlock()
 		urlQuery := r.URL.Query()
 		if v := urlQuery.Get("code"); v != "" {
@@ -475,7 +479,24 @@
 	want = strings.TrimSpace(want)
 
 	if got != want {
-		t.Errorf("Log differs.\n Got:\n%s\nWant:\n%s\n", got, want)
+		got, want, lines := removeCommonLines(got, want)
+		t.Errorf("Log differs after %d common lines.\n\nGot:\n%s\n\nWant:\n%s\n", lines, got, want)
+	}
+}
+
+func removeCommonLines(a, b string) (asuffix, bsuffix string, commonLines int) {
+	for {
+		nl := strings.IndexByte(a, '\n')
+		if nl < 0 {
+			return a, b, commonLines
+		}
+		line := a[:nl+1]
+		if !strings.HasPrefix(b, line) {
+			return a, b, commonLines
+		}
+		commonLines++
+		a = a[len(line):]
+		b = b[len(line):]
 	}
 }
 
diff --git a/src/net/http/serve_test.go b/src/net/http/serve_test.go
index 681dff1..73dd56e 100644
--- a/src/net/http/serve_test.go
+++ b/src/net/http/serve_test.go
@@ -5277,7 +5277,7 @@
 		defer conn.Close()
 		slurp, err := ioutil.ReadAll(buf.Reader)
 		if err != nil {
-			t.Error("Copy: %v", err)
+			t.Errorf("Copy: %v", err)
 		}
 		allX := true
 		for _, v := range slurp {
diff --git a/src/reflect/all_test.go b/src/reflect/all_test.go
index 022350b..3eca293 100644
--- a/src/reflect/all_test.go
+++ b/src/reflect/all_test.go
@@ -2478,17 +2478,24 @@
 }
 
 func TestPtrTo(t *testing.T) {
+	// This block of code means that the ptrToThis field of the
+	// reflect data for *unsafe.Pointer is non zero, see
+	// https://golang.org/issue/19003
+	var x unsafe.Pointer
+	var y = &x
+	var z = &y
+
 	var i int
 
-	typ := TypeOf(i)
+	typ := TypeOf(z)
 	for i = 0; i < 100; i++ {
 		typ = PtrTo(typ)
 	}
 	for i = 0; i < 100; i++ {
 		typ = typ.Elem()
 	}
-	if typ != TypeOf(i) {
-		t.Errorf("after 100 PtrTo and Elem, have %s, want %s", typ, TypeOf(i))
+	if typ != TypeOf(z) {
+		t.Errorf("after 100 PtrTo and Elem, have %s, want %s", typ, TypeOf(z))
 	}
 }
 
diff --git a/src/reflect/type.go b/src/reflect/type.go
index 9d6e7a6..5d3c5c6 100644
--- a/src/reflect/type.go
+++ b/src/reflect/type.go
@@ -1469,6 +1469,7 @@
 	pp := *prototype
 
 	pp.str = resolveReflectName(newName(s, "", "", false))
+	pp.ptrToThis = 0
 
 	// For the type structures linked into the binary, the
 	// compiler provides a good hash of the string.
diff --git a/src/runtime/memmove_amd64.s b/src/runtime/memmove_amd64.s
index 464f5fd..c2286d3 100644
--- a/src/runtime/memmove_amd64.s
+++ b/src/runtime/memmove_amd64.s
@@ -146,10 +146,16 @@
 move_0:
 	RET
 move_3or4:
+	CMPQ	BX, $4
+	JB	move_3
+	MOVL	(SI), AX
+	MOVL	AX, (DI)
+	RET
+move_3:
 	MOVW	(SI), AX
-	MOVW	-2(SI)(BX*1), CX
+	MOVB	2(SI), CX
 	MOVW	AX, (DI)
-	MOVW	CX, -2(DI)(BX*1)
+	MOVB	CX, 2(DI)
 	RET
 move_5through7:
 	MOVL	(SI), AX
diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go
index dbfa284..74b8753 100644
--- a/src/runtime/memmove_test.go
+++ b/src/runtime/memmove_test.go
@@ -6,6 +6,7 @@
 
 import (
 	"crypto/rand"
+	"encoding/binary"
 	"fmt"
 	"internal/race"
 	. "runtime"
@@ -447,3 +448,22 @@
 		_ = y
 	}
 }
+
+func BenchmarkIssue18740(b *testing.B) {
+	// This tests that memmove uses one 4-byte load/store to move 4 bytes.
+	// It used to do 2 2-byte load/stores, which leads to a pipeline stall
+	// when we try to read the result with one 4-byte load.
+	var buf [4]byte
+	for j := 0; j < b.N; j++ {
+		s := uint32(0)
+		for i := 0; i < 4096; i += 4 {
+			copy(buf[:], g[i:])
+			s += binary.LittleEndian.Uint32(buf[:])
+		}
+		sink = uint64(s)
+	}
+}
+
+// TODO: 2 byte and 8 byte benchmarks also.
+
+var g [4096]byte
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 0b996d8..cd57720 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -628,10 +628,12 @@
 //go:nowritebarrier
 func (c *gcControllerState) enlistWorker() {
 	// If there are idle Ps, wake one so it will run an idle worker.
-	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
-		wakep()
-		return
-	}
+	// NOTE: This is suspected of causing deadlocks. See golang.org/issue/19112.
+	//
+	//	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
+	//		wakep()
+	//		return
+	//	}
 
 	// There are no idle Ps. If we need more dedicated workers,
 	// try to preempt a running P so it will switch to a worker.
diff --git a/src/runtime/msan.go b/src/runtime/msan.go
index 7177c8e..c0f3957 100644
--- a/src/runtime/msan.go
+++ b/src/runtime/msan.go
@@ -28,9 +28,11 @@
 // the runtime, but operations like a slice copy can call msanread
 // anyhow for values on the stack. Just ignore msanread when running
 // on the system stack. The other msan functions are fine.
+//
+//go:nosplit
 func msanread(addr unsafe.Pointer, sz uintptr) {
 	g := getg()
-	if g == g.m.g0 || g == g.m.gsignal {
+	if g == nil || g.m == nil || g == g.m.g0 || g == g.m.gsignal {
 		return
 	}
 	domsanread(addr, sz)
diff --git a/src/runtime/runtime-gdb_test.go b/src/runtime/runtime-gdb_test.go
index c284437..f886961 100644
--- a/src/runtime/runtime-gdb_test.go
+++ b/src/runtime/runtime-gdb_test.go
@@ -7,6 +7,7 @@
 import (
 	"bytes"
 	"fmt"
+	"go/build"
 	"internal/testenv"
 	"io/ioutil"
 	"os"
@@ -67,7 +68,6 @@
 }
 
 const helloSource = `
-package main
 import "fmt"
 var gslice []string
 func main() {
@@ -85,9 +85,20 @@
 `
 
 func TestGdbPython(t *testing.T) {
+	testGdbPython(t, false)
+}
+
+func TestGdbPythonCgo(t *testing.T) {
+	testGdbPython(t, true)
+}
+
+func testGdbPython(t *testing.T, cgo bool) {
 	if runtime.GOARCH == "mips64" {
 		testenv.SkipFlaky(t, 18173)
 	}
+	if cgo && !build.Default.CgoEnabled {
+		t.Skip("skipping because cgo is not enabled")
+	}
 
 	t.Parallel()
 	checkGdbEnvironment(t)
@@ -100,8 +111,15 @@
 	}
 	defer os.RemoveAll(dir)
 
+	var buf bytes.Buffer
+	buf.WriteString("package main\n")
+	if cgo {
+		buf.WriteString(`import "C"` + "\n")
+	}
+	buf.WriteString(helloSource)
+
 	src := filepath.Join(dir, "main.go")
-	err = ioutil.WriteFile(src, []byte(helloSource), 0644)
+	err = ioutil.WriteFile(src, buf.Bytes(), 0644)
 	if err != nil {
 		t.Fatalf("failed to create file: %v", err)
 	}
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index f521906..ed82783 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -285,6 +285,25 @@
 			md.gcbssmask = progToPointerMask((*byte)(unsafe.Pointer(md.gcbss)), md.ebss-md.bss)
 		}
 	}
+
+	// Modules appear in the moduledata linked list in the order they are
+	// loaded by the dynamic loader, with one exception: the
+	// firstmoduledata itself the module that contains the runtime. This
+	// is not always the first module (when using -buildmode=shared, it
+	// is typically libstd.so, the second module). The order matters for
+	// typelinksinit, so we swap the first module with whatever module
+	// contains the main function.
+	//
+	// See Issue #18729.
+	mainText := funcPC(main_main)
+	for i, md := range *modules {
+		if md.text <= mainText && mainText <= md.etext {
+			(*modules)[0] = md
+			(*modules)[i] = &firstmoduledata
+			break
+		}
+	}
+
 	atomicstorep(unsafe.Pointer(&modulesSlice), unsafe.Pointer(modules))
 }
 
diff --git a/src/runtime/testdata/testprogcgo/threadpprof.go b/src/runtime/testdata/testprogcgo/threadpprof.go
index 44afb91..3da8296 100644
--- a/src/runtime/testdata/testprogcgo/threadpprof.go
+++ b/src/runtime/testdata/testprogcgo/threadpprof.go
@@ -61,7 +61,7 @@
 	return 0;
 }
 
-void runCPUHogThread() {
+void runCPUHogThread(void) {
 	pthread_t tid;
 	pthread_create(&tid, 0, cpuHogDriver, 0);
 }
diff --git a/src/runtime/testdata/testprogcgo/traceback.go b/src/runtime/testdata/testprogcgo/traceback.go
index e8b0a04..2a023f6 100644
--- a/src/runtime/testdata/testprogcgo/traceback.go
+++ b/src/runtime/testdata/testprogcgo/traceback.go
@@ -15,16 +15,16 @@
 
 char *p;
 
-static int f3() {
+static int f3(void) {
 	*p = 0;
 	return 0;
 }
 
-static int f2() {
+static int f2(void) {
 	return f3();
 }
 
-static int f1() {
+static int f1(void) {
 	return f2();
 }
 
diff --git a/src/testing/testing.go b/src/testing/testing.go
index ddbdc25..bd19a31 100644
--- a/src/testing/testing.go
+++ b/src/testing/testing.go
@@ -821,6 +821,7 @@
 	haveExamples = len(m.examples) > 0
 	testRan, testOk := runTests(m.deps.MatchString, m.tests)
 	exampleRan, exampleOk := runExamples(m.deps.MatchString, m.examples)
+	stopAlarm()
 	if !testRan && !exampleRan && *matchBenchmarks == "" {
 		fmt.Fprintln(os.Stderr, "testing: warning: no tests to run")
 	}
diff --git a/src/vendor/golang_org/x/crypto/curve25519/const_amd64.h b/src/vendor/golang_org/x/crypto/curve25519/const_amd64.h
new file mode 100644
index 0000000..80ad222
--- /dev/null
+++ b/src/vendor/golang_org/x/crypto/curve25519/const_amd64.h
@@ -0,0 +1,8 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This code was translated into a form compatible with 6a from the public
+// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
+
+#define REDMASK51     0x0007FFFFFFFFFFFF
diff --git a/src/vendor/golang_org/x/crypto/curve25519/const_amd64.s b/src/vendor/golang_org/x/crypto/curve25519/const_amd64.s
index 797f9b0..0ad5398 100644
--- a/src/vendor/golang_org/x/crypto/curve25519/const_amd64.s
+++ b/src/vendor/golang_org/x/crypto/curve25519/const_amd64.s
@@ -7,8 +7,8 @@
 
 // +build amd64,!gccgo,!appengine
 
-DATA ·REDMASK51(SB)/8, $0x0007FFFFFFFFFFFF
-GLOBL ·REDMASK51(SB), 8, $8
+// These constants cannot be encoded in non-MOVQ immediates.
+// We access them directly from memory instead.
 
 DATA ·_121666_213(SB)/8, $996687872
 GLOBL ·_121666_213(SB), 8, $8
diff --git a/src/vendor/golang_org/x/crypto/curve25519/freeze_amd64.s b/src/vendor/golang_org/x/crypto/curve25519/freeze_amd64.s
index 932800b..536479b 100644
--- a/src/vendor/golang_org/x/crypto/curve25519/freeze_amd64.s
+++ b/src/vendor/golang_org/x/crypto/curve25519/freeze_amd64.s
@@ -7,6 +7,8 @@
 
 // +build amd64,!gccgo,!appengine
 
+#include "const_amd64.h"
+
 // func freeze(inout *[5]uint64)
 TEXT ·freeze(SB),7,$0-8
 	MOVQ inout+0(FP), DI
@@ -16,7 +18,7 @@
 	MOVQ 16(DI),CX
 	MOVQ 24(DI),R8
 	MOVQ 32(DI),R9
-	MOVQ ·REDMASK51(SB),AX
+	MOVQ $REDMASK51,AX
 	MOVQ AX,R10
 	SUBQ $18,R10
 	MOVQ $3,R11
diff --git a/src/vendor/golang_org/x/crypto/curve25519/ladderstep_amd64.s b/src/vendor/golang_org/x/crypto/curve25519/ladderstep_amd64.s
index ee7b36c..7074e5c 100644
--- a/src/vendor/golang_org/x/crypto/curve25519/ladderstep_amd64.s
+++ b/src/vendor/golang_org/x/crypto/curve25519/ladderstep_amd64.s
@@ -7,6 +7,8 @@
 
 // +build amd64,!gccgo,!appengine
 
+#include "const_amd64.h"
+
 // func ladderstep(inout *[5][5]uint64)
 TEXT ·ladderstep(SB),0,$296-8
 	MOVQ inout+0(FP),DI
@@ -118,7 +120,7 @@
 	MULQ 72(SP)
 	ADDQ AX,R12
 	ADCQ DX,R13
-	MOVQ ·REDMASK51(SB),DX
+	MOVQ $REDMASK51,DX
 	SHLQ $13,CX:SI
 	ANDQ DX,SI
 	SHLQ $13,R9:R8
@@ -233,7 +235,7 @@
 	MULQ 32(SP)
 	ADDQ AX,R12
 	ADCQ DX,R13
-	MOVQ ·REDMASK51(SB),DX
+	MOVQ $REDMASK51,DX
 	SHLQ $13,CX:SI
 	ANDQ DX,SI
 	SHLQ $13,R9:R8
@@ -438,7 +440,7 @@
 	MULQ 72(SP)
 	ADDQ AX,R12
 	ADCQ DX,R13
-	MOVQ ·REDMASK51(SB),DX
+	MOVQ $REDMASK51,DX
 	SHLQ $13,CX:SI
 	ANDQ DX,SI
 	SHLQ $13,R9:R8
@@ -588,7 +590,7 @@
 	MULQ 32(SP)
 	ADDQ AX,R12
 	ADCQ DX,R13
-	MOVQ ·REDMASK51(SB),DX
+	MOVQ $REDMASK51,DX
 	SHLQ $13,CX:SI
 	ANDQ DX,SI
 	SHLQ $13,R9:R8
@@ -728,7 +730,7 @@
 	MULQ 152(DI)
 	ADDQ AX,R12
 	ADCQ DX,R13
-	MOVQ ·REDMASK51(SB),DX
+	MOVQ $REDMASK51,DX
 	SHLQ $13,CX:SI
 	ANDQ DX,SI
 	SHLQ $13,R9:R8
@@ -843,7 +845,7 @@
 	MULQ 192(DI)
 	ADDQ AX,R12
 	ADCQ DX,R13
-	MOVQ ·REDMASK51(SB),DX
+	MOVQ $REDMASK51,DX
 	SHLQ $13,CX:SI
 	ANDQ DX,SI
 	SHLQ $13,R9:R8
@@ -993,7 +995,7 @@
 	MULQ 32(DI)
 	ADDQ AX,R12
 	ADCQ DX,R13
-	MOVQ ·REDMASK51(SB),DX
+	MOVQ $REDMASK51,DX
 	SHLQ $13,CX:SI
 	ANDQ DX,SI
 	SHLQ $13,R9:R8
@@ -1143,7 +1145,7 @@
 	MULQ 112(SP)
 	ADDQ AX,R12
 	ADCQ DX,R13
-	MOVQ ·REDMASK51(SB),DX
+	MOVQ $REDMASK51,DX
 	SHLQ $13,CX:SI
 	ANDQ DX,SI
 	SHLQ $13,R9:R8
@@ -1329,7 +1331,7 @@
 	MULQ 192(SP)
 	ADDQ AX,R12
 	ADCQ DX,R13
-	MOVQ ·REDMASK51(SB),DX
+	MOVQ $REDMASK51,DX
 	SHLQ $13,CX:SI
 	ANDQ DX,SI
 	SHLQ $13,R9:R8
diff --git a/src/vendor/golang_org/x/crypto/curve25519/mul_amd64.s b/src/vendor/golang_org/x/crypto/curve25519/mul_amd64.s
index 33ce57d..b162e65 100644
--- a/src/vendor/golang_org/x/crypto/curve25519/mul_amd64.s
+++ b/src/vendor/golang_org/x/crypto/curve25519/mul_amd64.s
@@ -7,6 +7,8 @@
 
 // +build amd64,!gccgo,!appengine
 
+#include "const_amd64.h"
+
 // func mul(dest, a, b *[5]uint64)
 TEXT ·mul(SB),0,$16-24
 	MOVQ dest+0(FP), DI
@@ -121,7 +123,7 @@
 	MULQ 32(CX)
 	ADDQ AX,R14
 	ADCQ DX,R15
-	MOVQ ·REDMASK51(SB),SI
+	MOVQ $REDMASK51,SI
 	SHLQ $13,R9:R8
 	ANDQ SI,R8
 	SHLQ $13,R11:R10
diff --git a/src/vendor/golang_org/x/crypto/curve25519/square_amd64.s b/src/vendor/golang_org/x/crypto/curve25519/square_amd64.s
index 3a92804..4e864a8 100644
--- a/src/vendor/golang_org/x/crypto/curve25519/square_amd64.s
+++ b/src/vendor/golang_org/x/crypto/curve25519/square_amd64.s
@@ -7,6 +7,8 @@
 
 // +build amd64,!gccgo,!appengine
 
+#include "const_amd64.h"
+
 // func square(out, in *[5]uint64)
 TEXT ·square(SB),7,$0-16
 	MOVQ out+0(FP), DI
@@ -84,7 +86,7 @@
 	MULQ 32(SI)
 	ADDQ AX,R13
 	ADCQ DX,R14
-	MOVQ ·REDMASK51(SB),SI
+	MOVQ $REDMASK51,SI
 	SHLQ $13,R8:CX
 	ANDQ SI,CX
 	SHLQ $13,R10:R9
diff --git a/test/fixedbugs/issue18725.go b/test/fixedbugs/issue18725.go
new file mode 100644
index 0000000..c632dba
--- /dev/null
+++ b/test/fixedbugs/issue18725.go
@@ -0,0 +1,24 @@
+// run
+
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import "os"
+
+func panicWhenNot(cond bool) {
+	if cond {
+		os.Exit(0)
+	} else {
+		panic("nilcheck elim failed")
+	}
+}
+
+func main() {
+	e := (*string)(nil)
+	panicWhenNot(e == e)
+	// Should never reach this line.
+	panicWhenNot(*e == *e)
+}
diff --git a/test/fixedbugs/issue18808.go b/test/fixedbugs/issue18808.go
new file mode 100644
index 0000000..c98386e
--- /dev/null
+++ b/test/fixedbugs/issue18808.go
@@ -0,0 +1,63 @@
+// run
+
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+const lim = 0x80000000
+
+//go:noinline
+func eq(x uint32) {
+	if x == lim {
+		return
+	}
+	panic("x == lim returned false")
+}
+
+//go:noinline
+func neq(x uint32) {
+	if x != lim {
+		panic("x != lim returned true")
+	}
+}
+
+//go:noinline
+func gt(x uint32) {
+	if x > lim {
+		return
+	}
+	panic("x > lim returned false")
+}
+
+//go:noinline
+func gte(x uint32) {
+	if x >= lim {
+		return
+	}
+	panic("x >= lim returned false")
+}
+
+//go:noinline
+func lt(x uint32) {
+	if x < lim {
+		panic("x < lim returned true")
+	}
+}
+
+//go:noinline
+func lte(x uint32) {
+	if x <= lim {
+		panic("x <= lim returned true")
+	}
+}
+
+func main() {
+	eq(lim)
+	neq(lim)
+	gt(lim+1)
+	gte(lim+1)
+	lt(lim+1)
+	lte(lim+1)
+}
diff --git a/test/fixedbugs/issue18906.go b/test/fixedbugs/issue18906.go
new file mode 100644
index 0000000..544400b
--- /dev/null
+++ b/test/fixedbugs/issue18906.go
@@ -0,0 +1,36 @@
+// run
+
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+//go:noinline
+func f(x int) {
+}
+
+//go:noinline
+func val() int8 {
+	return -1
+}
+
+var (
+	array = [257]int{}
+	slice = array[1:]
+)
+
+func init() {
+	for i := range array {
+		array[i] = i - 1
+	}
+}
+
+func main() {
+	x := val()
+	y := int(uint8(x))
+	f(y) // try and force y to be calculated and spilled
+	if slice[y] != 255 {
+		panic("incorrect value")
+	}
+}
diff --git a/test/fixedbugs/issue18915.go b/test/fixedbugs/issue18915.go
new file mode 100644
index 0000000..a432bbc
--- /dev/null
+++ b/test/fixedbugs/issue18915.go
@@ -0,0 +1,21 @@
+// errorcheck
+
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Make sure error message for invalid conditions
+// or tags are consistent with earlier Go versions.
+
+package p
+
+func _() {
+	if a := 10 { // ERROR "a := 10 used as value"
+	}
+
+	for b := 10 { // ERROR "b := 10 used as value"
+	}
+
+	switch c := 10 { // ERROR "c := 10 used as value"
+	}
+}
diff --git a/test/fixedbugs/issue6036.go b/test/fixedbugs/issue6036.go
index 795b223..8ebef5a 100644
--- a/test/fixedbugs/issue6036.go
+++ b/test/fixedbugs/issue6036.go
@@ -1,4 +1,4 @@
-// +build amd64
+// +build !386,!arm,!mips,!mipsle,!amd64p32
 // compile
 
 // Copyright 2013 The Go Authors. All rights reserved.
diff --git a/test/nilptr3.go b/test/nilptr3.go
index 8fdae8c..c681cba 100644
--- a/test/nilptr3.go
+++ b/test/nilptr3.go
@@ -40,23 +40,23 @@
 )
 
 func f1() {
-	_ = *intp // ERROR "generated nil check"
+	_ = *intp // ERROR "removed nil check"
 
 	// This one should be removed but the block copy needs
 	// to be turned into its own pseudo-op in order to see
 	// the indirect.
-	_ = *arrayp // ERROR "generated nil check"
+	_ = *arrayp // ERROR "removed nil check"
 
 	// 0-byte indirect doesn't suffice.
 	// we don't registerize globals, so there are no removed.* nil checks.
-	_ = *array0p // ERROR "generated nil check"
 	_ = *array0p // ERROR "removed nil check"
+	_ = *array0p // ERROR "generated nil check"
 
-	_ = *intp    // ERROR "removed nil check"
+	_ = *intp    // ERROR "generated nil check"
 	_ = *arrayp  // ERROR "removed nil check"
 	_ = *structp // ERROR "generated nil check"
 	_ = *emptyp  // ERROR "generated nil check"
-	_ = *arrayp  // ERROR "removed nil check"
+	_ = *arrayp  // ERROR "generated nil check"
 }
 
 func f2() {
@@ -71,15 +71,15 @@
 		empty1p    *Empty1
 	)
 
-	_ = *intp       // ERROR "generated nil check"
-	_ = *arrayp     // ERROR "generated nil check"
-	_ = *array0p    // ERROR "generated nil check"
-	_ = *array0p    // ERROR "removed.* nil check"
 	_ = *intp       // ERROR "removed.* nil check"
 	_ = *arrayp     // ERROR "removed.* nil check"
+	_ = *array0p    // ERROR "removed.* nil check"
+	_ = *array0p    // ERROR "generated nil check"
+	_ = *intp       // ERROR "generated nil check"
+	_ = *arrayp     // ERROR "removed.* nil check"
 	_ = *structp    // ERROR "generated nil check"
 	_ = *emptyp     // ERROR "generated nil check"
-	_ = *arrayp     // ERROR "removed.* nil check"
+	_ = *arrayp     // ERROR "generated nil check"
 	_ = *bigarrayp  // ERROR "generated nil check" ARM removed nil check before indirect!!
 	_ = *bigstructp // ERROR "generated nil check"
 	_ = *empty1p    // ERROR "generated nil check"
@@ -122,16 +122,16 @@
 	// x wasn't going to change across the function call.
 	// But it's a little complex to do and in practice doesn't
 	// matter enough.
-	_ = x[9999] // ERROR "removed nil check"
+	_ = x[9999] // ERROR "generated nil check" // TODO: fix
 }
 
 func f3a() {
 	x := fx10k()
 	y := fx10k()
 	z := fx10k()
-	_ = &x[9] // ERROR "generated nil check"
-	y = z
 	_ = &x[9] // ERROR "removed.* nil check"
+	y = z
+	_ = &x[9] // ERROR "generated nil check"
 	x = y
 	_ = &x[9] // ERROR "generated nil check"
 }
@@ -139,11 +139,11 @@
 func f3b() {
 	x := fx10k()
 	y := fx10k()
-	_ = &x[9] // ERROR "generated nil check"
+	_ = &x[9] // ERROR "removed.* nil check"
 	y = x
 	_ = &x[9] // ERROR "removed.* nil check"
 	x = y
-	_ = &x[9] // ERROR "removed.* nil check"
+	_ = &x[9] // ERROR "generated nil check"
 }
 
 func fx10() *[10]int
@@ -179,15 +179,15 @@
 	_ = x[9] // ERROR "generated nil check"  // bug would like to remove before indirect
 
 	fx10()
-	_ = x[9] // ERROR "removed nil check"
+	_ = x[9] // ERROR "generated nil check"  // TODO: fix
 
 	x = fx10()
 	y := fx10()
-	_ = &x[9] // ERROR "generated nil check"
+	_ = &x[9] // ERROR "removed[a-z ]* nil check"
 	y = x
 	_ = &x[9] // ERROR "removed[a-z ]* nil check"
 	x = y
-	_ = &x[9] // ERROR "removed[a-z ]* nil check"
+	_ = &x[9] // ERROR "generated nil check"
 }
 
 func f5(p *float32, q *float64, r *float32, s *float64) float64 {
diff --git a/test/writebarrier.go b/test/writebarrier.go
index 6460a6f..13f7b54 100644
--- a/test/writebarrier.go
+++ b/test/writebarrier.go
@@ -220,3 +220,19 @@
 	*p = x // no barrier
 	return
 }
+
+type T23 struct {
+	p *int
+	a int
+}
+
+var t23 T23
+var i23 int
+
+func f23() {
+	// zeroing global needs write barrier for the hybrid barrier.
+	t23 = T23{} // ERROR "write barrier"
+	// also test partial assignments
+	t23 = T23{a: 1}    // ERROR "write barrier"
+	t23 = T23{p: &i23} // ERROR "write barrier"
+}