[WebAssembly] Bulk memory intrinsics and builtins

Summary:
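Implements LLVM intrinsics and Clang builtins for the bulk memory
instructions memory.init and data.drop.

Also updates memory.copy to carry explicit source and destination
memory index immediates (always 0 for now), and converts the size
operand to i32 when lowering to memory.copy.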

Reviewers: aheejin

Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, sunfish, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D57736

llvm-svn: 353983
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index b8f1a0b..0241436 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -25,6 +25,10 @@
 BUILTIN(__builtin_wasm_memory_size, "zIi", "n")
 BUILTIN(__builtin_wasm_memory_grow, "zIiz", "n")
 
+// Bulk memory builtins
+TARGET_BUILTIN(__builtin_wasm_memory_init, "vIUiIUiv*UiUi", "", "bulk-memory")
+TARGET_BUILTIN(__builtin_wasm_data_drop, "vIUi", "", "bulk-memory")
+
 // Floating point min/max
 BUILTIN(__builtin_wasm_min_f32, "fff", "nc")
 BUILTIN(__builtin_wasm_max_f32, "fff", "nc")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a02fd48..38de4e0 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -13566,6 +13566,30 @@
     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
     return Builder.CreateCall(Callee, Args);
   }
+  case WebAssembly::BI__builtin_wasm_memory_init: {
+    llvm::APSInt SegConst;
+    if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext()))
+      llvm_unreachable("Constant arg isn't actually constant?");
+    llvm::APSInt MemConst;
+    if (!E->getArg(1)->isIntegerConstantExpr(MemConst, getContext()))
+      llvm_unreachable("Constant arg isn't actually constant?");
+    if (!MemConst.isNullValue())
+      ErrorUnsupported(E, "non-zero memory index");
+    Value *Args[] = {llvm::ConstantInt::get(getLLVMContext(), SegConst),
+                     llvm::ConstantInt::get(getLLVMContext(), MemConst),
+                     EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)),
+                     EmitScalarExpr(E->getArg(4))};
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_init);
+    return Builder.CreateCall(Callee, Args);
+  }
+  case WebAssembly::BI__builtin_wasm_data_drop: {
+    llvm::APSInt SegConst;
+    if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext()))
+      llvm_unreachable("Constant arg isn't actually constant?");
+    Value *Arg = llvm::ConstantInt::get(getLLVMContext(), SegConst);
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop);
+    return Builder.CreateCall(Callee, {Arg});
+  }
   case WebAssembly::BI__builtin_wasm_throw: {
     Value *Tag = EmitScalarExpr(E->getArg(0));
     Value *Obj = EmitScalarExpr(E->getArg(1));
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index 4f14e90..bba615d 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +unimplemented-simd128 -target-feature +nontrapping-fptoint -target-feature +exception-handling -fno-lax-vector-conversions -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY32
-// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +unimplemented-simd128 -target-feature +nontrapping-fptoint -target-feature +exception-handling -fno-lax-vector-conversions -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY64
-// RUN: not %clang_cc1 -triple wasm64-unknown-unknown -target-feature +nontrapping-fptoint -target-feature +exception-handling -fno-lax-vector-conversions -O3 -emit-llvm -o - %s 2>&1 | FileCheck %s -check-prefixes MISSING-SIMD
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +unimplemented-simd128 -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -fno-lax-vector-conversions -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY32
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +unimplemented-simd128 -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -fno-lax-vector-conversions -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY64
+// RUN: not %clang_cc1 -triple wasm64-unknown-unknown -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -fno-lax-vector-conversions -O3 -emit-llvm -o - %s 2>&1 | FileCheck %s -check-prefixes MISSING-SIMD
 
 // SIMD convenience types
 typedef char i8x16 __attribute((vector_size(16)));
@@ -26,6 +26,18 @@
   // WEBASSEMBLY64: call i64 @llvm.wasm.memory.grow.i64(i32 0, i64 %{{.*}})
 }
 
+void memory_init(void *dest, int offset, int size) {
+  __builtin_wasm_memory_init(3, 0, dest, offset, size);
+  // WEBASSEMBLY32: call void @llvm.wasm.memory.init(i32 3, i32 0, i8* %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+  // WEBASSEMBLY64: call void @llvm.wasm.memory.init(i32 3, i32 0, i8* %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+}
+
+void data_drop() {
+  __builtin_wasm_data_drop(3);
+  // WEBASSEMBLY32: call void @llvm.wasm.data.drop(i32 3)
+  // WEBASSEMBLY64: call void @llvm.wasm.data.drop(i32 3)
+}
+
 void throw(unsigned int tag, void *obj) {
   return __builtin_wasm_throw(tag, obj);
   // WEBASSEMBLY32: call void @llvm.wasm.throw(i32 %{{.*}}, i8* %{{.*}})
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 874465e..bfeb706 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -110,4 +110,18 @@
             [llvm_anyvector_ty],
             [IntrNoMem, IntrSpeculatable]>;
 
+//===----------------------------------------------------------------------===//
+// Bulk memory intrinsics
+//===----------------------------------------------------------------------===//
+
+def int_wasm_memory_init :
+  Intrinsic<[],
+            [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrWriteMem, IntrInaccessibleMemOrArgMemOnly, WriteOnly<2>,
+             IntrHasSideEffects]>;
+def int_wasm_data_drop :
+  Intrinsic<[],
+            [llvm_i32_ty],
+            [IntrNoDuplicate, IntrHasSideEffects]>;
+
 } // TargetPrefix = "wasm"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
index 4642236..646b16c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
@@ -21,19 +21,48 @@
 }
 
 // Bespoke types and nodes for bulk memory ops
-def wasm_memcpy_t : SDTypeProfile<0, 3,
-  [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisInt<2>]
+def wasm_memcpy_t : SDTypeProfile<0, 5,
+  [SDTCisInt<0>, SDTCisInt<1>, SDTCisPtrTy<2>, SDTCisPtrTy<3>, SDTCisInt<4>]
 >;
 def wasm_memcpy : SDNode<"WebAssemblyISD::MEMORY_COPY", wasm_memcpy_t,
                          [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
 
 //===----------------------------------------------------------------------===//
+// memory.init
+//===----------------------------------------------------------------------===//
+
+let mayStore = 1 in
+defm MEMORY_INIT :
+  BULK_I<(outs),
+         (ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest,
+              I32:$offset, I32:$size),
+         (outs), (ins i32imm_op:$seg, i32imm_op:$idx),
+         [(int_wasm_memory_init (i32 imm:$seg), (i32 imm:$idx), I32:$dest,
+            I32:$offset, I32:$size
+          )],
+         "memory.init\t$seg, $idx, $dest, $offset, $size",
+         "memory.init\t$seg, $idx", 0x08>;
+
+//===----------------------------------------------------------------------===//
+// data.drop
+//===----------------------------------------------------------------------===//
+
+defm DATA_DROP :
+  BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg),
+         [(int_wasm_data_drop (i32 imm:$seg))],
+         "data.drop\t$seg", "data.drop\t$seg", 0x09>;
+
+//===----------------------------------------------------------------------===//
 // memory.copy
 //===----------------------------------------------------------------------===//
 
 let mayLoad = 1, mayStore = 1 in
-defm MEMORY_COPY : BULK_I<(outs), (ins I32:$dst, I32:$src, I32:$len),
-                          (outs), (ins),
-                          [(wasm_memcpy I32:$dst, I32:$src, I32:$len)],
-                          "memory.copy\t$dst, $src, $len",
-                          "memory.copy", 0x0a>;
+defm MEMORY_COPY :
+  BULK_I<(outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx,
+                      I32:$dst, I32:$src, I32:$len),
+         (outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx),
+         [(wasm_memcpy (i32 imm:$src_idx), (i32 imm:$dst_idx),
+           I32:$dst, I32:$src, I32:$len
+         )],
+         "memory.copy\t$src_idx, $dst_idx, $dst, $src, $len",
+         "memory.copy\t$src_idx, $dst_idx", 0x0a>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
index 04be3d7..34d93ec 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
@@ -19,16 +19,18 @@
 WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() = default; // anchor
 
 SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemcpy(
-    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op1, SDValue Op2,
-    SDValue Op3, unsigned Align, bool IsVolatile, bool AlwaysInline,
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
+    SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline,
     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
   if (!DAG.getMachineFunction()
            .getSubtarget<WebAssemblySubtarget>()
            .hasBulkMemory())
     return SDValue();
 
-  return DAG.getNode(WebAssemblyISD::MEMORY_COPY, DL, MVT::Other, Chain, Op1,
-                     Op2, Op3);
+  SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32);
+  return DAG.getNode(WebAssemblyISD::MEMORY_COPY, DL, MVT::Other,
+                     {Chain, MemIdx, MemIdx, Dst, Src,
+                      DAG.getZExtOrTrunc(Size, DL, MVT::i32)});
 }
 
 SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemmove(
diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory-intrinsics.ll
new file mode 100644
index 0000000..dfb74b7
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/bulk-memory-intrinsics.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s
+
+; Test that the bulk memory intrinsics (memory.init and data.drop) lower correctly.
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: memory_init:
+; CHECK-NEXT: .functype  memory_init (i32, i32, i32) -> ()
+; CHECK-NEXT: memory.init 3, 0, $0, $1, $2
+; CHECK-NEXT: return
+declare void @llvm.wasm.memory.init(i32, i32, i8*, i32, i32)
+define void @memory_init(i8* %dest, i32 %offset, i32 %size) {
+  call void @llvm.wasm.memory.init(
+    i32 3, i32 0, i8* %dest, i32 %offset, i32 %size
+  )
+  ret void
+}
+
+; CHECK-LABEL: data_drop:
+; CHECK-NEXT: .functype data_drop () -> ()
+; CHECK-NEXT: data.drop 3
+; CHECK-NEXT: return
+declare void @llvm.wasm.data.drop(i32)
+define void @data_drop() {
+  call void @llvm.wasm.data.drop(i32 3)
+  ret void
+}
diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll
index acece86..68ce3e4 100644
--- a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll
+++ b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll
@@ -6,40 +6,39 @@
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
+declare void @llvm.memcpy.p0i8.p0i8.i8(i8*, i8*, i8, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+declare void @llvm.memcpy.p0i32.p0i32.i32(i32*, i32*, i32, i1)
+
+declare void @llvm.memmove.p0i8.p0i8.i8(i8*, i8*, i8, i1)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+declare void @llvm.memmove.p0i32.p0i32.i32(i32*, i32*, i32, i1)
+
 ; CHECK-LABEL: memcpy_i8:
 ; NO-BULK-MEM-NOT: memory.copy
 ; BULK-MEM-NEXT: .functype memcpy_i8 (i32, i32, i32) -> ()
-; BULK-MEM-NEXT: memory.copy $0, $1, $2
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
 ; BULK-MEM-NEXT: return
-declare void @llvm.memcpy.p0i8.p0i8.i32(
-  i8* %dest, i8* %src, i32 %len, i1 %volatile
-)
-define void @memcpy_i8(i8* %dest, i8* %src, i32 %len) {
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 0)
+define void @memcpy_i8(i8* %dest, i8* %src, i8 zeroext %len) {
+  call void @llvm.memcpy.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
   ret void
 }
 
 ; CHECK-LABEL: memmove_i8:
 ; NO-BULK-MEM-NOT: memory.copy
 ; BULK-MEM-NEXT: .functype memmove_i8 (i32, i32, i32) -> ()
-; BULK-MEM-NEXT: memory.copy $0, $1, $2
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
 ; BULK-MEM-NEXT: return
-declare void @llvm.memmove.p0i8.p0i8.i32(
-  i8* %dest, i8* %src, i32 %len, i1 %volatile
-)
-define void @memmove_i8(i8* %dest, i8* %src, i32 %len) {
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 0)
+define void @memmove_i8(i8* %dest, i8* %src, i8 zeroext %len) {
+  call void @llvm.memmove.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0)
   ret void
 }
 
 ; CHECK-LABEL: memcpy_i32:
 ; NO-BULK-MEM-NOT: memory.copy
 ; BULK-MEM-NEXT: .functype memcpy_i32 (i32, i32, i32) -> ()
-; BULK-MEM-NEXT: memory.copy $0, $1, $2
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
 ; BULK-MEM-NEXT: return
-declare void @llvm.memcpy.p0i32.p0i32.i32(
-  i32* %dest, i32* %src, i32 %len, i1 %volatile
-)
 define void @memcpy_i32(i32* %dest, i32* %src, i32 %len) {
   call void @llvm.memcpy.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 0)
   ret void
@@ -48,11 +47,8 @@
 ; CHECK-LABEL: memmove_i32:
 ; NO-BULK-MEM-NOT: memory.copy
 ; BULK-MEM-NEXT: .functype memmove_i32 (i32, i32, i32) -> ()
-; BULK-MEM-NEXT: memory.copy $0, $1, $2
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
 ; BULK-MEM-NEXT: return
-declare void @llvm.memmove.p0i32.p0i32.i32(
-  i32* %dest, i32* %src, i32 %len, i1 %volatile
-)
 define void @memmove_i32(i32* %dest, i32* %src, i32 %len) {
   call void @llvm.memmove.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 0)
   ret void
@@ -82,7 +78,7 @@
 ; NO-BULK-MEM-NOT: memory.copy
 ; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> ()
 ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: memory.copy $0, $1, $pop[[L0]]
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
 ; BULK-MEM-NEXT: return
 define void @memcpy_1024(i8* %dest, i8* %src) {
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0)
@@ -93,7 +89,7 @@
 ; NO-BULK-MEM-NOT: memory.copy
 ; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> ()
 ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
-; BULK-MEM-NEXT: memory.copy $0, $1, $pop[[L0]]
+; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
 ; BULK-MEM-NEXT: return
 define void @memmove_1024(i8* %dest, i8* %src) {
   call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0)