diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2283,6 +2283,27 @@ :ref:`stackmap entry <statepoint-stackmap-format>`. See the intrinsic description for further details. +.. _ob_wrapping: + +Integer Wrapping Operand Bundles +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +There are two operand bundles to specify integer wrapping behavior at a call +site: ``"nuw"`` for "No Unsigned Wrap" and ``"nsw"`` for "No Signed Wrap". If +the ``nuw`` and/or ``nsw`` bundles are present, the result value of the +called function is a :ref:`poison value <poisonvalues>` if unsigned and/or +signed overflow, respectively, occurs while computing it. The ``nuw`` and ``nsw`` +bundles are only supported for calls of the following functions: +``@llvm.matrix.multiply.*`` with integer matrix operands. + +The integer wrapping operand bundles take exactly one argument of type ``i1``, +which has to be a constant, either ``true`` or ``false``. At most one ``"nuw"`` bundle and +at most one ``"nsw"`` bundle can be attached to a call. See the example below. + +.. code-block:: llvm + %r = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nuw"(i1 true), "nsw"(i1 true) ] + + .. _moduleasm: Module-Level Inline Assembly @@ -15656,6 +15677,9 @@ Vectors ``%A``, ``%B``, and the returned vector all have the same float or integer element type. +:ref:`"Integer wrapping operand bundles" <ob_wrapping>` can be used to indicate whether ``nuw`` +and ``nsw`` should be used when lowering '``llvm.matrix.multiply.*``' calls with integer +matrix arguments. 
'``llvm.matrix.column.major.load.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -93,6 +93,8 @@ OB_cfguardtarget = 3, // "cfguardtarget" OB_preallocated = 4, // "preallocated" OB_gc_live = 5, // "gc-live" + OB_nuw = 6, // "nuw" + OB_nsw = 7, // "nsw" }; /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp --- a/llvm/lib/IR/LLVMContext.cpp +++ b/llvm/lib/IR/LLVMContext.cpp @@ -78,6 +78,16 @@ "gc-transition operand bundle id drifted!"); (void)GCLiveEntry; + auto *NUWEntry = pImpl->getOrInsertBundleTag("nuw"); + assert(NUWEntry->second == LLVMContext::OB_nuw && + "nuw operand bundle id drifted!"); + (void)NUWEntry; + + auto *NSWEntry = pImpl->getOrInsertBundleTag("nsw"); + assert(NSWEntry->second == LLVMContext::OB_nsw && + "nsw operand bundle id drifted!"); + (void)NSWEntry; + SyncScope::ID SingleThreadSSID = pImpl->getOrInsertSyncScopeID("singlethread"); assert(SingleThreadSSID == SyncScope::SingleThread && diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -3146,7 +3146,8 @@ // and at most one "preallocated" operand bundle. 
bool FoundDeoptBundle = false, FoundFuncletBundle = false, FoundGCTransitionBundle = false, FoundCFGuardTargetBundle = false, - FoundPreallocatedBundle = false, FoundGCLiveBundle = false;; + FoundPreallocatedBundle = false, FoundGCLiveBundle = false, + FoundNUWBundle = false, FoundNSWBundle = false; for (unsigned i = 0, e = Call.getNumOperandBundles(); i < e; ++i) { OperandBundleUse BU = Call.getOperandBundleAt(i); uint32_t Tag = BU.getTagID(); @@ -3187,9 +3188,41 @@ Assert(!FoundGCLiveBundle, "Multiple gc-live operand bundles", Call); FoundGCLiveBundle = true; + } else if (Tag == LLVMContext::OB_nuw) { + Assert(!FoundNUWBundle, "Multiple nuw operand bundles", Call); + FoundNUWBundle = true; + } else if (Tag == LLVMContext::OB_nsw) { + Assert(!FoundNSWBundle, "Multiple nsw operand bundles", Call); + FoundNSWBundle = true; } } + // Nuw/nsw bundles are only supported for certain calls. + auto IsValidFunctionForWrapBundle = [&Call]() { + if (auto *II = dyn_cast(&Call)) + return II->getIntrinsicID() == Intrinsic::matrix_multiply && + II->getType()->getScalarType()->isIntegerTy(); + return false; + }; + Assert((!FoundNUWBundle && !FoundNSWBundle) || IsValidFunctionForWrapBundle(), + "nuw/nsw bundles not supported on the called function", Call); + + // Check the arguments of nuw & nsw bundles. 
+ auto VerifyWrapBundle = [this, &Call](unsigned BundleId, StringRef Name) { + auto Bundle = Call.getOperandBundle(BundleId); + if (Bundle) { + Assert(Bundle->Inputs.size() == 1, + Name + " bundle must have a single argument", Call); + auto OpTy = Bundle->Inputs[0]->getType(); + Assert(OpTy->isIntegerTy() && OpTy->getIntegerBitWidth() == 1, + Name + " bundle operand must be an integer with bitwidth 1", Call); + Assert(isa(Bundle->Inputs[0]), + Name + " bundle operand must be a constant integer", Call); + } + }; + VerifyWrapBundle(LLVMContext::OB_nuw, "nuw"); + VerifyWrapBundle(LLVMContext::OB_nsw, "nsw"); + // Verify that each inlinable callsite of a debug-info-bearing function in a // debug-info-bearing function has a debug location attached to it. Failure to // do so causes assertion failures when the inliner sets up inline scope info. diff --git a/llvm/test/Bitcode/operand-bundles-bc-analyzer.ll b/llvm/test/Bitcode/operand-bundles-bc-analyzer.ll --- a/llvm/test/Bitcode/operand-bundles-bc-analyzer.ll +++ b/llvm/test/Bitcode/operand-bundles-bc-analyzer.ll @@ -9,6 +9,8 @@ ; CHECK-NEXT: @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) +declare <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float>, <4 x float>, i32, i32, i32) + +define void @f_nuw(<4 x i32> %a, i1 %c) { +entry: +; CHECK: Multiple nuw operand bundles +; CHECK-NEXT: %v.1 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nuw"(i1 true), "nuw"(i1 true) ] + %v.1 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nuw"(i1 true), "nuw"(i1 true) ] + +; CHECK-NOT: %v.2 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nuw"(i1 true) ] + %v.2 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nuw"(i1 true) ] + +; CHECK: nuw bundle operand must be 
an integer with bitwidth 1 +; CHECK-NEXT: %v.3 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nuw"(i32 10) ] + %v.3 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nuw"(i32 10) ] + +; CHECK: nuw bundle must have a single argument +; CHECK-NEXT: %v.4 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nuw"(i32 10, i1 true) ] + %v.4 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nuw"(i32 10, i1 true) ] + +; CHECK: nuw bundle operand must be a constant integer +; CHECK-NEXT: %v.5 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nuw"(i1 %c) ] + %v.5 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nuw"(i1 %c) ] + +; CHECK: nuw/nsw bundles not supported on the called function +; CHECK-NEXT: call void @g() [ "nuw"(i1 true) ] + call void @g( )[ "nuw"(i1 true) ] + +; CHECK: nuw/nsw bundles not supported on the called function +; CHECK-NEXT: %v.6 = call <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i32 2, i32 2, i32 2) [ "nuw"(i1 %c) ] + %v.6 = call <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i32 2, i32 2, i32 2) [ "nuw"(i1 %c) ] + + ret void +} + +define void @f_nsw(<4 x i32> %a, i1 %c) { +entry: +; CHECK: Multiple nsw operand bundles +; CHECK-NEXT: %v.1 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nsw"(i1 true), "nsw"(i1 true) ] + %v.1 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nsw"(i1 true), "nsw"(i1 true) ] + +; CHECK-NOT: %v.2 = call <4 x i32> 
@llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nsw"(i1 true) ] + %v.2 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nsw"(i1 true) ] + +; CHECK: nsw bundle operand must be an integer with bitwidth 1 +; CHECK-NEXT: %v.3 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nsw"(i32 10) ] + %v.3 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nsw"(i32 10) ] + +; CHECK: nsw bundle must have a single argument +; CHECK-NEXT: %v.4 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nsw"(i32 10, i1 true) ] + %v.4 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nsw"(i32 10, i1 true) ] + +; CHECK: nsw bundle operand must be a constant integer +; CHECK-NEXT: %v.5 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nsw"(i1 %c) ] + %v.5 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4i32.v4i32(<4 x i32> %a, <4 x i32> %a, i32 2, i32 2, i32 2) [ "nsw"(i1 %c) ] + +; CHECK: nuw/nsw bundles not supported on the called function +; CHECK-NEXT: call void @g() [ "nsw"(i1 true) ] + call void @g( )[ "nsw"(i1 true) ] + +; CHECK: nuw/nsw bundles not supported on the called function +; CHECK-NEXT: %v.6 = call <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i32 2, i32 2, i32 2) [ "nsw"(i1 %c) ] + %v.6 = call <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i32 2, i32 2, i32 2) [ "nsw"(i1 %c) ] + + ret void +}