Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -1177,6 +1177,13 @@ does not alias any other memory visible within a function and that a ``swifterror`` alloca passed as an argument does not escape. +``immarg`` + This indicates the parameter is required to be an immediate + value. This must be a trivial immediate integer or floating-point + constant. Undef or constant expressions are not valid. This is + only valid on intrinsic declarations and cannot be applied to a + call site or arbitrary function. + .. _gc: Garbage Collector Strategy Names Index: include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- include/llvm/Bitcode/LLVMBitCodes.h +++ include/llvm/Bitcode/LLVMBitCodes.h @@ -603,6 +603,7 @@ ATTR_KIND_OPT_FOR_FUZZING = 57, ATTR_KIND_SHADOWCALLSTACK = 58, ATTR_KIND_SPECULATIVE_LOAD_HARDENING = 59, + ATTR_KIND_IMMARG = 60 }; enum ComdatSelectionKindCodes { Index: include/llvm/IR/Attributes.td =================================================================== --- include/llvm/IR/Attributes.td +++ include/llvm/IR/Attributes.td @@ -130,6 +130,9 @@ /// Return value is always equal to this argument. def Returned : EnumAttr<"returned">; +/// Parameter is required to be a trivial constant. +def ImmArg : EnumAttr<"immarg">; + /// Function can return twice. def ReturnsTwice : EnumAttr<"returns_twice">; Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -69,6 +69,11 @@ int ArgNo = argNo; } +// ImmArg - The specified argument must be an immediate. +class ImmArg : IntrinsicProperty { + int ArgNo = argNo; +} + // ReadOnly - The specified argument pointer is not written to through the // pointer by the intrinsic. class ReadOnly : IntrinsicProperty { @@ -397,9 +402,9 @@ //===--------------------- Code Generator Intrinsics ----------------------===// // -def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; def int_addressofreturnaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; -def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>; def int_sponentry : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty], [IntrReadMem], "llvm.read_register">; @@ -484,16 +489,17 @@ [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, - WriteOnly<0>, ReadOnly<1>]>; + WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>; def int_memmove : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, - ReadOnly<1>]>; + ReadOnly<1>, ImmArg<3>]>; def int_memset : Intrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty], - [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>; + [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>, + ImmArg<3>]>; // FIXME: Add version of these floating point intrinsics which allow non-default // rounding modes and FP exception handling. @@ -560,7 +566,7 @@ def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, llvm_i1_ty, llvm_i1_ty, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable]>, + [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>, GCCBuiltin<"__builtin_object_size">; //===--------------- Constrained Floating Point Intrinsics ----------------===// @@ -697,8 +703,6 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in { def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; - def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; - def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_fshl : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; @@ -706,6 +710,11 @@ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; } +let IntrProperties = [IntrNoMem, IntrSpeculatable, ImmArg<1>] in { + def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; + def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; +} + //===------------------------ Debugger Intrinsics -------------------------===// // @@ -848,11 +857,11 @@ // def int_smul_fix : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>; def int_umul_fix : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable, Commutative]>; + [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>; //===------------------------- Memory Use Markers -------------------------===// // @@ -1018,23 +1027,24 @@ LLVMAnyPointerType>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrArgMemOnly]>; + [IntrArgMemOnly, ImmArg<2>]>; def int_masked_load : Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem, IntrArgMemOnly]>; + [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>; def int_masked_gather: Intrinsic<[llvm_anyvector_ty], [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem]>; + [IntrReadMem, ImmArg<1>]>; def int_masked_scatter: Intrinsic<[], [llvm_anyvector_ty, LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>]>; + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [ImmArg<2>]>; def int_masked_expandload: Intrinsic<[llvm_anyvector_ty], [LLVMPointerToElt<0>, @@ -1090,7 +1100,7 @@ ], [ IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, - ReadOnly<1> + ReadOnly<1>, ImmArg<3> ]>; // @llvm.memmove.element.unordered.atomic.*(dest, src, length, elementsize) @@ -1101,13 +1111,13 @@ ], [ IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, - ReadOnly<1> + ReadOnly<1>, ImmArg<3> ]>; // @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize) def int_memset_element_unordered_atomic : Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ], - [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0> ]>; + [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0>, ImmArg<3> ]>; //===------------------------ Reduction Intrinsics ------------------------===// // Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -177,7 +177,7 @@ // This is always moved to the beginning of the basic block. def int_amdgcn_init_exec : Intrinsic<[], [llvm_i64_ty], // 64-bit literal constant - [IntrConvergent]>; + [IntrConvergent, ImmArg<0>]>; // Set EXEC according to a thread count packed in an SGPR input: // thread_count = (input >> bitoffset) & 0x7f; @@ -195,9 +195,9 @@ // The first parameter is s_sendmsg immediate (i16), // the second one is copied to m0 def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">, - Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>; + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>; def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">, - Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>; + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>; def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrConvergent]>; @@ -206,7 +206,7 @@ Intrinsic<[], [], [IntrConvergent]>; def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">, - Intrinsic<[], [llvm_i32_ty], []>; + Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>; def int_amdgcn_div_scale : Intrinsic< // 1st parameter: Numerator @@ -215,7 +215,7 @@ // second. (0 = first, 1 = second). [llvm_anyfloat_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], - [IntrNoMem, IntrSpeculatable] + [IntrNoMem, IntrSpeculatable, ImmArg<2>] >; def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty], @@ -373,7 +373,7 @@ llvm_i32_ty, // ordering llvm_i32_ty, // scope llvm_i1_ty], // isVolatile - [IntrArgMemOnly, NoCapture<0>], "", + [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand] >; @@ -388,7 +388,7 @@ llvm_i32_ty, // ordering llvm_i32_ty, // scope llvm_i1_ty], // isVolatile - [IntrArgMemOnly, NoCapture<0>] + [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>] >; class AMDGPUDSOrderedIntrinsic : Intrinsic< @@ -403,14 +403,17 @@ llvm_i32_ty, // ordered count index (OA index), also added to the address llvm_i1_ty, // wave release, usually set to 1 llvm_i1_ty], // wave done, set to 1 for the last ordered instruction - [NoCapture<0>] + [NoCapture<0>, + ImmArg<2>, ImmArg<3>, ImmArg<4>, + ImmArg<5>, ImmArg<6>, ImmArg<7> + ] >; class AMDGPUDSAppendConsumedIntrinsic : Intrinsic< [llvm_i32_ty], [llvm_anyptr_ty, // LDS or GDS ptr llvm_i1_ty], // isVolatile - [IntrConvergent, IntrArgMemOnly, NoCapture<0>] + [IntrConvergent, IntrArgMemOnly, NoCapture<0>, ImmArg<1>] >; def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic; @@ -649,6 +652,19 @@ let LodClampMip = "mip"; } +// Helper class for figuring out image intrinsic argument indexes. +class AMDGPUImageDimIntrinsicEval { + int NumDataArgs = !size(P_.DataArgs); + int NumDmaskArgs = !if(P_.IsAtomic, 0, 1); + int NumVAddrArgs = !size(P_.AddrArgs); + int NumRSrcArgs = 1; + int NumSampArgs = !if(P_.IsSample, 2, 0); + int DmaskArgIndex = NumDataArgs; + int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1); + int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs); + int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1); +} + // All dimension-aware intrinsics are derived from this class. class AMDGPUImageDimIntrinsic props, @@ -663,7 +679,12 @@ llvm_i1_ty], []), // unorm(imm) [llvm_i32_ty, // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe) llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc) - props, "", sdnodeprops>, + !listconcat(props, + !if(P_.IsAtomic, [], [ImmArg.DmaskArgIndex>]), + !if(P_.IsSample, [ImmArg.UnormArgIndex>], []), + [ImmArg.TexFailCtrlArgIndex>, + ImmArg.CachePolicyArgIndex>]), + "", sdnodeprops>, AMDGPURsrcIntrinsic { AMDGPUDimProfile P = P_; @@ -825,7 +846,7 @@ llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrReadMem], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; def int_amdgcn_buffer_load : AMDGPUBufferLoad; @@ -835,7 +856,7 @@ [llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // byte offset(SGPR/VGPR/imm) llvm_i32_ty], // cachepolicy(imm; bit 0 = glc) - [IntrNoMem]>, + [IntrNoMem, ImmArg<2>]>, AMDGPURsrcIntrinsic<0>; class AMDGPUBufferStore : Intrinsic < @@ -846,7 +867,7 @@ llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrWriteMem], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_buffer_store_format : AMDGPUBufferStore; def int_amdgcn_buffer_store : AMDGPUBufferStore; @@ -864,7 +885,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) - [IntrReadMem], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad; def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad; @@ -876,7 +897,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) - [IntrReadMem], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad; def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad; @@ -888,7 +909,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) - [IntrWriteMem], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore; def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore; @@ -901,7 +922,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) - [IntrWriteMem], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore; def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore; @@ -913,7 +934,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [], "", [SDNPMemOperand]>, + [ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic; @@ -933,7 +954,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [], "", [SDNPMemOperand]>, + [ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; class AMDGPUStructBufferAtomic : Intrinsic < @@ -944,7 +965,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [], "", [SDNPMemOperand]>, + [ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic; def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic; @@ -965,7 +986,7 @@ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty], // cachepolicy(imm; bit 1 = slc) - [], "", [SDNPMemOperand]>, + [ImmArg<6>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; // Obsolescent tbuffer intrinsics. @@ -980,7 +1001,8 @@ llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrReadMem], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg<4>, ImmArg<5>, ImmArg<6>, + ImmArg<7>, ImmArg<8>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_tbuffer_store : Intrinsic < @@ -995,7 +1017,8 @@ llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrWriteMem], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg<5>, ImmArg<6>, ImmArg<7>, + ImmArg<8>, ImmArg<9>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; // New tbuffer intrinsics, with: @@ -1009,7 +1032,7 @@ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) - [IntrReadMem], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_raw_tbuffer_store : Intrinsic < @@ -1020,7 +1043,7 @@ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) - [IntrWriteMem], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; def int_amdgcn_struct_tbuffer_load : Intrinsic < @@ -1031,7 +1054,7 @@ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) - [IntrReadMem], "", [SDNPMemOperand]>, + [IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>; def int_amdgcn_struct_tbuffer_store : Intrinsic < @@ -1043,7 +1066,7 @@ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt) llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc) - [IntrWriteMem], "", [SDNPMemOperand]>, + [IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1>; class AMDGPUBufferAtomic : Intrinsic < @@ -1053,7 +1076,7 @@ llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - [], "", [SDNPMemOperand]>, + [ImmArg<4>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<1, 0>; def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic; @@ -1073,7 +1096,7 @@ llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - [], "", [SDNPMemOperand]>, + [ImmArg<5>], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; } // defset AMDGPUBufferIntrinsics @@ -1090,7 +1113,7 @@ llvm_i1_ty, // done llvm_i1_ty // vm ], - [] + [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>] >; // exp with compr bit set. @@ -1101,7 +1124,7 @@ LLVMMatchType<0>, // src1 llvm_i1_ty, // done llvm_i1_ty], // vm - [] + [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>] >; def int_amdgcn_buffer_wbinvl1_sc : @@ -1276,11 +1299,11 @@ def int_amdgcn_icmp : Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrConvergent]>; + [IntrNoMem, IntrConvergent, ImmArg<2>]>; def int_amdgcn_fcmp : Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty], - [IntrNoMem, IntrConvergent]>; + [IntrNoMem, IntrConvergent, ImmArg<2>]>; def int_amdgcn_readfirstlane : GCCBuiltin<"__builtin_amdgcn_readfirstlane">, @@ -1370,7 +1393,8 @@ def int_amdgcn_mov_dpp : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i1_ty], [IntrNoMem, IntrConvergent]>; + llvm_i1_ty], [IntrNoMem, IntrConvergent, ImmArg<1>, + ImmArg<2>, ImmArg<3>, ImmArg<4>]>; // llvm.amdgcn.update.dpp.i32 // Should be equivalent to: @@ -1378,8 +1402,10 @@ // v_mov_b32 def int_amdgcn_update_dpp : Intrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent]>; + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], + [IntrNoMem, IntrConvergent, + ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_s_dcache_wb : GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">, Index: lib/AsmParser/LLLexer.cpp =================================================================== --- lib/AsmParser/LLLexer.cpp +++ lib/AsmParser/LLLexer.cpp @@ -684,6 +684,7 @@ KEYWORD(uwtable); KEYWORD(writeonly); KEYWORD(zeroext); + KEYWORD(immarg); KEYWORD(type); KEYWORD(opaque); Index: lib/AsmParser/LLParser.cpp =================================================================== --- lib/AsmParser/LLParser.cpp +++ lib/AsmParser/LLParser.cpp @@ -1302,6 +1302,7 @@ case lltok::kw_sret: case lltok::kw_swifterror: case lltok::kw_swiftself: + case lltok::kw_immarg: HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute on a function"); @@ -1595,6 +1596,7 @@ case lltok::kw_swiftself: B.addAttribute(Attribute::SwiftSelf); break; case lltok::kw_writeonly: B.addAttribute(Attribute::WriteOnly); break; case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; + case lltok::kw_immarg: B.addAttribute(Attribute::ImmArg); break; case lltok::kw_alignstack: case lltok::kw_alwaysinline: @@ -1689,6 +1691,7 @@ case lltok::kw_sret: case lltok::kw_swifterror: case lltok::kw_swiftself: + case lltok::kw_immarg: HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute"); break; Index: lib/AsmParser/LLToken.h =================================================================== --- lib/AsmParser/LLToken.h +++ lib/AsmParser/LLToken.h @@ -226,6 +226,7 @@ kw_uwtable, kw_writeonly, kw_zeroext, + kw_immarg, kw_type, kw_opaque, Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -1188,6 +1188,8 @@ case Attribute::ShadowCallStack: return 1ULL << 59; case Attribute::SpeculativeLoadHardening: return 1ULL << 60; + case Attribute::ImmArg: + return 1ULL << 61; case Attribute::Dereferenceable: llvm_unreachable("dereferenceable attribute not supported in raw format"); break; @@ -1424,6 +1426,8 @@ return Attribute::WriteOnly; case bitc::ATTR_KIND_Z_EXT: return Attribute::ZExt; + case bitc::ATTR_KIND_IMMARG: + return Attribute::ImmArg; } } Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -712,6 +712,8 @@ return bitc::ATTR_KIND_WRITEONLY; case Attribute::ZExt: return bitc::ATTR_KIND_Z_EXT; + case Attribute::ImmArg: + return bitc::ATTR_KIND_IMMARG; case Attribute::EndAttrKinds: llvm_unreachable("Can not encode end-attribute kinds marker."); case Attribute::None: Index: lib/IR/Attributes.cpp =================================================================== --- lib/IR/Attributes.cpp +++ lib/IR/Attributes.cpp @@ -350,6 +350,8 @@ return "zeroext"; if (hasAttribute(Attribute::Cold)) return "cold"; + if (hasAttribute(Attribute::ImmArg)) + return "immarg"; // FIXME: These should be output like this: // Index: lib/IR/Verifier.cpp =================================================================== --- lib/IR/Verifier.cpp +++ lib/IR/Verifier.cpp @@ -499,7 +499,7 @@ const Value *V); void verifyParameterAttrs(AttributeSet Attrs, Type *Ty, const Value *V); void verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, - const Value *V); + const Value *V, bool IsIntrinsic); void verifyFunctionMetadata(ArrayRef> MDs); void visitConstantExprsRecursively(const Constant *EntryC); @@ -1561,6 +1561,11 @@ verifyAttributeTypes(Attrs, /*IsFunction=*/false, V); + if (Attrs.hasAttribute(Attribute::ImmArg)) { + Assert(Attrs.getNumAttributes() == 1, + "Attribute 'immarg' is incompatible with other attributes", V); + } + // Check for mutually incompatible attributes. Only inreg is compatible with // sret. unsigned AttrCount = 0; @@ -1648,7 +1653,7 @@ // Check parameter attributes against a function type. // The value V is printed in error messages. void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, - const Value *V) { + const Value *V, bool IsIntrinsic) { if (Attrs.isEmpty()) return; @@ -1685,6 +1690,11 @@ Type *Ty = FT->getParamType(i); AttributeSet ArgAttrs = Attrs.getParamAttributes(i); + if (!IsIntrinsic) { + Assert(!ArgAttrs.hasAttribute(Attribute::ImmArg), + "immarg attribute only applies to intrinsics",V); + } + verifyParameterAttrs(ArgAttrs, Ty, V); if (ArgAttrs.hasAttribute(Attribute::Nest)) { @@ -2129,8 +2139,11 @@ Assert(verifyAttributeCount(Attrs, FT->getNumParams()), "Attribute after last parameter!", &F); + bool isLLVMdotName = F.getName().size() >= 5 && + F.getName().substr(0, 5) == "llvm."; + // Check function attributes. - verifyFunctionAttrs(FT, Attrs, &F); + verifyFunctionAttrs(FT, Attrs, &F, isLLVMdotName); // On function declarations/definitions, we do not support the builtin // attribute. We do not check this in VerifyFunctionAttrs since that is @@ -2169,9 +2182,6 @@ break; } - bool isLLVMdotName = F.getName().size() >= 5 && - F.getName().substr(0, 5) == "llvm."; - // Check that the argument values match the function type for this function... unsigned i = 0; for (const Argument &Arg : F.args()) { @@ -2779,17 +2789,21 @@ Assert(verifyAttributeCount(Attrs, Call.arg_size()), "Attribute after last parameter!", Call); + bool IsIntrinsic = Call.getCalledFunction() && + Call.getCalledFunction()->getName().startswith("llvm."); + + Function *Callee + = dyn_cast(Call.getCalledValue()->stripPointerCasts()); + if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::Speculatable)) { // Don't allow speculatable on call sites, unless the underlying function // declaration is also speculatable. - Function *Callee = - dyn_cast(Call.getCalledValue()->stripPointerCasts()); Assert(Callee && Callee->isSpeculatable(), "speculatable attribute may not apply to call sites", Call); } // Verify call attributes. - verifyFunctionAttrs(FTy, Attrs, &Call); + verifyFunctionAttrs(FTy, Attrs, &Call, IsIntrinsic); // Conservatively check the inalloca argument. // We have a bug if we can find that there is an underlying alloca without @@ -2804,7 +2818,7 @@ // For each argument of the callsite, if it has the swifterror argument, // make sure the underlying alloca/parameter it comes from has a swifterror as // well. - for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) { if (Call.paramHasAttr(i, Attribute::SwiftError)) { Value *SwiftErrorArg = Call.getArgOperand(i); if (auto AI = dyn_cast(SwiftErrorArg->stripInBoundsOffsets())) { @@ -2821,6 +2835,21 @@ Call); } + if (Attrs.hasParamAttribute(i, Attribute::ImmArg)) { + // Don't allow immarg on call sites, unless the underlying declaration + // also has the matching immarg. + Assert(Callee && Callee->hasParamAttribute(i, Attribute::ImmArg), + "immarg may not apply only to call sites", + Call.getArgOperand(i), Call); + } + + if (Call.paramHasAttr(i, Attribute::ImmArg)) { + Value *ArgVal = Call.getArgOperand(i); + Assert(isa(ArgVal) || isa(ArgVal), + "immarg operand has non-immediate parameter", ArgVal, Call); + } + } + if (FTy->isVarArg()) { // FIXME? is 'nest' even legal here? bool SawNest = false; @@ -2870,8 +2899,7 @@ } // Verify that there's no metadata unless it's a direct call to an intrinsic. - if (!Call.getCalledFunction() || - !Call.getCalledFunction()->getName().startswith("llvm.")) { + if (!IsIntrinsic) { for (Type *ParamTy : FTy->params()) { Assert(!ParamTy->isMetadataTy(), "Function has metadata parameter but isn't an intrinsic", Call); @@ -4222,9 +4250,7 @@ "alignment of arg 1 of memory intrinsic must be 0 or a power of 2", Call); } - Assert(isa(Call.getArgOperand(3)), - "isvolatile argument of memory intrinsics must be a constant int", - Call); + break; } case Intrinsic::memcpy_element_unordered_atomic: @@ -4233,11 +4259,7 @@ const auto *AMI = cast(&Call); ConstantInt *ElementSizeCI = - dyn_cast(AMI->getRawElementSizeInBytes()); - Assert(ElementSizeCI, - "element size of the element-wise unordered atomic memory " - "intrinsic must be a constant int", - Call); + cast(AMI->getRawElementSizeInBytes()); const APInt &ElementSizeVal = ElementSizeCI->getValue(); Assert(ElementSizeVal.isPowerOf2(), "element size of the element-wise atomic memory intrinsic " Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -920,8 +920,8 @@ Info.align = 0; Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; - const ConstantInt *Vol = dyn_cast(CI.getOperand(4)); - if (!Vol || !Vol->isZero()) + const ConstantInt *Vol = cast(CI.getOperand(4)); + if (!Vol->isZero()) Info.flags |= MachineMemOperand::MOVolatile; return true; @@ -934,8 +934,8 @@ Info.align = 0; Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; - const ConstantInt *Vol = dyn_cast(CI.getOperand(1)); - if (!Vol || !Vol->isZero()) + const ConstantInt *Vol = cast(CI.getOperand(1)); + if (!Vol->isZero()) Info.flags |= MachineMemOperand::MOVolatile; return true; @@ -3746,10 +3746,7 @@ static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); - const auto *CD = dyn_cast(N->getOperand(3)); - if (!CD) - return DAG.getUNDEF(VT); - + const auto *CD = cast(N->getOperand(3)); int CondCode = CD->getSExtValue(); if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE || CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE) @@ -3780,9 +3777,7 @@ static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI, SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); - const auto *CD = dyn_cast(N->getOperand(3)); - if (!CD) - return DAG.getUNDEF(VT); + const auto *CD = cast(N->getOperand(3)); int CondCode = CD->getSExtValue(); if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE || @@ -4649,9 +4644,7 @@ static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG, SDValue *GLC, SDValue *SLC) { - auto CachePolicyConst = dyn_cast(CachePolicy.getNode()); - if (!CachePolicyConst) - return false; + auto CachePolicyConst = cast(CachePolicy.getNode()); uint64_t Value = CachePolicyConst->getZExtValue(); SDLoc DL(CachePolicy); @@ -4752,9 +4745,7 @@ static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE, SDValue *LWE, bool &IsTexFail) { - auto TexFailCtrlConst = dyn_cast(TexFailCtrl.getNode()); - if (!TexFailCtrlConst) - return false; + auto TexFailCtrlConst = cast(TexFailCtrl.getNode()); uint64_t Value = TexFailCtrlConst->getZExtValue(); if (Value) { @@ -4817,9 +4808,7 @@ } } else { unsigned DMaskIdx = BaseOpcode->Store ? 3 : isa(Op) ? 2 : 1; - auto DMaskConst = dyn_cast(Op.getOperand(DMaskIdx)); - if (!DMaskConst) - return Op; + auto DMaskConst = cast(Op.getOperand(DMaskIdx)); DMask = DMaskConst->getZExtValue(); DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask); @@ -4933,9 +4922,7 @@ CtrlIdx = AddrIdx + NumVAddrs + 1; } else { auto UnormConst = - dyn_cast(Op.getOperand(AddrIdx + NumVAddrs + 2)); - if (!UnormConst) - return Op; + cast(Op.getOperand(AddrIdx + NumVAddrs + 2)); Unorm = UnormConst->getZExtValue() ? True : False; CtrlIdx = AddrIdx + NumVAddrs + 3; @@ -5388,10 +5375,7 @@ return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT, Op.getOperand(1), Op.getOperand(2)); case Intrinsic::amdgcn_div_scale: { - // 3rd parameter required to be a constant. - const ConstantSDNode *Param = dyn_cast(Op.getOperand(3)); - if (!Param) - return DAG.getMergeValues({ DAG.getUNDEF(VT), DAG.getUNDEF(MVT::i1) }, DL); + const ConstantSDNode *Param = cast(Op.getOperand(3)); // Translate to the operands expected by the machine instruction. The // first parameter must be the same as the first instruction. Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3546,10 +3546,7 @@ } case Intrinsic::amdgcn_exp: case Intrinsic::amdgcn_exp_compr: { - ConstantInt *En = dyn_cast(II->getArgOperand(1)); - if (!En) // Illegal. - break; - + ConstantInt *En = cast(II->getArgOperand(1)); unsigned EnBits = En->getZExtValue(); if (EnBits == 0xf) break; // All inputs enabled. @@ -3639,10 +3636,7 @@ } case Intrinsic::amdgcn_icmp: case Intrinsic::amdgcn_fcmp: { - const ConstantInt *CC = dyn_cast(II->getArgOperand(2)); - if (!CC) - break; - + const ConstantInt *CC = cast(II->getArgOperand(2)); // Guard against invalid arguments. int64_t CCVal = CC->getZExtValue(); bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp; @@ -3792,11 +3786,10 @@ case Intrinsic::amdgcn_update_dpp: { Value *Old = II->getArgOperand(0); - auto BC = dyn_cast(II->getArgOperand(5)); - auto RM = dyn_cast(II->getArgOperand(3)); - auto BM = dyn_cast(II->getArgOperand(4)); - if (!BC || !RM || !BM || - BC->isZeroValue() || + auto BC = cast(II->getArgOperand(5)); + auto RM = cast(II->getArgOperand(3)); + auto BM = cast(II->getArgOperand(4)); + if (BC->isZeroValue() || RM->getZExtValue() != 0xF || BM->getZExtValue() != 0xF || isa(Old)) Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -983,10 +983,7 @@ // below. DemandedElts = (1 << DemandedElts.getActiveBits()) - 1; } else { - ConstantInt *DMask = dyn_cast(II->getArgOperand(DMaskIdx)); - if (!DMask) - return nullptr; // non-constant dmask is not supported by codegen - + ConstantInt *DMask = cast(II->getArgOperand(DMaskIdx)); unsigned DMaskVal = DMask->getZExtValue() & 0xf; // Mask off values that are undefined because the dmask doesn't cover them @@ -1640,12 +1637,9 @@ return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts); default: { if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID())) { - LLVM_DEBUG( - Value *TFC = II->getArgOperand(II->getNumOperands() - 2); - assert(!isa(TFC) || - dyn_cast(TFC)->getZExtValue() == 0); - ); - + assert(cast( + II->getArgOperand( + II->getNumOperands() - 2))->getZExtValue() == 0); return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0); } Index: lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- lib/Transforms/Utils/CodeExtractor.cpp +++ lib/Transforms/Utils/CodeExtractor.cpp @@ -793,6 +793,7 @@ case Attribute::SwiftSelf: case Attribute::WriteOnly: case Attribute::ZExt: + case Attribute::ImmArg: case Attribute::EndAttrKinds: continue; // Those attributes should be safe to propagate to the extracted function. Index: test/Assembler/autoupgrade-invalid-mem-intrinsics.ll =================================================================== --- /dev/null +++ test/Assembler/autoupgrade-invalid-mem-intrinsics.ll @@ -0,0 +1,15 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +; Check that remangling code doesn't fail on an intrinsic with wrong signature + +; CHECK: Attribute after last parameter! +; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memset.i64 +declare void @llvm.memset.i64(i8* nocapture, i8, i64) nounwind + +; CHECK: Attribute after last parameter! +; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memcpy.i64 +declare void @llvm.memcpy.i64(i8* nocapture, i8, i64) nounwind + +; CHECK: Attribute after last parameter! +; CHECK-NEXT: void (i8*, i8, i64)* @llvm.memmove.i64 +declare void @llvm.memmove.i64(i8* nocapture, i8, i64) nounwind Index: test/Assembler/immarg-param-attribute.ll =================================================================== --- /dev/null +++ test/Assembler/immarg-param-attribute.ll @@ -0,0 +1,39 @@ +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s + +; CHECK: declare void @llvm.test.immarg.intrinsic.i32(i32 immarg) +declare void @llvm.test.immarg.intrinsic.i32(i32 immarg) + +; CHECK: declare void @llvm.test.immarg.intrinsic.f32(float immarg) +declare void @llvm.test.immarg.intrinsic.f32(float immarg) + +; CHECK-LABEL: @call_llvm.test.immarg.intrinsic.i32( +define void @call_llvm.test.immarg.intrinsic.i32() { + ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0) + call void @llvm.test.immarg.intrinsic.i32(i32 0) + + ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0) + call void @llvm.test.immarg.intrinsic.i32(i32 0) + + ; CHECK call void @llvm.test.immarg.intrinsic.i32(i32 1) + call void @llvm.test.immarg.intrinsic.i32(i32 1) + + ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 5) + call void @llvm.test.immarg.intrinsic.i32(i32 add (i32 2, i32 3)) + + ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 0) + call void @llvm.test.immarg.intrinsic.i32(i32 ptrtoint (i32* null to i32)) + ret void +} + +; CHECK-LABEL: @call_llvm.test.immarg.intrinsic.f32( +define void @call_llvm.test.immarg.intrinsic.f32() { + ; CHECK: call void @llvm.test.immarg.intrinsic.f32(float 1.000000e+00) + call void @llvm.test.immarg.intrinsic.f32(float 1.0) + ret void +} + +define void @on_callsite_and_declaration() { + ; CHECK: call void @llvm.test.immarg.intrinsic.i32(i32 immarg 0) + call void @llvm.test.immarg.intrinsic.i32(i32 immarg 0) + ret void +} Index: test/Assembler/invalid-immarg.ll =================================================================== --- /dev/null +++ test/Assembler/invalid-immarg.ll @@ -0,0 +1,34 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.byval(i32* byval immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.inalloca(i32* inalloca immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.inreg(i32 inreg immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.nest(i32* nest immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.sret(i32* sret immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.zeroext(i32 zeroext immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.signext(i32 signext immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.returned(i32 returned immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.noalias(i32* noalias immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.readnone(i32* readnone immarg) + +; CHECK: Attribute 'immarg' is incompatible with other attributes +declare void @llvm.immarg.readonly(i32* readonly immarg) Index: test/Assembler/invalid-immarg2.ll =================================================================== --- /dev/null +++ test/Assembler/invalid-immarg2.ll @@ -0,0 +1,4 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: error: invalid use of parameter-only attribute on a function +declare void @llvm.immarg.func() immarg Index: test/Assembler/invalid-immarg3.ll =================================================================== --- /dev/null +++ test/Assembler/invalid-immarg3.ll @@ -0,0 +1,4 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK: error: invalid use of parameter-only attribute +declare immarg i32 @llvm.immarg.retattr(i32) Index: test/Bitcode/compatibility.ll =================================================================== --- test/Bitcode/compatibility.ll +++ test/Bitcode/compatibility.ll @@ -1681,6 +1681,10 @@ ret i8** getelementptr inbounds ({ [4 x i8*], [4 x i8*] }, { [4 x i8*], [4 x i8*] }* null, i32 0, inrange i32 1, i32 2) } +; immarg attribute +declare void @llvm.test.immarg.intrinsic(i32 immarg) +; CHECK: declare void @llvm.test.immarg.intrinsic(i32 immarg) + ; CHECK: attributes #0 = { alignstack=4 } ; CHECK: attributes #1 = { alignstack=8 } ; CHECK: attributes #2 = { alwaysinline } Index: test/Bitcode/upgrade-memory-intrinsics.ll =================================================================== --- test/Bitcode/upgrade-memory-intrinsics.ll +++ test/Bitcode/upgrade-memory-intrinsics.ll @@ -27,9 +27,9 @@ ret void } -; CHECK: declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) -; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) -; CHECK: declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) +; CHECK: declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) +; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) +; CHECK: declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg) declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly , i8* nocapture readonly, i64, i32, i1) declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) Index: test/CodeGen/AMDGPU/bitcast-vector-extract.ll =================================================================== --- test/CodeGen/AMDGPU/bitcast-vector-extract.ll +++ test/CodeGen/AMDGPU/bitcast-vector-extract.ll @@ -70,8 +70,8 @@ ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source: ; GCN-NOT: store_dword -define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 { - %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1 +define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b) #0 { + %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1 %bc = bitcast i64 %undef to <2 x i32> store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out ret void @@ -79,8 +79,8 @@ ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt: ; GCN-NOT: store_dword -define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) #0 { - %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 %c) #1 +define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 { + %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1 %bc = bitcast i64 %undef to <2 x i32> %elt1 = extractelement <2 x i32> %bc, i32 1 store volatile i32 %elt1, i32 addrspace(1)* %out Index: test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll @@ -12,34 +12,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 -; Make sure no crash on invalid non-constant -; GCN-LABEL: {{^}}invalid_variable_order_lds_atomic_dec_ret_i32: -; CIVI-DAG: s_mov_b32 m0 -; GFX9-NOT: m0 -define amdgpu_kernel void @invalid_variable_order_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %order.var) #0 { - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %order.var, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out - ret void -} - -; Make sure no crash on invalid non-constant -; GCN-LABEL: {{^}}invalid_variable_scope_lds_atomic_dec_ret_i32: -; CIVI-DAG: s_mov_b32 m0 -; GFX9-NOT: m0 -define amdgpu_kernel void @invalid_variable_scope_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %scope.var) #0 { - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %scope.var, i1 false) - store i32 %result, i32 addrspace(1)* %out - ret void -} - -; Make sure no crash on invalid non-constant -; GCN-LABEL: {{^}}invalid_variable_volatile_lds_atomic_dec_ret_i32: -define amdgpu_kernel void @invalid_variable_volatile_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i1 %volatile.var) #0 { - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %volatile.var) - store i32 %result, i32 addrspace(1)* %out - ret void -} - ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32: ; CIVI-DAG: s_mov_b32 m0 ; GFX9-NOT: m0 Index: test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll @@ -395,41 +395,6 @@ ret void } -; Undefined selector gets deleted -; SI-LABEL: {{^}}test_div_scale_f32_val_undef_undef: -; SI-NOT: v_div_scale -define amdgpu_kernel void @test_div_scale_f32_val_undef_undef(float addrspace(1)* %out) #0 { - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 undef) - %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 - ret void -} - -; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_undef: -; SI-NOT: v_div_scale -define amdgpu_kernel void @test_div_scale_f32_undef_undef_undef(float addrspace(1)* %out) #0 { - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 undef) - %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 - ret void -} - -; SI-LABEL: {{^}}test_div_scale_f32_val_val_undef: -; SI-NOT: v_div_scale -define amdgpu_kernel void @test_div_scale_f32_val_val_undef(float addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 - - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 - - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 undef) - %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 - ret void -} - ; SI-LABEL: {{^}}test_div_scale_f64_val_undef_val: ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}} ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x40200000 Index: test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll @@ -8,14 +8,6 @@ declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) #0 declare half @llvm.fabs.f16(half) #0 -; GCN-LABEL: {{^}}v_fcmp_f32_dynamic_cc: -; GCN: s_endpgm -define amdgpu_kernel void @v_fcmp_f32_dynamic_cc(i64 addrspace(1)* %out, float %src0, float %src1, i32 %cc) { - %result = call i64 @llvm.amdgcn.fcmp.f32(float %src0, float %src1, i32 %cc) - store i64 %result, i64 addrspace(1)* %out - ret void -} - ; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs: ; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}| define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) { Index: test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll @@ -6,15 +6,6 @@ declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0 declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0 -; No crash on invalid input -; GCN-LABEL: {{^}}v_icmp_i32_dynamic_cc: -; GCN: s_endpgm -define amdgpu_kernel void @v_icmp_i32_dynamic_cc(i64 addrspace(1)* %out, i32 %src, i32 %cc) { - %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 %cc) - store i64 %result, i64 addrspace(1)* %out - ret void -} - ; GCN-LABEL: {{^}}v_icmp_i32_eq: ; GCN: v_cmp_eq_u32_e64 define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) { @@ -181,15 +172,6 @@ ret void } -; GCN-LABEL: {{^}}v_icmp_i16_dynamic_cc: -; GCN: s_endpgm -define amdgpu_kernel void @v_icmp_i16_dynamic_cc(i64 addrspace(1)* %out, i16 %src, i32 %cc) { - %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 %cc) - store i64 %result, i64 addrspace(1)* %out - ret void -} - -; GCN-LABEL: {{^}}v_icmp_i16_eq: ; VI: v_cmp_eq_u16_e64 ; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64 Index: test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll @@ -67,9 +67,6 @@ declare void @llvm.amdgcn.raw.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) #0 declare void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) #0 declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0 -declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i1, i1) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readonly } - - Index: test/DebugInfo/MIR/X86/kill-after-spill.mir =================================================================== --- test/DebugInfo/MIR/X86/kill-after-spill.mir +++ test/DebugInfo/MIR/X86/kill-after-spill.mir @@ -123,9 +123,6 @@ declare i8* @memset(i8*, i32, i64) local_unnamed_addr - ; Function Attrs: nounwind readnone speculatable - declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1) #1 - ; Function Attrs: nounwind readnone speculatable declare void @llvm.dbg.value(metadata, metadata, metadata) #1 Index: test/LTO/X86/remangle_intrinsics.ll =================================================================== --- test/LTO/X86/remangle_intrinsics.ll +++ test/LTO/X86/remangle_intrinsics.ll @@ -19,6 +19,3 @@ } declare void @llvm.memset.p0struct.rtx_def.i32(%struct.rtx_def*, i8, i32, i1) - -; Check that remangling code doesn't fail on an intrinsic with wrong signature -declare void @llvm.memset.i64(i8* nocapture, i8, i64) nounwind Index: test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll =================================================================== --- test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll +++ test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll @@ -125,15 +125,6 @@ ret i64 %ret } -; CHECK-LABEL: @invalid_variable_volatile_atomicinc_group_to_flat_i64( -; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64* -; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 %volatile.var) -define i64 @invalid_variable_volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y, i1 %volatile.var) #0 { - %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64* - %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 %volatile.var) - ret i64 %ret -} - declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1, i1) #1 declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) #1 declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2 Index: test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll =================================================================== --- test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll +++ test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll @@ -320,229 +320,229 @@ ; -------------------------------------------------------------------- ; CHECK-LABEL: @raw_buffer_load_f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret float %data } ; CHECK-LABEL: @raw_buffer_load_v1f32( -; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <1 x float> %data -define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret <1 x float> %data } ; CHECK-LABEL: @raw_buffer_load_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> %data -define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret <2 x float> %data } ; CHECK-LABEL: @raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <4 x float> %data -define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret <4 x float> %data } ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f32( -; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <2 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 3 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v3f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <3 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4f32( -; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32 ; CHECK-NEXT: ret i32 %tmp2 -define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x float> %tmp to <4 x i32> %tmp2 = extractelement <4 x i32> %tmp1, i32 0 ret i32 %tmp2 } ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4i32( -; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float ; CHECK-NEXT: ret float %tmp2 -define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x i32> %tmp to <4 x float> %tmp2 = extractelement <4 x float> %tmp1, i32 0 ret float %tmp2 } ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_v2f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 ; CHECK-NEXT: ret float %data -define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } @@ -560,229 +560,229 @@ ; -------------------------------------------------------------------- ; CHECK-LABEL: @raw_buffer_load_format_f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret float %data } ; CHECK-LABEL: @raw_buffer_load_format_v1f32( -; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <1 x float> %data -define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret <1 x float> %data } ; CHECK-LABEL: @raw_buffer_load_format_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> %data -define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret <2 x float> %data } ; CHECK-LABEL: @raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <4 x float> %data -define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ret <4 x float> %data } ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v2f32( -; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <2 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt3_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 3 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <3 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4f32( -; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32 ; CHECK-NEXT: ret i32 %tmp2 -define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x float> %tmp to <4 x i32> %tmp2 = extractelement <4 x i32> %tmp1, i32 0 ret i32 %tmp2 } ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4i32( -; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.format.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.raw.buffer.load.format.i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float ; CHECK-NEXT: ret float %tmp2 -define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) +define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x i32> %tmp to <4 x float> %tmp2 = extractelement <4 x float> %tmp1, i32 0 ret float %tmp2 } ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 ; CHECK-NEXT: ret float %data -define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } @@ -800,229 +800,229 @@ ; -------------------------------------------------------------------- ; CHECK-LABEL: @struct_buffer_load_f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret float %data } ; CHECK-LABEL: @struct_buffer_load_v1f32( -; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <1 x float> %data -define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret <1 x float> %data } ; CHECK-LABEL: @struct_buffer_load_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> %data -define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret <2 x float> %data } ; CHECK-LABEL: @struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <4 x float> %data -define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret <4 x float> %data } ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f32( -; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <2 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 3 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v3f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <3 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4f32( -; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32 ; CHECK-NEXT: ret i32 %tmp2 -define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x float> %tmp to <4 x i32> %tmp2 = extractelement <4 x i32> %tmp1, i32 0 ret i32 %tmp2 } ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4i32( -; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float ; CHECK-NEXT: ret float %tmp2 -define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x i32> %tmp to <4 x float> %tmp2 = extractelement <4 x float> %tmp1, i32 0 ret float %tmp2 } ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_v2f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 ; CHECK-NEXT: ret float %data -define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } @@ -1040,229 +1040,229 @@ ; -------------------------------------------------------------------- ; CHECK-LABEL: @struct_buffer_load_format_f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret float %data } ; CHECK-LABEL: @struct_buffer_load_format_v1f32( -; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <1 x float> %data -define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret <1 x float> %data } ; CHECK-LABEL: @struct_buffer_load_format_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> %data -define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret <2 x float> %data } ; CHECK-LABEL: @struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <4 x float> %data -define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ret <4 x float> %data } ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v2f32( -; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v2f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <2 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <4 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt3_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <4 x float> %data, i32 3 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <4 x float> %data, i32 3 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf -define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret float %data -define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt0 = extractelement <3 x float> %data, i32 0 ret float %elt0 } ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <2 x float> %data, i32 1 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 1 ret float %elt1 } ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %elt1 = extractelement <3 x float> %data, i32 2 ; CHECK-NEXT: ret float %elt1 -define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %elt1 = extractelement <3 x float> %data, i32 2 ret float %elt1 } ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: ret <2 x float> -define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32( -; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf -define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4f32( -; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast float %tmp to i32 ; CHECK-NEXT: ret i32 %tmp2 -define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x float> %tmp to <4 x i32> %tmp2 = extractelement <4 x i32> %tmp1, i32 0 ret i32 %tmp2 } ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4i32( -; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +; CHECK-NEXT: %tmp = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) ; CHECK-NEXT: %tmp2 = bitcast i32 %tmp to float ; CHECK-NEXT: ret float %tmp2 -define float @extract0_bitcast_struct_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) +define float @extract0_bitcast_struct_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %tmp = call <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) %tmp1 = bitcast <4 x i32> %tmp to <4 x float> %tmp2 = extractelement <4 x float> %tmp1, i32 0 ret float %tmp2 } ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32( -; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 ; CHECK-NEXT: ret float %data -define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent) #0 { - %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %coherent), !fpmath !0 +define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { + %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 %elt0 = extractelement <2 x float> %data, i32 0 ret float %elt0 } @@ -1319,16 +1319,6 @@ ret float %elt0 } -; CHECK-LABEL: @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32( -; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) -; CHECK-NEXT: %elt0 = extractelement <4 x float> %data, i32 0 -; CHECK-NEXT: ret float %elt0 -define amdgpu_ps float @extract_elt0_invalid_dmask_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask) #0 { - %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %elt0 = extractelement <4 x float> %data, i32 0 - ret float %elt0 -} - ; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32( ; CHECK-NEXT: ret float undef define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { @@ -2395,14 +2385,7 @@ ret float %elt0 } -; Verify that we don't creash on non-constant operand. -define protected <4 x half> @__llvm_amdgcn_image_sample_d_1darray_v4f16_f32_f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 zeroext, i32, i32) local_unnamed_addr { - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32 %0, float %1, float %2, float %3, float %4, <8 x i32> %5, <4 x i32> %6, i1 zeroext %7, i32 %8, i32 %9) #1 - ret <4 x half> %tmp -} - declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1 -declare <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) ; -------------------------------------------------------------------- ; TFE / LWE Index: test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll =================================================================== --- test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -1078,17 +1078,7 @@ ; llvm.amdgcn.exp ; -------------------------------------------------------------------- -declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind inaccessiblememonly - -; Make sure no crashing on invalid variable params -; CHECK-LABEL: @exp_invalid_inputs( -; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false) -; CHECK: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false) -define void @exp_invalid_inputs(i32 %tgt, i32 %en) { - call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) - call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) - ret void -} +declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) nounwind inaccessiblememonly ; CHECK-LABEL: @exp_disabled_inputs_to_undef( ; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false) @@ -1136,16 +1126,7 @@ ; llvm.amdgcn.exp.compr ; -------------------------------------------------------------------- -declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind inaccessiblememonly - -; CHECK-LABEL: @exp_compr_invalid_inputs( -; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> , <2 x half> , i1 true, i1 false) -; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> , <2 x half> , i1 true, i1 false) -define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en) { - call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> , <2 x half> , i1 true, i1 false) - call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> , <2 x half> , i1 true, i1 false) - ret void -} +declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) nounwind inaccessiblememonly ; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef( ; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false) @@ -1404,17 +1385,9 @@ ; llvm.amdgcn.icmp ; -------------------------------------------------------------------- -declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) nounwind readnone convergent -declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent -declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) nounwind readnone convergent - -; Make sure there's no crash for invalid input -; CHECK-LABEL: @invalid_nonconstant_icmp_code( -; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c) -define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) { - %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c) - ret i64 %result -} +declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32 immarg) nounwind readnone convergent +declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32 immarg) nounwind readnone convergent +declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32 immarg) nounwind readnone convergent ; CHECK-LABEL: @invalid_icmp_code( ; CHECK: %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31) @@ -2012,15 +1985,7 @@ ; llvm.amdgcn.fcmp ; -------------------------------------------------------------------- -declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) nounwind readnone convergent - -; Make sure there's no crash for invalid input -; CHECK-LABEL: @invalid_nonconstant_fcmp_code( -; CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c) -define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) { - %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c) - ret i64 %result -} +declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32 immarg) nounwind readnone convergent ; CHECK-LABEL: @invalid_fcmp_code( ; CHECK: %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1) Index: test/Transforms/LowerExpectIntrinsic/PR33346.ll =================================================================== --- test/Transforms/LowerExpectIntrinsic/PR33346.ll +++ test/Transforms/LowerExpectIntrinsic/PR33346.ll @@ -7,12 +7,12 @@ store i64 %arg, i64* %tmp, align 8 %tmp1 = load i64, i64* %tmp, align 8 %tmp2 = load i64, i64* %tmp, align 8 - %tmp3 = call i64 @llvm.expect.i64(i64 %tmp1, i64 %tmp2) + %tmp3 = call i64 @llvm.expect.i64(i64 %tmp1, i64 123) ret i64 %tmp3 } ; Function Attrs: nounwind readnone -declare i64 @llvm.expect.i64(i64, i64) +declare i64 @llvm.expect.i64(i64, i64 immarg) !llvm.module.flags = !{!0} Index: test/Verifier/AMDGPU/intrinsic-immarg.ll =================================================================== --- /dev/null +++ test/Verifier/AMDGPU/intrinsic-immarg.ll @@ -0,0 +1,462 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) +define void @buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %data0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 %bool, i1 false) + %data0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 %bool, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %data1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 %bool) + %data1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 %bool) + ret void +} + +declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) +define void @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %arg) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg + ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg) + %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg) + ret void +} + +declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32) +define void @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs, i32 %arg) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg + ; CHECK-NEXT: %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg) + %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 %arg) + ret void +} + +declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) +define void @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg + ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) + %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) + ret void +} + +declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) +define void @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg + ; CHECK-NEXT: %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) + %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 %arg) + ret void +} + +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) +define void @invalid_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc, i32 %dmask, i1 %bool, i32 %arg) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %dmask + ; CHECK-NEXT: %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + %data0 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 %dmask, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0) + %data1 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 %bool, i32 0, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg + ; CHECK-NEXT: %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0) + %data2 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 %arg, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg + ; CHECK-NEXT: %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg) + %data3 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 %arg) + ret void +} + +declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) +define void @exp_invalid_inputs(i32 %tgt, i32 %en, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %en + ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false) + call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %tgt + ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false) + call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 %bool, i1 false) + call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 %bool, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 %bool) + call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 %bool) + ret void +} + +declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) + +define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %en + ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> , <2 x half> , i1 true, i1 false) + call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> , <2 x half> , i1 true, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %tgt + ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> , <2 x half> , i1 true, i1 false) + call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> , <2 x half> , i1 true, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> , <2 x half> , i1 %bool, i1 false) + call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> , <2 x half> , i1 %bool, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> , <2 x half> , i1 false, i1 %bool) + call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 5, <2 x half> , <2 x half> , i1 false, i1 %bool) + ret void +} + +declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) + +define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %c + ; CHECK-NEXT: %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c) + %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c) + ret i64 %result +} + +declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) +define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %c + ; CHECK-NEXT: %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c) + %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c) + ret i64 %result +} + +declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) +define amdgpu_kernel void @invalid_atomic_inc(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false) + %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false) + %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool) + %result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool) + ret void +} + +declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) +define amdgpu_kernel void @invalid_atomic_dec(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false) + %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %var, i32 0, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false) + %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %var, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool) + %result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 %bool) + ret void +} + +declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) +define amdgpu_kernel void @test_div_scale_f32_val_undef_undef(float addrspace(1)* %out) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK: i1 undef + ; CHECK: %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.000000e+00, float undef, i1 undef) + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 undef) + %result0 = extractvalue { float, i1 } %result, 0 + store float %result0, float addrspace(1)* %out, align 4 + ret void +} + +declare void @llvm.amdgcn.init.exec(i64) +define amdgpu_ps void @init_exec(i64 %var) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i64 %var + ; CHECK-NEXT: call void @llvm.amdgcn.init.exec(i64 %var) + call void @llvm.amdgcn.init.exec(i64 %var) + ret void +} + +declare i32 @llvm.amdgcn.s.sendmsg(i32, i32) +define void @sendmsg(i32 %arg0, i32 %arg1) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg0 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.sendmsg(i32 %arg0, i32 %arg1) + %val = call i32 @llvm.amdgcn.s.sendmsg(i32 %arg0, i32 %arg1) + ret void +} + +declare i32 @llvm.amdgcn.s.sendmsghalt(i32, i32) +define void @sendmsghalt(i32 %arg0, i32 %arg1) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg0 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.sendmsghalt(i32 %arg0, i32 %arg1) + %val = call i32 @llvm.amdgcn.s.sendmsghalt(i32 %arg0, i32 %arg1) + ret void +} + +declare i32 @llvm.amdgcn.s.waitcnt(i32) +define void @waitcnt(i32 %arg0) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg0 + ; CHECK-NEXT: %val = call i32 @llvm.amdgcn.s.waitcnt(i32 %arg0) + %val = call i32 @llvm.amdgcn.s.waitcnt(i32 %arg0) + ret void +} + +declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1) +define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* %gds, i32 addrspace(1)* %out, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true) + %val0 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true) + %val1 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true) + %val2 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true) + %val3 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val4 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true) + %val4 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val5 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool) + %val5 = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool) + ret void +} + +declare i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1) +define amdgpu_kernel void @ds_ordered_swap(i32 addrspace(2)* %gds, i32 addrspace(1)* %out, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true) + %val0 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 %var, i32 0, i1 false, i32 1, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true) + %val1 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 %var, i1 false, i32 1, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true) + %val2 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 %bool, i32 1, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true) + %val3 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 %var, i1 true, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val4 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true) + %val4 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 %bool, i1 true) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val5 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool) + %val5 = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 %bool) + ret void +} + +declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) +define amdgpu_kernel void @mov_dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 %var, i32 1, i32 1, i1 true) + %val0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 %var, i32 1, i32 1, i1 1) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 %var, i32 1, i1 true) + %val1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 %var, i32 1, i1 1) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 %var, i1 true) + %val2 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 %var, i1 1) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 1, i1 %bool) + %val3 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in1, i32 1, i32 1, i32 1, i1 %bool) + ret void +} + +declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) +define amdgpu_kernel void @update_dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 %var, i32 1, i32 1, i1 true) + %val0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 %var, i32 1, i32 1, i1 1) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 %var, i32 1, i1 true) + %val1 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 %var, i32 1, i1 1) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 %var, i1 true) + %val2 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 %var, i1 1) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val3 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 %bool) + %val3 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 %bool) + ret void +} + +declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) +define amdgpu_ps void @load_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %var) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 %var, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 %var, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %var, i32 0) + %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %var, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 %var) + %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 %var) + ret void +} + +declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) +define amdgpu_ps void @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %val) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val0 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 %val, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) + %val0 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 %val, i32 %s, <8 x i32> %rsrc, i32 1, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val1 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + %val1 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val2 = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 %val) + %val2 = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 %val) + ret void +} + +declare {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) +define amdgpu_ps void @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s, i32 %var, i1 %bool) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0) + %val0 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %bool + ; CHECK-NEXT: %val1 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 %bool, i32 1, i32 0) + %val1 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 %bool, i32 1, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val2 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 %var, i32 0) + %val2 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 16, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 %var, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val3 = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 %var) + %val3 = call {<4 x float>, i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 %var, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 %var) + ret void +} + +declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) +define amdgpu_ps void @load_1d_a16(<8 x i32> inreg %rsrc, <2 x i16> %coords, i16 %s, i32 %var) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 %var, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + %val0 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 %var, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 %var, i32 0) + %val1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 %var, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 %var) + %val2 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 %var) + ret void +} + +declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32) +define amdgpu_ps void @raw_buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 %data, i32 %var) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %val2 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 %var) + %val2 = call i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 %var) + ret void +} + +declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) +define amdgpu_ps void @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %val) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + %val0 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) + %val1 = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) + ret void +} + +declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0 +define amdgpu_ps void @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s, i32 %val) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val0 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + %val0 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 %val, i32 0) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %val + ; CHECK-NEXT: %val1 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) + %val1 = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 %val) + ret void +} Index: test/Verifier/AMDGPU/lit.local.cfg =================================================================== --- /dev/null +++ test/Verifier/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True Index: test/Verifier/cttz-undef-arg.ll =================================================================== --- test/Verifier/cttz-undef-arg.ll +++ test/Verifier/cttz-undef-arg.ll @@ -5,11 +5,13 @@ define void @f(i32 %x, i1 %is_not_zero) { entry: -; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: i1 %is_not_zero ; CHECK-NEXT: @llvm.ctlz.i32 call i32 @llvm.ctlz.i32(i32 %x, i1 %is_not_zero) -; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: i1 %is_not_zero ; CHECK-NEXT: @llvm.cttz.i32 call i32 @llvm.cttz.i32(i32 %x, i1 %is_not_zero) ret void Index: test/Verifier/element-wise-atomic-memory-intrinsics.ll =================================================================== --- test/Verifier/element-wise-atomic-memory-intrinsics.ll +++ test/Verifier/element-wise-atomic-memory-intrinsics.ll @@ -1,8 +1,11 @@ ; RUN: not opt -verify < %s 2>&1 | FileCheck %s define void @test_memcpy(i8* %P, i8* %Q, i32 %A, i32 %E) { - ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int + ; CHECK: immarg operand has non-immediate parameter + ; CHECK: i32 %E + ; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E) call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E) + ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2 call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 3) @@ -21,11 +24,15 @@ ret void } + declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind define void @test_memmove(i8* %P, i8* %Q, i32 %A, i32 %E) { - ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %E + ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E) call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 %E) + ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2 call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %P, i8* align 4 %Q, i32 1, i32 3) @@ -44,11 +51,15 @@ ret void } + declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind define void @test_memset(i8* %P, i8 %V, i32 %A, i32 %E) { - ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int + ; CHECK: immarg operand has non-immediate parameter + ; CHECK: i32 %E + ; CHECK: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 %E) call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 %E) + ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2 call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 3) Index: test/Verifier/immarg-param-attribute-invalid.ll =================================================================== --- /dev/null +++ test/Verifier/immarg-param-attribute-invalid.ll @@ -0,0 +1,107 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +declare void @llvm.test.immarg.intrinsic.i32(i32 immarg) +declare void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> immarg) +declare void @llvm.test.immarg.intrinsic.f32(float immarg) +declare void @llvm.test.immarg.intrinsic.v2f32(<2 x float> immarg) +declare void @llvm.test.immarg.intrinsic.2ai32([2 x i32] immarg) + +@gv = global i32 undef, align 4 + +define void @call_llvm.test.immarg.intrinsic.i32(i32 %arg) { +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: i32 undef +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.i32(i32 undef) + call void @llvm.test.immarg.intrinsic.i32(i32 undef) + +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: i32 %arg +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.i32(i32 %arg) + call void @llvm.test.immarg.intrinsic.i32(i32 %arg) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 ptrtoint (i32* @gv to i32) + ; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.i32(i32 ptrtoint (i32* @gv to i32)) + call void @llvm.test.immarg.intrinsic.i32(i32 ptrtoint (i32* @gv to i32)) + ret void +} + +define void @call_llvm.test.immarg.intrinsic.f32() { +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: float undef +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.f32(float undef) + call void @llvm.test.immarg.intrinsic.f32(float undef) + ret void +} + +define void @call_llvm.test.immarg.intrinsic.v2i32() { +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: <2 x i32> zeroinitializer +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> zeroinitializer) + call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> zeroinitializer) + +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: <2 x i32> +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> ) + call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> ) + +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: <2 x i32> undef +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> undef) + call void @llvm.test.immarg.intrinsic.v2i32(<2 x i32> undef) + ret void +} + +define void @call_llvm.test.immarg.intrinsic.v2f32() { +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: <2 x float> zeroinitializer +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> zeroinitializer) + call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> zeroinitializer) + +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: <2 x float> +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> ) + call void @llvm.test.immarg.intrinsic.v2f32(<2 x float> ) + ret void +} + +define void @call_llvm.test.immarg.intrinsic.2ai32() { +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: [2 x i32] zeroinitializer +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] zeroinitializer) + call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] zeroinitializer) + +; CHECK: immarg operand has non-immediate parameter +; CHECK-NEXT: [2 x i32] [i32 1, i32 2] +; CHECK-NEXT: call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] [i32 1, i32 2]) + call void @llvm.test.immarg.intrinsic.2ai32([2 x i32] [i32 1, i32 2]) + ret void +} + +; CHECK: immarg attribute only applies to intrinsics +; CHECK-NEXT: void (i32)* @not_an_intrinsic +declare void @not_an_intrinsic(i32 immarg) + +declare void @llvm.test.intrinsic(i32) +declare void @func(i32) + +define void @only_on_callsite() { +; CHECK: immarg attribute only applies to intrinsics +; CHECK-NEXT: call void @func(i32 immarg 0) +; CHECK-NEXT: immarg may not apply only to call sites +; CHECK-NEXT: i32 0 +; CHECK-NEXT: call void @func(i32 immarg 0) + call void @func(i32 immarg 0) + +; CHECK: immarg may not apply only to call sites +; CHECK-NEXT: i32 0 +; CHECK-NEXT: call void @llvm.test.intrinsic(i32 immarg 0) + call void @llvm.test.intrinsic(i32 immarg 0) + ret void +} + +; CHECK: immarg attribute only applies to intrinsics +; CHECK: void (i32)* @on_function_definition +define void @on_function_definition(i32 immarg %arg) { + ret void +} Index: test/Verifier/intrinsic-immarg.ll =================================================================== --- /dev/null +++ test/Verifier/intrinsic-immarg.ll @@ -0,0 +1,129 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +declare i8* @llvm.returnaddress(i32) +define void @return_address(i32 %var) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %result = call i8* @llvm.returnaddress(i32 %var) + %result = call i8* @llvm.returnaddress(i32 %var) + ret void +} + +declare i8* @llvm.frameaddress(i32) +define void @frame_address(i32 %var) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %var + ; CHECK-NEXT: %result = call i8* @llvm.frameaddress(i32 %var) + %result = call i8* @llvm.frameaddress(i32 %var) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) +define void @memcpy(i8* %dest, i8* %src, i1 %is.volatile) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %is.volatile + ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile) + ret void +} + +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) +define void @memmove(i8* %dest, i8* %src, i1 %is.volatile) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %is.volatile + ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile) + ret void +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) +define void @memset(i8* %dest, i8 %val, i1 %is.volatile) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %is.volatile + ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 8, i1 %is.volatile) + call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 8, i1 %is.volatile) + ret void +} + + +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) +define void @objectsize(i8* %ptr, i1 %a, i1 %b, i1 %c) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %a + ; CHECK-NEXT: %val0 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 %a, i1 false, i1 false) + %val0 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 %a, i1 false, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %b + ; CHECK-NEXT: %val1 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 %b, i1 false) + %val1 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 %b, i1 false) + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %c + ; CHECK-NEXT: %val2 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 false, i1 %c) + %val2 = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr, i1 false, i1 false, i1 %c) + ret void +} + +declare i8 @llvm.expect.i8(i8, i8) +define i8 @expect(i8 %arg0, i8 %arg1) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i8 %arg1 + ; CHECK-NEXT: %ret = call i8 @llvm.expect.i8(i8 %arg0, i8 %arg1) + %ret = call i8 @llvm.expect.i8(i8 %arg0, i8 %arg1) + ret i8 %ret +} + +declare i64 @llvm.smul.fix.i64(i64, i64, i32) +define i64 @smul_fix(i64 %arg0, i64 %arg1, i32 %arg2) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg2 + ; CHECK-NEXT: %ret = call i64 @llvm.smul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2) + %ret = call i64 @llvm.smul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2) + ret i64 %ret +} + +declare i64 @llvm.umul.fix.i64(i64, i64, i32) +define i64 @umul_fix(i64 %arg0, i64 %arg1, i32 %arg2) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg2 + ; CHECK-NEXT: %ret = call i64 @llvm.umul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2) + %ret = call i64 @llvm.umul.fix.i64(i64 %arg0, i64 %arg1, i32 %arg2) + ret i64 %ret +} + +declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>) +define <2 x double> @masked_load(<2 x i1> %mask, <2 x double>* %addr, <2 x double> %dst, i32 %align) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %align + ; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 %align, <2 x i1> %mask, <2 x double> %dst) + %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 %align, <2 x i1> %mask, <2 x double> %dst) + ret <2 x double> %res +} + +declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) +define void @masked_store(<4 x i1> %mask, <4 x i32>* %addr, <4 x i32> %val, i32 %align) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %align + ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 %align, <4 x i1> %mask) + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 %align, <4 x i1> %mask) + ret void +} + +declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>) +define <2 x double> @test_gather(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0, i32 %align) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK: i32 %align + ; CHECK: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 %align, <2 x i1> %mask, <2 x double> %src0) + %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 %align, <2 x i1> %mask, <2 x double> %src0) + ret <2 x double> %res +} + +declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32, <8 x i1>) +define void @test_scatter_8i32(<8 x i32> %a1, <8 x i32*> %ptr, <8 x i1> %mask, i32 %align) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %align + ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 %align, <8 x i1> %mask) + call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 %align, <8 x i1> %mask) + ret void +} Index: utils/TableGen/CodeGenIntrinsics.h =================================================================== --- utils/TableGen/CodeGenIntrinsics.h +++ utils/TableGen/CodeGenIntrinsics.h @@ -136,7 +136,15 @@ // True if the intrinsic is marked as speculatable. bool isSpeculatable; - enum ArgAttribute { NoCapture, Returned, ReadOnly, WriteOnly, ReadNone }; + enum ArgAttribute { + NoCapture, + Returned, + ReadOnly, + WriteOnly, + ReadNone, + ImmArg + }; + std::vector> ArgumentAttributes; bool hasProperty(enum SDNP Prop) const { Index: utils/TableGen/CodeGenTarget.cpp =================================================================== --- utils/TableGen/CodeGenTarget.cpp +++ utils/TableGen/CodeGenTarget.cpp @@ -703,6 +703,9 @@ } else if (Property->isSubClassOf("ReadNone")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); ArgumentAttributes.push_back(std::make_pair(ArgNo, ReadNone)); + } else if (Property->isSubClassOf("ImmArg")) { + unsigned ArgNo = Property->getValueAsInt("ArgNo"); + ArgumentAttributes.push_back(std::make_pair(ArgNo, ImmArg)); } else llvm_unreachable("Unknown property!"); } Index: utils/TableGen/IntrinsicEmitter.cpp =================================================================== --- utils/TableGen/IntrinsicEmitter.cpp +++ utils/TableGen/IntrinsicEmitter.cpp @@ -504,7 +504,6 @@ CodeGenIntrinsic::ModRefBehavior LK = L->ModRef; CodeGenIntrinsic::ModRefBehavior RK = R->ModRef; if (LK != RK) return (LK > RK); - // Order by argument attributes. // This is reliable because each side is already sorted internally. return (L->ArgumentAttributes < R->ArgumentAttributes); @@ -612,6 +611,12 @@ OS << "Attribute::ReadNone"; addComma = true; break; + case CodeGenIntrinsic::ImmArg: + if (addComma) + OS << ","; + OS << "Attribute::ImmArg"; + addComma = true; + break; } ++ai; Index: utils/emacs/llvm-mode.el =================================================================== --- utils/emacs/llvm-mode.el +++ utils/emacs/llvm-mode.el @@ -27,7 +27,7 @@ "noduplicate" "noimplicitfloat" "noinline" "nonlazybind" "noredzone" "noreturn" "norecurse" "nounwind" "optnone" "optsize" "readnone" "readonly" "returns_twice" "speculatable" "ssp" "sspreq" "sspstrong" "safestack" "sanitize_address" "sanitize_hwaddress" - "sanitize_thread" "sanitize_memory" "strictfp" "uwtable" "writeonly") 'symbols) . font-lock-constant-face) + "sanitize_thread" "sanitize_memory" "strictfp" "uwtable" "writeonly" "immarg") 'symbols) . font-lock-constant-face) ;; Variables '("%[-a-zA-Z$._][-a-zA-Z$._0-9]*" . font-lock-variable-name-face) ;; Labels