Index: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td +++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td @@ -187,6 +187,10 @@ llvm_i32_ty], // bit offset of the thread count [IntrConvergent]>; +def int_amdgcn_wavefrontsize : + GCCBuiltin<"__builtin_amdgcn_wavefrontsize">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Instruction Intrinsics @@ -1302,11 +1306,11 @@ >; def int_amdgcn_icmp : - Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty], + Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrNoMem, IntrConvergent, ImmArg<2>]>; def int_amdgcn_fcmp : - Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty], + Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>, llvm_i32_ty], [IntrNoMem, IntrConvergent, ImmArg<2>]>; def int_amdgcn_readfirstlane : @@ -1576,23 +1580,23 @@ // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. // ===----------------------------------------------------------------------===// -def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_i64_ty], +def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_anyint_ty], [llvm_i1_ty], [IntrConvergent] >; -def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty], - [llvm_i64_ty], [IntrConvergent] +def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_anyint_ty], + [llvm_anyint_ty], [IntrConvergent] >; -def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty], - [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent] +def int_amdgcn_if_break : Intrinsic<[llvm_anyint_ty], + [llvm_i1_ty, llvm_anyint_ty], [IntrNoMem, IntrConvergent] >; def int_amdgcn_loop : Intrinsic<[llvm_i1_ty], - [llvm_i64_ty], [IntrConvergent] + [llvm_anyint_ty], [IntrConvergent] >; -def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>; +def int_amdgcn_end_cf : Intrinsic<[], [llvm_anyint_ty], [IntrConvergent]>; // Represent unreachable in a divergent region. def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>; Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -249,7 +249,8 @@ // We need to know how many lanes are active within the wavefront, and we do // this by doing a ballot of active lanes. CallInst *const Ballot = - B.CreateIntrinsic(Intrinsic::amdgcn_icmp, {B.getInt32Ty()}, + B.CreateIntrinsic(Intrinsic::amdgcn_icmp, + {B.getInt64Ty(), B.getInt32Ty()}, {B.getInt32(1), B.getInt32(0), B.getInt32(33)}); // We need to know how many lanes are active within the wavefront that are Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -1028,6 +1028,10 @@ std::vector> &Mutations) const override; + bool isWave32() const { + return WavefrontSize == 32; + } + /// \returns Maximum number of work groups per compute unit supported by the /// subtarget and limited by given \p FlatWorkGroupSize. 
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { Index: llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" @@ -55,13 +57,13 @@ Type *Boolean; Type *Void; - Type *Int64; + Type *IntMask; Type *ReturnStruct; ConstantInt *BoolTrue; ConstantInt *BoolFalse; UndefValue *BoolUndef; - Constant *Int64Zero; + Constant *IntMaskZero; Function *If; Function *Else; @@ -74,6 +76,8 @@ LoopInfo *LI; + void initialize(Module &M, const GCNSubtarget &ST); + bool isUniform(BranchInst *T); bool isTopOfStack(BasicBlock *BB); @@ -103,8 +107,6 @@ SIAnnotateControlFlow() : FunctionPass(ID) {} - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; StringRef getPassName() const override { return "SI annotate control flow"; } @@ -114,6 +116,7 @@ AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); FunctionPass::getAnalysisUsage(AU); } }; @@ -124,31 +127,34 @@ "Annotate SI Control Flow", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE, "Annotate SI Control Flow", false, false) char SIAnnotateControlFlow::ID = 0; /// Initialize all the types and constants used in the pass -bool SIAnnotateControlFlow::doInitialization(Module &M) { +void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) { LLVMContext &Context = M.getContext(); Void = Type::getVoidTy(Context); Boolean = Type::getInt1Ty(Context); - Int64 = Type::getInt64Ty(Context); - ReturnStruct = StructType::get(Boolean, Int64); + IntMask = ST.isWave32() ? 
Type::getInt32Ty(Context) + : Type::getInt64Ty(Context); + ReturnStruct = StructType::get(Boolean, IntMask); BoolTrue = ConstantInt::getTrue(Context); BoolFalse = ConstantInt::getFalse(Context); BoolUndef = UndefValue::get(Boolean); - Int64Zero = ConstantInt::get(Int64, 0); + IntMaskZero = ConstantInt::get(IntMask, 0); - If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if); - Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else); - IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break); - Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop); - EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf); - return false; + If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if, { IntMask }); + Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else, + { IntMask, IntMask }); + IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break, + { IntMask, IntMask }); + Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop, { IntMask }); + EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf, { IntMask }); } /// Is the branch condition uniform or did the StructurizeCFG pass @@ -258,14 +264,14 @@ return; BasicBlock *Target = Term->getSuccessor(1); - PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front()); + PHINode *Broken = PHINode::Create(IntMask, 0, "phi.broken", &Target->front()); Value *Cond = Term->getCondition(); Term->setCondition(BoolTrue); Value *Arg = handleLoopCondition(Cond, Broken, L, Term); for (BasicBlock *Pred : predecessors(Target)) { - Value *PHIValue = Int64Zero; + Value *PHIValue = IntMaskZero; if (Pred == BB) // Remember the value of the previous iteration. PHIValue = Arg; // If the backedge from Pred to Target could be executed before the exit @@ -316,6 +322,10 @@ DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); DA = &getAnalysis(); + TargetPassConfig &TPC = getAnalysis(); + const TargetMachine &TM = TPC.getTM(); + + initialize(*F.getParent(), TM.getSubtarget(F)); for (df_iterator I = df_begin(&F.getEntryBlock()), E = df_end(&F.getEntryBlock()); I != E; ++I) { Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3839,7 +3839,6 @@ ICmpInst::Predicate IcInput = static_cast(CondCode); - SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); @@ -3855,8 +3854,14 @@ ISD::CondCode CCOpcode = getICmpCondCode(IcInput); - return DAG.getNode(AMDGPUISD::SETCC, DL, VT, LHS, RHS, - DAG.getCondCode(CCOpcode)); + unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize(); + EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize); + + SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, DL, CCVT, LHS, RHS, + DAG.getCondCode(CCOpcode)); + if (VT.bitsEq(CCVT)) + return SetCC; + return DAG.getZExtOrTrunc(SetCC, DL, VT); } static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI, @@ -3882,8 +3887,13 @@ FCmpInst::Predicate IcInput = static_cast(CondCode); ISD::CondCode CCOpcode = getFCmpCondCode(IcInput); - return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src0, - Src1, DAG.getCondCode(CCOpcode)); + unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize(); + EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize); + SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, SL, CCVT, Src0, + Src1, DAG.getCondCode(CCOpcode)); + if (VT.bitsEq(CCVT)) + return SetCC; + return DAG.getZExtOrTrunc(SetCC, SL, VT); } 
void SITargetLowering::ReplaceNodeResults(SDNode *N, @@ -5394,6 +5404,9 @@ return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, SDLoc(DAG.getEntryNode()), MFI->getArgInfo().WorkItemIDZ); + case Intrinsic::amdgcn_wavefrontsize: + return DAG.getConstant(MF.getSubtarget().getWavefrontSize(), + SDLoc(Op), MVT::i32); case Intrinsic::amdgcn_s_buffer_load: { unsigned Cache = cast(Op.getOperand(3))->getZExtValue(); return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), @@ -5598,6 +5611,11 @@ case Intrinsic::amdgcn_fmad_ftz: return DAG.getNode(AMDGPUISD::FMAD_FTZ, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::amdgcn_if_break: + return SDValue(DAG.getMachineNode(AMDGPU::SI_IF_BREAK, DL, VT, + Op->getOperand(1), Op->getOperand(2)), 0); + default: if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) @@ -6495,6 +6513,10 @@ M->getMemoryVT(), M->getMemOperand()); } + case Intrinsic::amdgcn_end_cf: + return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other, + Op->getOperand(2), Chain), 0); + default: { if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) Index: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td @@ -604,7 +604,12 @@ // TODO: we could add more variants for other types of conditionals def : Pat < - (int_amdgcn_icmp i1:$src, (i1 0), (i32 33)), + (i64 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))), + (COPY $src) // Return the SGPRs representing i1 src +>; + +def : Pat < + (i32 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))), (COPY $src) // Return the SGPRs representing i1 src >; Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3733,7 +3733,9 @@ break; Function *NewF = - Intrinsic::getDeclaration(II->getModule(), NewIID, SrcLHS->getType()); + Intrinsic::getDeclaration(II->getModule(), NewIID, + { II->getType(), + SrcLHS->getType() }); Value *Args[] = { SrcLHS, SrcRHS, ConstantInt::get(CC->getType(), SrcPred) }; CallInst *NewCall = Builder.CreateCall(NewF, Args); Index: llvm/trunk/test/CodeGen/AMDGPU/diverge-switch-default.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/diverge-switch-default.ll +++ llvm/trunk/test/CodeGen/AMDGPU/diverge-switch-default.ll @@ -38,8 +38,8 @@ ; CHECK: load i8 ; CHECK-NOT: {{ br }} ; CHECK: [[ICMP:%[a-zA-Z0-9._]+]] = icmp eq -; CHECK: [[IF:%[a-zA-Z0-9._]+]] = call i64 @llvm.amdgcn.if.break(i1 [[ICMP]], i64 [[PHI]]) -; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop(i64 [[IF]]) +; CHECK: [[IF:%[a-zA-Z0-9._]+]] = call i64 @llvm.amdgcn.if.break.i64.i64(i1 [[ICMP]], i64 [[PHI]]) +; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop.i64(i64 [[IF]]) ; CHECK: br i1 [[LOOP]] sw.while: Index: llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll +++ llvm/trunk/test/CodeGen/AMDGPU/loop_break.ll @@ -15,12 +15,12 @@ ; OPT: br label %Flow ; OPT: Flow: -; OPT: call i64 @llvm.amdgcn.if.break( -; OPT: call i1 @llvm.amdgcn.loop(i64 +; OPT: call i64 
@llvm.amdgcn.if.break.i64.i64( +; OPT: call i1 @llvm.amdgcn.loop.i64(i64 ; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1 ; OPT: bb9: -; OPT: call void @llvm.amdgcn.end.cf(i64 +; OPT: call void @llvm.amdgcn.end.cf.i64(i64 ; GCN-LABEL: {{^}}break_loop: ; GCN: s_mov_b64 [[OUTER_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}} @@ -84,12 +84,12 @@ ; OPT: Flow: ; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] ; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ] -; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %tmp3, i64 %phi.broken) -; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %0) +; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken) +; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0) ; OPT-NEXT: br i1 %1, label %bb9, label %bb1 ; OPT: bb9: ; preds = %Flow -; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %0) +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0) ; OPT-NEXT: store volatile i32 7 ; OPT-NEXT: ret void define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 { @@ -138,12 +138,12 @@ ; OPT: Flow: ; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] ; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ] -; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %tmp3, i64 %phi.broken) -; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %0) +; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken) +; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0) ; OPT-NEXT: br i1 %1, label %bb9, label %bb1 ; OPT: bb9: ; preds = %Flow -; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %0) +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0) ; OPT-NEXT: store volatile i32 7 ; OPT-NEXT: ret void define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 { @@ -189,12 +189,12 @@ ; OPT: Flow: ; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] ; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ] -; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %tmp3, i64 %phi.broken) -; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %0) +; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken) +; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0) ; OPT-NEXT: br i1 %1, label %bb9, label %bb1 ; OPT: bb9: ; preds = %Flow -; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %0) +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0) ; OPT-NEXT: store volatile i32 7 ; OPT-NEXT: ret void define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 { @@ -239,12 +239,12 @@ ; OPT: Flow: ; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] ; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ] -; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %tmp3, i64 %phi.broken) -; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %0) +; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken) +; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0) ; OPT-NEXT: br i1 %1, label %bb9, label %bb1 ; OPT: bb9: ; preds = %Flow -; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %0) +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0) ; OPT-NEXT: store volatile i32 7 ; OPT-NEXT: ret void define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 { @@ -294,12 +294,12 @@ ; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ] ; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ] ; OPT-NEXT: %0 = xor i1 %tmp3, true -; OPT-NEXT: %1 = call i64 
@llvm.amdgcn.if.break(i1 %0, i64 %phi.broken) -; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %1) +; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %0, i64 %phi.broken) +; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop.i64(i64 %1) ; OPT-NEXT: br i1 %2, label %bb9, label %bb1 ; OPT: bb9: -; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %1) +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %1) ; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef ; OPT-NEXT: ret void define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 { Index: llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll +++ llvm/trunk/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll @@ -9,7 +9,7 @@ ; StructurizeCFG. ; IR-LABEL: @multi_divergent_region_exit_ret_ret( -; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0) +; IR: %1 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %0) ; IR: %2 = extractvalue { i1, i64 } %1, 0 ; IR: %3 = extractvalue { i1, i64 } %1, 1 ; IR: br i1 %2, label %LeafBlock1, label %Flow @@ -17,7 +17,7 @@ ; IR: Flow: ; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] ; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ] -; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) +; IR: %6 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %3) ; IR: %7 = extractvalue { i1, i64 } %6, 0 ; IR: %8 = extractvalue { i1, i64 } %6, 1 ; IR: br i1 %7, label %LeafBlock, label %Flow1 @@ -30,8 +30,8 @@ ; IR: Flow2: ; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %19) -; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) +; IR: call void @llvm.amdgcn.end.cf.i64(i64 %19) +; IR: %12 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %11) ; IR: %13 = extractvalue { i1, i64 } %12, 0 ; IR: %14 = extractvalue { i1, i64 } %12, 1 ; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock @@ -43,8 +43,8 @@ ; IR: Flow1: ; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ] ; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ] -; IR: call void @llvm.amdgcn.end.cf(i64 %8) -; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16) +; IR: call void @llvm.amdgcn.end.cf.i64(i64 %8) +; IR: %17 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %16) ; IR: %18 = extractvalue { i1, i64 } %17, 0 ; IR: %19 = extractvalue { i1, i64 } %17, 1 ; IR: br i1 %18, label %exit1, label %Flow2 @@ -54,7 +54,7 @@ ; IR: br label %Flow2 ; IR: UnifiedReturnBlock: -; IR: call void @llvm.amdgcn.end.cf(i64 %14) +; IR: call void @llvm.amdgcn.end.cf.i64(i64 %14) ; IR: ret void @@ -141,13 +141,13 @@ } ; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable( -; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0) +; IR: %1 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %0) -; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) +; IR: %6 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %3) ; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %19) -; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) +; IR: call void @llvm.amdgcn.end.cf.i64(i64 %19) +; IR: %12 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %11) ; IR: br i1 %13, label %exit0, label %UnifiedUnreachableBlock @@ -203,7 +203,7 @@ ; IR: {{^}}Flow: ; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] ; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ] -; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) +; IR: %6 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %3) ; 
IR: br i1 %7, label %LeafBlock, label %Flow1 ; IR: {{^}}LeafBlock: @@ -218,8 +218,8 @@ ; IR: Flow2: ; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %19) -; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) +; IR: call void @llvm.amdgcn.end.cf.i64(i64 %19) +; IR: %12 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %11) ; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock ; IR: exit0: @@ -229,8 +229,8 @@ ; IR: {{^}}Flow1: ; IR: %15 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ] ; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ] -; IR: call void @llvm.amdgcn.end.cf(i64 %8) -; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16) +; IR: call void @llvm.amdgcn.end.cf.i64(i64 %8) +; IR: %17 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %16) ; IR: %18 = extractvalue { i1, i64 } %17, 0 ; IR: %19 = extractvalue { i1, i64 } %17, 1 ; IR: br i1 %18, label %exit1, label %Flow2 @@ -240,7 +240,7 @@ ; IR: br label %Flow2 ; IR: UnifiedReturnBlock: -; IR: call void @llvm.amdgcn.end.cf(i64 %14) +; IR: call void @llvm.amdgcn.end.cf.i64(i64 %14) ; IR: ret void define amdgpu_kernel void @multi_exit_region_divergent_ret_uniform_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 { entry: @@ -279,17 +279,17 @@ } ; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret( -; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0) +; IR: %1 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %0) ; IR: br i1 %2, label %LeafBlock1, label %Flow ; IR: Flow: ; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] ; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ] -; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) +; IR: %6 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %3) ; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %19) -; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) +; IR: call void @llvm.amdgcn.end.cf.i64(i64 %19) +; IR: %12 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %11) define amdgpu_kernel void @multi_exit_region_uniform_ret_divergent_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 { entry: @@ -330,11 +330,11 @@ ; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value( ; IR: Flow2: ; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %19) +; IR: call void @llvm.amdgcn.end.cf.i64(i64 %19) ; IR: UnifiedReturnBlock: ; IR: %UnifiedRetVal = phi float [ 2.000000e+00, %Flow2 ], [ 1.000000e+00, %exit0 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %14) +; IR: call void @llvm.amdgcn.end.cf.i64(i64 %14) ; IR: ret float %UnifiedRetVal define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 { entry: @@ -402,17 +402,17 @@ } ; IR-LABEL: @multi_divergent_region_exit_ret_unreachable( -; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0) +; IR: %1 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %0) ; IR: Flow: ; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ] ; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ] -; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) +; IR: %6 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %3) ; IR: Flow2: ; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ] -; IR: call void @llvm.amdgcn.end.cf(i64 %19) -; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11) +; IR: call void @llvm.amdgcn.end.cf.i64(i64 %19) +; IR: %12 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %11) ; IR: br 
i1 %13, label %exit0, label %UnifiedReturnBlock ; IR: exit0: @@ -422,8 +422,8 @@ ; IR: Flow1: ; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ] ; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ] -; IR: call void @llvm.amdgcn.end.cf(i64 %8) -; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16) +; IR: call void @llvm.amdgcn.end.cf.i64(i64 %8) +; IR: %17 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %16) ; IR: %18 = extractvalue { i1, i64 } %17, 0 ; IR: %19 = extractvalue { i1, i64 } %17, 1 ; IR: br i1 %18, label %exit1, label %Flow2 @@ -434,7 +434,7 @@ ; IR-NEXT: br label %Flow2 ; IR: UnifiedReturnBlock: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14) +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %14) ; IR-NEXT: ret void define amdgpu_kernel void @multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 { entry: @@ -490,7 +490,7 @@ ; IR-NEXT: br label %Flow2 ; IR: UnifiedReturnBlock: ; preds = %exit0, %Flow2 -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14) +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %14) ; IR-NEXT: ret void define amdgpu_kernel void @indirect_multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 { entry: @@ -645,7 +645,7 @@ ; IR: br i1 %11, label %uniform.endif, label %uniform.ret0 ; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2 -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %6) +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %6) ; IR-NEXT: ret void define amdgpu_kernel void @uniform_complex_multi_ret_nest_in_divergent_triangle(i32 %arg0) #0 { entry: @@ -691,7 +691,7 @@ ; IR-NEXT: br label %UnifiedReturnBlock ; IR: UnifiedReturnBlock: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 ; IR-NEXT: ret void define amdgpu_kernel void @multi_divergent_unreachable_exit() #0 { bb: Index: llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll +++ llvm/trunk/test/CodeGen/AMDGPU/multilevel-break.ll @@ -5,7 +5,7 @@ ; OPT: main_body: ; OPT: LOOP.outer: ; OPT: LOOP: -; OPT: [[if:%[0-9]+]] = call { i1, i64 } @llvm.amdgcn.if( +; OPT: [[if:%[0-9]+]] = call { i1, i64 } @llvm.amdgcn.if.i64( ; OPT: [[if_exec:%[0-9]+]] = extractvalue { i1, i64 } [[if]], 1 ; ; OPT: Flow: @@ -13,9 +13,9 @@ ; Ensure two if.break calls, for both the inner and outer loops ; OPT: call void @llvm.amdgcn.end.cf -; OPT-NEXT: call i64 @llvm.amdgcn.if.break(i1 -; OPT-NEXT: call i1 @llvm.amdgcn.loop(i64 -; OPT-NEXT: call i64 @llvm.amdgcn.if.break(i1 +; OPT-NEXT: call i64 @llvm.amdgcn.if.break.i64.i64(i1 +; OPT-NEXT: call i1 @llvm.amdgcn.loop.i64(i64 +; OPT-NEXT: call i64 @llvm.amdgcn.if.break.i64.i64(i1 ; ; OPT: Flow1: Index: llvm/trunk/test/CodeGen/AMDGPU/nested-loop-conditions.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ llvm/trunk/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -13,7 +13,7 @@ ; IR-NEXT: %phi.broken = phi i64 [ %3, %bb10 ], [ 0, %bb ] ; IR-NEXT: %tmp6 = phi i32 [ 0, %bb ], [ %tmp11, %bb10 ] ; IR-NEXT: %tmp7 = icmp eq i32 %tmp6, 1 -; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %tmp7) +; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %tmp7) ; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0 ; IR-NEXT: %2 = extractvalue { i1, i64 
} %0, 1 ; IR-NEXT: br i1 %1, label %bb8, label %Flow @@ -24,14 +24,14 @@ ; IR: bb10: ; IR-NEXT: %tmp11 = phi i32 [ %6, %Flow ] ; IR-NEXT: %tmp12 = phi i1 [ %5, %Flow ] -; IR-NEXT: %3 = call i64 @llvm.amdgcn.if.break(i1 %tmp12, i64 %phi.broken) -; IR-NEXT: %4 = call i1 @llvm.amdgcn.loop(i64 %3) +; IR-NEXT: %3 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp12, i64 %phi.broken) +; IR-NEXT: %4 = call i1 @llvm.amdgcn.loop.i64(i64 %3) ; IR-NEXT: br i1 %4, label %bb23, label %bb5 ; IR: Flow: ; IR-NEXT: %5 = phi i1 [ %tmp22, %bb4 ], [ true, %bb5 ] ; IR-NEXT: %6 = phi i32 [ %tmp21, %bb4 ], [ undef, %bb5 ] -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %2) +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %2) ; IR-NEXT: br label %bb10 ; IR: bb13: @@ -51,7 +51,7 @@ ; IR-NEXT: br label %bb9 ; IR: bb23: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %3) +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %3) ; IR-NEXT: ret void ; GCN-LABEL: {{^}}reduced_nested_loop_conditions: @@ -121,27 +121,27 @@ ; IR-LABEL: @nested_loop_conditions( ; IR: Flow3: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %21) -; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %14) +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %21) +; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %14) ; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0 ; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1 ; IR-NEXT: br i1 %1, label %bb4.bb13_crit_edge, label %Flow4 ; IR: Flow4: ; IR-NEXT: %3 = phi i1 [ true, %bb4.bb13_crit_edge ], [ false, %Flow3 ] -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %2) +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %2) ; IR-NEXT: br label %Flow ; IR: Flow: ; IR-NEXT: %4 = phi i1 [ %3, %Flow4 ], [ true, %bb ] -; IR-NEXT: %5 = call { i1, i64 } @llvm.amdgcn.if(i1 %4) +; IR-NEXT: %5 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %4) ; IR-NEXT: %6 = extractvalue { i1, i64 } %5, 0 ; IR-NEXT: %7 = extractvalue { i1, i64 } %5, 1 ; IR-NEXT: br i1 %6, label %bb13, label %bb31 ; IR: bb14: ; IR: %tmp15 = icmp eq i32 %tmp1037, 1 -; IR-NEXT: %8 = call { i1, i64 } @llvm.amdgcn.if(i1 %tmp15) +; IR-NEXT: %8 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %tmp15) ; IR: Flow1: ; IR-NEXT: %11 = phi <4 x i32> [ %tmp9, %bb21 ], [ undef, %bb14 ] @@ -149,9 +149,9 @@ ; IR-NEXT: %13 = phi i1 [ %18, %bb21 ], [ true, %bb14 ] ; IR-NEXT: %14 = phi i1 [ %18, %bb21 ], [ false, %bb14 ] ; IR-NEXT: %15 = phi i1 [ false, %bb21 ], [ true, %bb14 ] -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %10) -; IR-NEXT: %16 = call i64 @llvm.amdgcn.if.break(i1 %13, i64 %phi.broken) -; IR-NEXT: %17 = call i1 @llvm.amdgcn.loop(i64 %16) +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %10) +; IR-NEXT: %16 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %13, i64 %phi.broken) +; IR-NEXT: %17 = call i1 @llvm.amdgcn.loop.i64(i64 %16) ; IR-NEXT: br i1 %17, label %Flow2, label %bb14 ; IR: bb21: @@ -160,14 +160,14 @@ ; IR-NEXT: br label %Flow1 ; IR: Flow2: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %16) -; IR-NEXT: %19 = call { i1, i64 } @llvm.amdgcn.if(i1 %15) +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %16) +; IR-NEXT: %19 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %15) ; IR-NEXT: %20 = extractvalue { i1, i64 } %19, 0 ; IR-NEXT: %21 = extractvalue { i1, i64 } %19, 1 ; IR-NEXT: br i1 %20, label %bb31.loopexit, label %Flow3 ; IR: bb31: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %7) +; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %7) ; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef ; IR-NEXT: ret void Index: llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll 
=================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll +++ llvm/trunk/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll @@ -3,8 +3,8 @@ ; OPT-LABEL: @annotate_unreachable( -; OPT: call { i1, i64 } @llvm.amdgcn.if( -; OPT-NOT: call void @llvm.amdgcn.end.cf( +; OPT: call { i1, i64 } @llvm.amdgcn.if.i64( +; OPT-NOT: call void @llvm.amdgcn.end.cf ; GCN-LABEL: {{^}}annotate_unreachable: Index: llvm/trunk/test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll +++ llvm/trunk/test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll @@ -17,17 +17,17 @@ ; OPT-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP5:%.*]], [[LOOP]] ], [ 0, [[LOOP_END]] ] ; OPT-NEXT: [[TMP5]] = add nsw i32 [[TMP4]], [[TMP]] ; OPT-NEXT: [[TMP6:%.*]] = icmp slt i32 [[ARG]], [[TMP5]] -; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break(i1 [[TMP6]], i64 [[PHI_BROKEN]]) -; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop(i64 [[TMP0]]) +; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64.i64(i1 [[TMP6]], i64 [[PHI_BROKEN]]) +; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]]) ; OPT-NEXT: br i1 [[TMP1]], label [[LOOP_END]], label [[LOOP]] ; OPT: loop_end: -; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 [[TMP0]]) +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]]) ; OPT-NEXT: [[EXIT:%.*]] = icmp sgt i32 [[TMP5]], [[TMP2]] -; OPT-NEXT: [[TMP7]] = call i64 @llvm.amdgcn.if.break(i1 [[EXIT]], i64 [[PHI_BROKEN1]]) -; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop(i64 [[TMP7]]) +; OPT-NEXT: [[TMP7]] = call i64 @llvm.amdgcn.if.break.i64.i64(i1 [[EXIT]], i64 [[PHI_BROKEN1]]) +; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP7]]) ; OPT-NEXT: br i1 [[TMP3]], label [[LOOP_EXIT:%.*]], label [[LOOP]] ; OPT: loop_exit: -; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 [[TMP7]]) +; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]]) ; OPT-NEXT: [[TMP12:%.*]] = zext i32 [[TMP]] to i64 ; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[ARG1:%.*]], i64 [[TMP12]] ; OPT-NEXT: [[TMP14:%.*]] = addrspacecast i32* [[TMP13]] to i32 addrspace(1)* Index: llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -1628,19 +1628,19 @@ ; llvm.amdgcn.icmp ; -------------------------------------------------------------------- -declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32 immarg) nounwind readnone convergent -declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32 immarg) nounwind readnone convergent -declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32 immarg) nounwind readnone convergent +declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32 immarg) nounwind readnone convergent +declare i64 @llvm.amdgcn.icmp.i64.i64(i64, i64, i32 immarg) nounwind readnone convergent +declare i64 @llvm.amdgcn.icmp.i64.i1(i1, i1, i32 immarg) nounwind readnone convergent define i64 @invalid_icmp_code(i32 %a, i32 %b) { ; CHECK-LABEL: @invalid_icmp_code( -; CHECK-NEXT: [[UNDER:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 31) -; CHECK-NEXT: [[OVER:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A]], i32 [[B]], i32 42) +; CHECK-NEXT: [[UNDER:%.*]] = call i64 
@llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 31) +; CHECK-NEXT: [[OVER:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A]], i32 [[B]], i32 42) ; CHECK-NEXT: [[OR:%.*]] = or i64 [[UNDER]], [[OVER]] ; CHECK-NEXT: ret i64 [[OR]] ; - %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31) - %over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42) + %under = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 31) + %over = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 42) %or = or i64 %under, %over ret i64 %or } @@ -1649,7 +1649,7 @@ ; CHECK-LABEL: @icmp_constant_inputs_false( ; CHECK-NEXT: ret i64 0 ; - %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 32) + %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 32) ret i64 %result } @@ -1658,236 +1658,236 @@ ; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) #5 ; CHECK-NEXT: ret i64 [[RESULT]] ; - %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34) + %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34) ret i64 %result } define i64 @icmp_constant_to_rhs_slt(i32 %x) { ; CHECK-LABEL: @icmp_constant_to_rhs_slt( -; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[X:%.*]], i32 9, i32 38) +; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[X:%.*]], i32 9, i32 38) ; CHECK-NEXT: ret i64 [[RESULT]] ; - %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 %x, i32 40) + %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 %x, i32 40) ret i64 %result } define i64 @fold_icmp_ne_0_zext_icmp_eq_i32(i32 %a, i32 %b) { ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp eq i32 %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) ret i64 %mask } define i64 @fold_icmp_ne_0_zext_icmp_ne_i32(i32 %a, i32 %b) { ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ne_i32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp ne i32 %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) ret i64 %mask } define i64 @fold_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) { ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_sle_i32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 41) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 41) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp sle i32 %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) ret i64 %mask } define i64 @fold_icmp_ne_0_zext_icmp_ugt_i64(i64 %a, i64 %b) { ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ugt_i64( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], 
i64 [[B:%.*]], i32 34) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp ugt i64 %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) ret i64 %mask } define i64 @fold_icmp_ne_0_zext_icmp_ult_swap_i64(i64 %a, i64 %b) { ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_swap_i64( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp ugt i64 %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 0, i32 %zext.cmp, i32 33) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 0, i32 %zext.cmp, i32 33) ret i64 %mask } define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f32(float %a, float %b) { ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A:%.*]], float [[B:%.*]], i32 1) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 1) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = fcmp oeq float %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) ret i64 %mask } define i64 @fold_icmp_ne_0_zext_fcmp_une_f32(float %a, float %b) { ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_une_f32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A:%.*]], float [[B:%.*]], i32 14) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = fcmp une float %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) ret i64 %mask } define i64 @fold_icmp_ne_0_zext_fcmp_olt_f64(double %a, double %b) { ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_olt_f64( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f64(double [[A:%.*]], double [[B:%.*]], i32 4) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f64(double [[A:%.*]], double [[B:%.*]], i32 4) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = fcmp olt double %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) ret i64 %mask } define i64 @fold_icmp_sext_icmp_ne_0_i32(i32 %a, i32 %b) { ; CHECK-LABEL: @fold_icmp_sext_icmp_ne_0_i32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp eq i32 %a, %b %sext.cmp = sext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 0, i32 33) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 0, i32 33) ret i64 %mask } define i64 @fold_icmp_eq_0_zext_icmp_eq_i32(i32 %a, i32 %b) { ; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_eq_i32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp eq i32 %a, %b %zext.cmp = 
zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) ret i64 %mask } define i64 @fold_icmp_eq_0_zext_icmp_slt_i32(i32 %a, i32 %b) { ; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_slt_i32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp slt i32 %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) ret i64 %mask } define i64 @fold_icmp_eq_0_zext_fcmp_oeq_f32(float %a, float %b) { ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_oeq_f32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A:%.*]], float [[B:%.*]], i32 14) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = fcmp oeq float %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) ret i64 %mask } define i64 @fold_icmp_eq_0_zext_fcmp_ule_f32(float %a, float %b) { ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ule_f32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A:%.*]], float [[B:%.*]], i32 2) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 2) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = fcmp ule float %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) ret i64 %mask } define i64 @fold_icmp_eq_0_zext_fcmp_ogt_f32(float %a, float %b) { ; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ogt_f32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A:%.*]], float [[B:%.*]], i32 13) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 13) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = fcmp ogt float %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32) ret i64 %mask } define i64 @fold_icmp_zext_icmp_eq_1_i32(i32 %a, i32 %b) { ; CHECK-LABEL: @fold_icmp_zext_icmp_eq_1_i32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp eq i32 %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 1, i32 32) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 1, i32 32) ret i64 %mask } define i64 @fold_icmp_zext_argi1_eq_1_i32(i1 %cond) { ; CHECK-LABEL: @fold_icmp_zext_argi1_eq_1_i32( ; CHECK-NEXT: [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32 -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[ZEXT_COND]], i32 0, i32 33) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 0, i32 33) ; CHECK-NEXT: ret i64 [[MASK]] ; %zext.cond = zext i1 %cond to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 1, i32 32) 
+ %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 1, i32 32) ret i64 %mask } define i64 @fold_icmp_zext_argi1_eq_neg1_i32(i1 %cond) { ; CHECK-LABEL: @fold_icmp_zext_argi1_eq_neg1_i32( ; CHECK-NEXT: [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32 -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[ZEXT_COND]], i32 -1, i32 32) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 -1, i32 32) ; CHECK-NEXT: ret i64 [[MASK]] ; %zext.cond = zext i1 %cond to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 -1, i32 32) ret i64 %mask } define i64 @fold_icmp_sext_argi1_eq_1_i32(i1 %cond) { ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_1_i32( ; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32 -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[SEXT_COND]], i32 1, i32 32) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 1, i32 32) ; CHECK-NEXT: ret i64 [[MASK]] ; %sext.cond = sext i1 %cond to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 1, i32 32) ret i64 %mask } define i64 @fold_icmp_sext_argi1_eq_neg1_i32(i1 %cond) { ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i32( ; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32 -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[SEXT_COND]], i32 0, i32 33) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 0, i32 33) ; CHECK-NEXT: ret i64 [[MASK]] ; %sext.cond = sext i1 %cond to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 -1, i32 32) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 -1, i32 32) ret i64 %mask } define i64 @fold_icmp_sext_argi1_eq_neg1_i64(i1 %cond) { ; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i64( ; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i64 -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64(i64 [[SEXT_COND]], i64 0, i32 33) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[SEXT_COND]], i64 0, i32 33) ; CHECK-NEXT: ret i64 [[MASK]] ; %sext.cond = sext i1 %cond to i64 - %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 -1, i32 32) + %mask = call i64 @llvm.amdgcn.icmp.i64.i64(i64 %sext.cond, i64 -1, i32 32) ret i64 %mask } @@ -1896,46 +1896,46 @@ ; CHECK-LABEL: @fold_icmp_sext_icmp_eq_1_i32( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[SEXT_CMP:%.*]] = sext i1 [[CMP]] to i32 -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[SEXT_CMP]], i32 1, i32 32) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_CMP]], i32 1, i32 32) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp eq i32 %a, %b %sext.cmp = sext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 1, i32 32) ret i64 %mask } define i64 @fold_icmp_sext_icmp_eq_neg1_i32(i32 %a, i32 %b) { ; CHECK-LABEL: @fold_icmp_sext_icmp_eq_neg1_i32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp eq i32 %a, %b %sext.cmp = sext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 
%sext.cmp, i32 -1, i32 32) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32) ret i64 %mask } define i64 @fold_icmp_sext_icmp_sge_neg1_i32(i32 %a, i32 %b) { ; CHECK-LABEL: @fold_icmp_sext_icmp_sge_neg1_i32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp sge i32 %a, %b %sext.cmp = sext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32) ret i64 %mask } define i64 @fold_not_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) { ; CHECK-LABEL: @fold_not_icmp_ne_0_zext_icmp_sle_i32( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 38) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 38) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp sle i32 %a, %b %not = xor i1 %cmp, true %zext.cmp = zext i1 %not to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) ret i64 %mask } @@ -1943,12 +1943,12 @@ ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i4( ; CHECK-NEXT: [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16 -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp eq i4 %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) ret i64 %mask } @@ -1956,23 +1956,23 @@ ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i8( ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16 ; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16 -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp eq i8 %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) ret i64 %mask } define i64 @fold_icmp_ne_0_zext_icmp_eq_i16(i16 %a, i16 %b) { ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i16( -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 32) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 32) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp eq i16 %a, %b %zext.cmp = zext i1 %cmp to i32 - %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33) + %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33) ret i64 %mask } @@ -1980,12 +1980,12 @@ ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i36( ; CHECK-NEXT: [[TMP1:%.*]] = zext i36 [[A:%.*]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = zext i36 [[B:%.*]] to i64 -; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64(i64 [[TMP1]], i64 [[TMP2]], i32 32) +; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP1]], i64 [[TMP2]], i32 32) ; CHECK-NEXT: ret i64 [[MASK]] ; %cmp = icmp eq i36 %a, 
%b
   %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }
@@ -1993,37 +1993,36 @@
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i128(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT: [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp eq i128 %a, %b
   %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f16(half %a, half %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f16(
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f16(half [[A:%.*]], half [[B:%.*]], i32 1)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f16(half [[A:%.*]], half [[B:%.*]], i32 1)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = fcmp oeq half %a, %b
   %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f128(fp128 %a, fp128 %b) {
-;
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f128(
 ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT: [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = fcmp oeq fp128 %a, %b
   %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }
@@ -2031,12 +2030,12 @@
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i4(
 ; CHECK-NEXT: [[TMP1:%.*]] = sext i4 [[A:%.*]] to i16
 ; CHECK-NEXT: [[TMP2:%.*]] = sext i4 [[B:%.*]] to i16
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp slt i4 %a, %b
   %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }
@@ -2044,23 +2043,23 @@
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i8(
 ; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[A:%.*]] to i16
 ; CHECK-NEXT: [[TMP2:%.*]] = sext i8 [[B:%.*]] to i16
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp slt i8 %a, %b
   %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_ne_0_zext_icmp_slt_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i16(
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 40)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 40)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp slt i16 %a, %b
   %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }
@@ -2068,12 +2067,12 @@
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i4(
 ; CHECK-NEXT: [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16
 ; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp ult i4 %a, %b
   %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }
@@ -2081,23 +2080,23 @@
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i8(
 ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
 ; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp ult i8 %a, %b
   %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_ne_0_zext_icmp_ult_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i16(
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 36)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 36)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp ult i16 %a, %b
   %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
   ret i64 %mask
 }
@@ -2106,232 +2105,231 @@
 define i64 @fold_icmp_i1_ne_0_icmp_eq_i1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i1(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp eq i32 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_ne_i1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ne_i1(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp ne i32 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_sle_i1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_sle_i1(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp sle i32 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_ugt_i64(i64 %a, i64 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ugt_i64(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp ugt i64 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_ult_swap_i64(i64 %a, i64 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_swap_i64(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp ugt i64 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 false, i1 %cmp, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 false, i1 %cmp, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f32(float %a, float %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f32(
 ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = fcmp oeq float %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_fcmp_une_f32(float %a, float %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_une_f32(
 ; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = fcmp une float %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_fcmp_olt_f64(double %a, double %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_olt_f64(
 ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = fcmp olt double %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_eq_i4(i4 %a, i4 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i4(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i4 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp eq i4 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_eq_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i8(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp eq i8 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_eq_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i16(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp eq i16 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_eq_i36(i36 %a, i36 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i36(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i36 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp eq i36 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_eq_i128(i128 %a, i128 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i128(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp eq i128 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f16(half %a, half %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f16(
 ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq half [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = fcmp oeq half %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f128(fp128 %a, fp128 %b) {
-;
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f128(
 ; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = fcmp oeq fp128 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_slt_i4(i4 %a, i4 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i4(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i4 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp slt i4 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_slt_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i8(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp slt i8 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_slt_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i16(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp slt i16 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_ult_i4(i4 %a, i4 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i4(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i4 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp ult i4 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_ult_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i8(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp ult i8 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }

 define i64 @fold_icmp_i1_ne_0_icmp_ult_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i16(
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
+; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
 ; CHECK-NEXT: ret i64 [[MASK]]
 ;
   %cmp = icmp ult i16 %a, %b
-  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
+  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
   ret i64 %mask
 }
@@ -2339,17 +2337,17 @@
 ; llvm.amdgcn.fcmp
 ; --------------------------------------------------------------------

-declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32 immarg) nounwind readnone convergent
+declare i64 @llvm.amdgcn.fcmp.i64.f32(float, float, i32 immarg) nounwind readnone convergent

 define i64 @invalid_fcmp_code(float %a, float %b) {
 ; CHECK-LABEL: @invalid_fcmp_code(
-; CHECK-NEXT: [[UNDER:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A:%.*]], float [[B:%.*]], i32 -1)
-; CHECK-NEXT: [[OVER:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A]], float [[B]], i32 16)
+; CHECK-NEXT: [[UNDER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 -1)
+; CHECK-NEXT: [[OVER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A]], float [[B]], i32 16)
 ; CHECK-NEXT: [[OR:%.*]] = or i64 [[UNDER]], [[OVER]]
 ; CHECK-NEXT: ret i64 [[OR]]
 ;
-  %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
-  %over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16)
+  %under = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 -1)
+  %over = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 16)
   %or = or i64 %under, %over
   ret i64 %or
 }
@@ -2358,7 +2356,7 @@
 ; CHECK-LABEL: @fcmp_constant_inputs_false(
 ; CHECK-NEXT: ret i64 0
 ;
-  %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 1)
+  %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 1)
   ret i64 %result
 }
@@ -2367,16 +2365,16 @@
 ; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) #5
 ; CHECK-NEXT: ret i64 [[RESULT]]
 ;
-  %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4)
+  %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
   ret i64 %result
 }

 define i64 @fcmp_constant_to_rhs_olt(float %x) {
 ; CHECK-LABEL: @fcmp_constant_to_rhs_olt(
-; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[X:%.*]], float 4.000000e+00, i32 2)
+; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[X:%.*]], float 4.000000e+00, i32 2)
 ; CHECK-NEXT: ret i64 [[RESULT]]
 ;
-  %result = call i64 @llvm.amdgcn.fcmp.f32(float 4.0, float %x, i32 4)
+  %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 4.0, float %x, i32 4)
   ret i64 %result
 }
Index: llvm/trunk/test/Verifier/AMDGPU/intrinsic-immarg.ll
===================================================================
--- llvm/trunk/test/Verifier/AMDGPU/intrinsic-immarg.ll
+++ llvm/trunk/test/Verifier/AMDGPU/intrinsic-immarg.ll
@@ -123,22 +123,22 @@
   ret void
 }

-declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32)
+declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)

 define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) {
   ; CHECK: immarg operand has non-immediate parameter
   ; CHECK-NEXT: i32 %c
-  ; CHECK-NEXT: %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
+  ; CHECK-NEXT: %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 %c)
+  %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 %c)
   ret i64 %result
 }

-declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32)
+declare i64 @llvm.amdgcn.fcmp.i64.f32(float, float, i32)

 define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) {
   ; CHECK: immarg operand has non-immediate parameter
   ; CHECK-NEXT: i32 %c
-  ; CHECK-NEXT: %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
-  %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
+  ; CHECK-NEXT: %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 %c)
+  %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 %c)
   ret i64 %result
 }