Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3432,8 +3432,25 @@ if (auto *CSrc0 = dyn_cast(Src0)) { if (auto *CSrc1 = dyn_cast(Src1)) { Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1); - return replaceInstUsesWith(*II, - ConstantExpr::getSExt(CCmp, II->getType())); + if (CCmp->isNullValue()) + return replaceInstUsesWith( + *II, ConstantExpr::getSExt(CCmp, II->getType())); + + // The result of V_ICMP/V_FCMP assembly instructions (which this + // intrinsic exposes) is one bit per thread, masked with the EXEC + // register (which contains the bitmask of live threads). So a + // comparison that always returns true is the same as a read of the + // EXEC register. + Value *NewF = Intrinsic::getDeclaration( + II->getModule(), Intrinsic::read_register, II->getType()); + Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")}; + MDNode *MD = MDNode::get(II->getContext(), MDArgs); + Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)}; + CallInst *NewCall = Builder->CreateCall(NewF, Args); + NewCall->addAttribute(AttributeList::FunctionIndex, + Attribute::Convergent); + NewCall->takeName(II); + return replaceInstUsesWith(*II, NewCall); } // Canonicalize constants to RHS. Index: test/Transforms/InstCombine/amdgcn-intrinsics.ll =================================================================== --- test/Transforms/InstCombine/amdgcn-intrinsics.ll +++ test/Transforms/InstCombine/amdgcn-intrinsics.ll @@ -1259,7 +1259,7 @@ } ; CHECK-LABEL: @icmp_constant_inputs_true( -; CHECK: ret i64 -1 +; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4 define i64 @icmp_constant_inputs_true() { %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34) ret i64 %result @@ -1524,7 +1524,7 @@ } ; CHECK-LABEL: @fcmp_constant_inputs_true( -; CHECK: ret i64 -1 +; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4 define i64 @fcmp_constant_inputs_true() { %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4) ret i64 %result @@ -1536,3 +1536,5 @@ %result = call i64 @llvm.amdgcn.fcmp.f32(float 4.0, float %x, i32 4) ret i64 %result } + +; CHECK: attributes #4 = { convergent }