diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -218,6 +218,19 @@
   return true;
 }
 
+static unsigned hashCallInst(CallInst *CI) {
+  // Don't CSE convergent calls in different basic blocks, because they
+  // implicitly depend on the set of threads that is currently executing.
+  if (CI->isConvergent()) {
+    return hash_combine(
+        CI->getOpcode(), CI->getParent(),
+        hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
+  }
+  return hash_combine(
+      CI->getOpcode(),
+      hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
+}
+
 static unsigned getHashValueImpl(SimpleValue Val) {
   Instruction *Inst = Val.Inst;
   // Hash in all of the operands as pointers.
@@ -320,11 +333,8 @@
 
   // Don't CSE convergent calls in different basic blocks, because they
   // implicitly depend on the set of threads that is currently executing.
-  if (CallInst *CI = dyn_cast<CallInst>(Inst); CI && CI->isConvergent()) {
-    return hash_combine(
-        Inst->getOpcode(), Inst->getParent(),
-        hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
-  }
+  if (CallInst *CI = dyn_cast<CallInst>(Inst))
+    return hashCallInst(CI);
 
   // Mix in the opcode.
   return hash_combine(
@@ -524,15 +534,21 @@
   Instruction *Inst = Val.Inst;
 
   // Hash all of the operands as pointers and mix in the opcode.
-  return hash_combine(
-      Inst->getOpcode(),
-      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
+  return hashCallInst(cast<CallInst>(Inst));
 }
 
 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
-  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
   if (LHS.isSentinel() || RHS.isSentinel())
-    return LHSI == RHSI;
+    return LHS.Inst == RHS.Inst;
+
+  CallInst *LHSI = cast<CallInst>(LHS.Inst);
+  CallInst *RHSI = cast<CallInst>(RHS.Inst);
+
+  // Convergent calls implicitly depend on the set of threads that is
+  // currently executing, so conservatively return false if they are in
+  // different basic blocks.
+  if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent())
+    return false;
 
   return LHSI->isIdenticalTo(RHSI);
 }
diff --git a/llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll b/llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll
--- a/llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll
+++ b/llvm/test/Transforms/EarlyCSE/AMDGPU/convergent-call.ll
@@ -11,9 +11,10 @@
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[COND]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[Y1:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]]) #[[ATTR2]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[Y2:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X1]], [[IF]] ]
+; CHECK-NEXT:    [[Y2:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[Y1]], [[IF]] ]
 ; CHECK-NEXT:    [[RET:%.*]] = add i32 [[X1]], [[Y2]]
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
@@ -57,9 +58,10 @@
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[COND]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[Y1:%.*]] = call i1 @llvm.amdgcn.live.mask() #[[ATTR2]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[Y2:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[X1]], [[IF]] ]
+; CHECK-NEXT:    [[Y2:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[Y1]], [[IF]] ]
 ; CHECK-NEXT:    [[RET:%.*]] = add i1 [[X1]], [[Y2]]
 ; CHECK-NEXT:    ret i1 [[RET]]
 ;
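Note: the IR below is a minimal sketch distilled from the CHECK lines above, not part of the patch itself. @llvm.amdgcn.live.mask is convergent, so after this change EarlyCSE no longer folds %y1 into %x1: the call in %if executes under a different (possibly smaller) set of active threads than the call in %entry, so the two calls are not interchangeable even though their operands are identical. Identical convergent calls within the same basic block can still be combined, since hashCallInst only mixes in the parent block (and isEqual only compares it) for the cross-block case.

  ; Intrinsic declaration; its convergent attribute is what blocks cross-block CSE.
  declare i1 @llvm.amdgcn.live.mask() convergent

  define i1 @convergent_calls_in_different_blocks(i32 %cond) {
  entry:
    %x1 = call i1 @llvm.amdgcn.live.mask()
    %cmp = icmp eq i32 %cond, 0
    br i1 %cmp, label %if, label %end
  if:
    ; Identical call, but in a different block: with this patch it is kept.
    %y1 = call i1 @llvm.amdgcn.live.mask()
    br label %end
  end:
    %y2 = phi i1 [ false, %entry ], [ %y1, %if ]
    %ret = add i1 %x1, %y2
    ret i1 %ret
  }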