diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1595,12 +1595,13 @@ if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) return Changed; - // If any of the two call sites has nomerge attribute, stop hoisting. + // If any of the two call sites has nomerge or convergent attribute, stop + // hoisting. if (const auto *CB1 = dyn_cast<CallBase>(I1)) - if (CB1->cannotMerge()) + if (CB1->cannotMerge() || CB1->isConvergent()) return Changed; if (const auto *CB2 = dyn_cast<CallBase>(I2)) - if (CB2->cannotMerge()) + if (CB2->cannotMerge() || CB2->isConvergent()) return Changed; if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) { @@ -1808,9 +1809,9 @@ // Conservatively return false if I is an inline-asm instruction. Sinking // and merging inline-asm instructions can potentially create arguments // that cannot satisfy the inline-asm constraints. - // If the instruction has nomerge attribute, return false. + // If the instruction has nomerge or convergent attribute, return false. if (const auto *C = dyn_cast<CallBase>(I)) - if (C->isInlineAsm() || C->cannotMerge()) + if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent()) return false; // Each instruction must have zero or one use. 
diff --git a/llvm/test/Transforms/SimplifyCFG/convergent.ll b/llvm/test/Transforms/SimplifyCFG/convergent.ll --- a/llvm/test/Transforms/SimplifyCFG/convergent.ll +++ b/llvm/test/Transforms/SimplifyCFG/convergent.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s -; RUN: opt -S -passes=simplifycfg < %s | FileCheck %s +; RUN: opt -S -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=CHECK,NOSINK %s +; RUN: opt -S -passes=simplifycfg < %s | FileCheck -check-prefixes=CHECK,NOSINK %s +; RUN: opt -S -passes='simplifycfg<sink-common-insts>' < %s | FileCheck -check-prefixes=CHECK,SINK %s declare void @foo() convergent +declare i32 @tid() +declare i32 @mbcnt(i32 %a, i32 %b) convergent +declare i32 @bpermute(i32 %a, i32 %b) convergent define i32 @test_01(i32 %a) { ; CHECK-LABEL: @test_01( @@ -40,3 +44,98 @@ exit: ret i32 %a } + +define void @test_02(ptr %y.coerce) convergent { +; NOSINK-LABEL: @test_02( +; NOSINK-NEXT: entry: +; NOSINK-NEXT: [[TMP0:%.*]] = tail call i32 @tid() +; NOSINK-NEXT: [[REM:%.*]] = and i32 [[TMP0]], 1 +; NOSINK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[REM]], 0 +; NOSINK-NEXT: br i1 [[CMP_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] +; NOSINK: if.then: +; NOSINK-NEXT: [[TMP1:%.*]] = tail call i32 @mbcnt(i32 -1, i32 0) +; NOSINK-NEXT: [[TMP2:%.*]] = tail call i32 @mbcnt(i32 -1, i32 [[TMP1]]) +; NOSINK-NEXT: [[AND_I:%.*]] = shl i32 [[TMP2]], 2 +; NOSINK-NEXT: [[ADD_I:%.*]] = and i32 [[AND_I]], -256 +; NOSINK-NEXT: [[SHL_I:%.*]] = or i32 [[ADD_I]], 16 +; NOSINK-NEXT: [[TMP3:%.*]] = tail call i32 @bpermute(i32 [[SHL_I]], i32 [[TMP0]]) +; NOSINK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP0]] to i64 +; NOSINK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y_COERCE:%.*]], i64 [[IDXPROM]] +; NOSINK-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX]], align 4 +; NOSINK-NEXT: br label 
[[IF_END:%.*]] +; NOSINK: if.else: +; NOSINK-NEXT: [[TMP4:%.*]] = tail call i32 @mbcnt(i32 -1, i32 0) +; NOSINK-NEXT: [[TMP5:%.*]] = tail call i32 @mbcnt(i32 -1, i32 [[TMP4]]) +; NOSINK-NEXT: [[AND_I11:%.*]] = shl i32 [[TMP5]], 2 +; NOSINK-NEXT: [[ADD_I12:%.*]] = and i32 [[AND_I11]], -256 +; NOSINK-NEXT: [[SHL_I13:%.*]] = or i32 [[ADD_I12]], 8 +; NOSINK-NEXT: [[TMP6:%.*]] = tail call i32 @bpermute(i32 [[SHL_I13]], i32 [[TMP0]]) +; NOSINK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP0]] to i64 +; NOSINK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[Y_COERCE]], i64 [[IDXPROM4]] +; NOSINK-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX5]], align 4 +; NOSINK-NEXT: br label [[IF_END]] +; NOSINK: if.end: +; NOSINK-NEXT: ret void +; +; SINK-LABEL: @test_02( +; SINK-NEXT: entry: +; SINK-NEXT: [[TMP0:%.*]] = tail call i32 @tid() +; SINK-NEXT: [[REM:%.*]] = and i32 [[TMP0]], 1 +; SINK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[REM]], 0 +; SINK-NEXT: br i1 [[CMP_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] +; SINK: if.then: +; SINK-NEXT: [[TMP1:%.*]] = tail call i32 @mbcnt(i32 -1, i32 0) +; SINK-NEXT: [[TMP2:%.*]] = tail call i32 @mbcnt(i32 -1, i32 [[TMP1]]) +; SINK-NEXT: [[AND_I:%.*]] = shl i32 [[TMP2]], 2 +; SINK-NEXT: [[ADD_I:%.*]] = and i32 [[AND_I]], -256 +; SINK-NEXT: [[SHL_I:%.*]] = or i32 [[ADD_I]], 16 +; SINK-NEXT: [[TMP3:%.*]] = tail call i32 @bpermute(i32 [[SHL_I]], i32 [[TMP0]]) +; SINK-NEXT: br label [[IF_END:%.*]] +; SINK: if.else: +; SINK-NEXT: [[TMP4:%.*]] = tail call i32 @mbcnt(i32 -1, i32 0) +; SINK-NEXT: [[TMP5:%.*]] = tail call i32 @mbcnt(i32 -1, i32 [[TMP4]]) +; SINK-NEXT: [[AND_I11:%.*]] = shl i32 [[TMP5]], 2 +; SINK-NEXT: [[ADD_I12:%.*]] = and i32 [[AND_I11]], -256 +; SINK-NEXT: [[SHL_I13:%.*]] = or i32 [[ADD_I12]], 8 +; SINK-NEXT: [[TMP6:%.*]] = tail call i32 @bpermute(i32 [[SHL_I13]], i32 [[TMP0]]) +; SINK-NEXT: br label [[IF_END]] +; SINK: if.end: +; SINK-NEXT: [[DOTSINK:%.*]] = phi i32 [ [[TMP6]], [[IF_ELSE]] ], [ [[TMP3]], [[IF_THEN]] ] +; 
SINK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP0]] to i64 +; SINK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[Y_COERCE:%.*]], i64 [[IDXPROM4]] +; SINK-NEXT: store i32 [[DOTSINK]], ptr [[ARRAYIDX5]], align 4 +; SINK-NEXT: ret void +; +entry: + %0 = tail call i32 @tid() + %rem = and i32 %0, 1 + %cmp.not = icmp eq i32 %rem, 0 + br i1 %cmp.not, label %if.else, label %if.then + +if.then: + %1 = tail call i32 @mbcnt(i32 -1, i32 0) + %2 = tail call i32 @mbcnt(i32 -1, i32 %1) + %and.i = shl i32 %2, 2 + %add.i = and i32 %and.i, -256 + %shl.i = or i32 %add.i, 16 + %3 = tail call i32 @bpermute(i32 %shl.i, i32 %0) + %idxprom = zext i32 %0 to i64 + %arrayidx = getelementptr inbounds i32, ptr %y.coerce, i64 %idxprom + store i32 %3, ptr %arrayidx + br label %if.end + +if.else: + %4 = tail call i32 @mbcnt(i32 -1, i32 0) + %5 = tail call i32 @mbcnt(i32 -1, i32 %4) + %and.i11 = shl i32 %5, 2 + %add.i12 = and i32 %and.i11, -256 + %shl.i13 = or i32 %add.i12, 8 + %6 = tail call i32 @bpermute(i32 %shl.i13, i32 %0) + %idxprom4 = zext i32 %0 to i64 + %arrayidx5 = getelementptr inbounds i32, ptr %y.coerce, i64 %idxprom4 + store i32 %6, ptr %arrayidx5 + br label %if.end + +if.end: + ret void +}