diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1778,7 +1778,8 @@ // PHI node (because an operand varies in each input block), add to PHIOperands. static bool canSinkInstructions( ArrayRef Insts, - DenseMap> &PHIOperands) { + DenseMap> &PHIOperands, + const TargetTransformInfo &TTI) { // Prune out obviously bad instructions to move. Each instruction must have // exactly zero or one use, and we check later that use is by a single, common // PHI instruction in the successor. @@ -1894,6 +1895,33 @@ !canReplaceOperandWithVariable(I0, OI)) // We can't create a PHI from this GEP. return false; + + // Return true if I is a load/store instruction and OpIdx is the index of + // its pointer operand. + auto IsLoadStorePointerIndex = [](const Instruction *I, unsigned OpIdx) { + if (auto *LI = dyn_cast(I)) + return LI->getPointerOperandIndex() == OpIdx; + if (auto *SI = dyn_cast(I)) + return SI->getPointerOperandIndex() == OpIdx; + return false; + }; + + // It may not be cheap to replace the foldable GEP of a load/store + // instruction with a variable. + auto LoadStoreUseFoldableGEP = [&](const Instruction *I) { + if (IsLoadStorePointerIndex(I, OI)) { + if (auto *GEP = dyn_cast(I->getOperand(OI))) { + SmallVector Indices(GEP->indices()); + return TTI.getGEPCost(GEP->getSourceElementType(), + GEP->getPointerOperand(), + Indices) == TargetTransformInfo::TCC_Free; + } + } + return false; + }; + if (any_of(Insts, LoadStoreUseFoldableGEP)) + return false; + for (auto *I : Insts) PHIOperands[I].push_back(I->getOperand(OI)); } @@ -2083,7 +2111,8 @@ /// Check whether BB's predecessors end with unconditional branches. If it is /// true, sink any common code from the predecessors to BB. 
static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, - DomTreeUpdater *DTU) { + DomTreeUpdater *DTU, + const TargetTransformInfo &TTI) { // We support two situations: // (1) all incoming arcs are unconditional // (2) there are non-unconditional incoming arcs @@ -2148,7 +2177,7 @@ DenseMap> PHIOperands; LockstepReverseIterator LRI(UnconditionalPreds); while (LRI.isValid() && - canSinkInstructions(*LRI, PHIOperands)) { + canSinkInstructions(*LRI, PHIOperands, TTI)) { LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0] << "\n"); InstructionsToSink.insert((*LRI).begin(), (*LRI).end()); @@ -7263,7 +7292,7 @@ return true; if (SinkCommon && Options.SinkCommonInsts) - if (SinkCommonCodeFromPredecessors(BB, DTU) || + if (SinkCommonCodeFromPredecessors(BB, DTU, TTI) || MergeCompatibleInvokes(BB, DTU)) { // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's, // so we may now how duplicate PHI's. diff --git a/llvm/test/Transforms/PhaseOrdering/X86/earlycse-after-simplifycfg-two-entry-phi-node-folding.ll b/llvm/test/Transforms/PhaseOrdering/X86/earlycse-after-simplifycfg-two-entry-phi-node-folding.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/earlycse-after-simplifycfg-two-entry-phi-node-folding.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/earlycse-after-simplifycfg-two-entry-phi-node-folding.ll @@ -10,11 +10,20 @@ ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[ISHI:%.*]], 0 -; CHECK-NEXT: [[LO_HI:%.*]] = select i1 [[TOBOOL_NOT]], i64 [[LO:%.*]], i64 [[HI:%.*]] -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 [[LO_HI]] +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 [[HI:%.*]] +; CHECK-NEXT: [[ARRAYVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[ARRAYVAL]], 1 +; CHECK-NEXT: store i32 
[[INC]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[LO:%.*]] ; CHECK-NEXT: [[ARRAYVAL2:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: [[INC2:%.*]] = add nsw i32 [[ARRAYVAL2]], 1 ; CHECK-NEXT: store i32 [[INC2]], ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SimplifyCFG/RISCV/sink-common-code.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/sink-common-code.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/RISCV/sink-common-code.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +;RUN: opt -S -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -mtriple=riscv64 -sink-common-insts < %s | FileCheck %s +define i32 @test1(i32 zeroext %flag, ptr %base) { +; CHECK-LABEL: define i32 @test1 +; CHECK-SAME: (i32 zeroext [[FLAG:%.*]], ptr [[BASE:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[FLAG]], label [[OTHERWISE:%.*]] [ +; CHECK-NEXT: i32 2, label [[CASE1:%.*]] +; CHECK-NEXT: i32 7, label [[CASE2:%.*]] +; CHECK-NEXT: i32 11, label [[IF_END:%.*]] +; CHECK-NEXT: ] +; CHECK: otherwise: +; CHECK-NEXT: [[ADDR0:%.*]] = getelementptr i32, ptr [[BASE]], i32 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADDR0]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: case1: +; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr i32, ptr [[BASE]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADDR1]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: case2: +; CHECK-NEXT: [[ADDR2:%.*]] = getelementptr i32, ptr [[BASE]], i32 3 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ADDR2]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[TMP0]], [[OTHERWISE]] ], [ [[TMP1]], [[CASE1]] ], [ [[TMP2]], 
[[CASE2]] ], [ 4, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %flag, label %otherwise [ i32 2, label %case1 + i32 7, label %case2 + i32 11, label %if.end] + +otherwise: + %addr0 = getelementptr i32, ptr %base, i32 4 + %0 = load i32, ptr %addr0 + br label %if.end + +case1: + %addr1 = getelementptr i32, ptr %base, i32 1 + %1 = load i32, ptr %addr1 + br label %if.end + +case2: + %addr2 = getelementptr i32, ptr %base, i32 3 + %2 = load i32, ptr %addr2 + br label %if.end + +if.end: + %res = phi i32 [ %0, %otherwise ], [ %1, %case1 ], [ %2, %case2 ], [4, %entry] + ret i32 %res +} + +; Different GEP position. +define i32 @test2(i32 zeroext %flag, ptr %base, ptr %base2) { +; CHECK-LABEL: define i32 @test2 +; CHECK-SAME: (i32 zeroext [[FLAG:%.*]], ptr [[BASE:%.*]], ptr [[BASE2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[FLAG]], label [[OTHERWISE:%.*]] [ +; CHECK-NEXT: i32 2, label [[CASE1:%.*]] +; CHECK-NEXT: i32 7, label [[CASE2:%.*]] +; CHECK-NEXT: i32 6, label [[CASE3:%.*]] +; CHECK-NEXT: i32 11, label [[IF_END:%.*]] +; CHECK-NEXT: ] +; CHECK: otherwise: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[BASE]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: case1: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[BASE2]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: case2: +; CHECK-NEXT: [[ADDR2:%.*]] = getelementptr i32, ptr [[BASE]], i32 3 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ADDR2]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: case3: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[BASE2]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[TMP0]], [[OTHERWISE]] ], [ [[TMP1]], [[CASE1]] ], [ [[TMP2]], [[CASE2]] ], [ [[TMP3]], [[CASE3]] ], [ 4, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %flag, label %otherwise [ i32 2, label %case1 + i32 7, label %case2 + i32 6, label %case3 + i32 11, label %if.end] + +otherwise: + %0 = load i32, 
ptr %base + br label %if.end + +case1: + %1 = load i32, ptr %base2 + br label %if.end + +case2: + %addr2 = getelementptr i32, ptr %base, i32 3 + %2 = load i32, ptr %addr2 + br label %if.end + +case3: + %3 = load i32, ptr %base2 + br label %if.end + +if.end: + %res = phi i32 [ %0, %otherwise ], [ %1, %case1 ], [ %2, %case2 ], [ %3, %case3 ], [4, %entry] + ret i32 %res +} + +; Sink the load when the pointer offsets do not fit in simm12. +define i32 @test3(i32 zeroext %flag, ptr %base) { +; CHECK-LABEL: define i32 @test3 +; CHECK-SAME: (i32 zeroext [[FLAG:%.*]], ptr [[BASE:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[FLAG]], label [[IF_END_SINK_SPLIT:%.*]] [ +; CHECK-NEXT: i32 2, label [[CASE1:%.*]] +; CHECK-NEXT: i32 7, label [[CASE2:%.*]] +; CHECK-NEXT: i32 11, label [[IF_END:%.*]] +; CHECK-NEXT: ] +; CHECK: case1: +; CHECK-NEXT: br label [[IF_END_SINK_SPLIT]] +; CHECK: case2: +; CHECK-NEXT: br label [[IF_END_SINK_SPLIT]] +; CHECK: if.end.sink.split: +; CHECK-NEXT: [[DOTSINK:%.*]] = phi i32 [ 2500, [[CASE2]] ], [ 3214, [[CASE1]] ], [ 4099, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ADDR2:%.*]] = getelementptr i32, ptr [[BASE]], i32 [[DOTSINK]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADDR2]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 4, [[ENTRY]] ], [ [[TMP0]], [[IF_END_SINK_SPLIT]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + switch i32 %flag, label %otherwise [ i32 2, label %case1 + i32 7, label %case2 + i32 11, label %if.end] + +otherwise: + %addr0 = getelementptr i32, ptr %base, i32 4099 + %0 = load i32, ptr %addr0 + br label %if.end + +case1: + %addr1 = getelementptr i32, ptr %base, i32 3214 + %1 = load i32, ptr %addr1 + br label %if.end + +case2: + %addr2 = getelementptr i32, ptr %base, i32 2500 + %2 = load i32, ptr %addr2 + br label %if.end + +if.end: + %res = phi i32 [ %0, %otherwise ], [ %1, %case1 ], [ %2, %case2 ], [4, %entry] + ret i32 %res +} diff --git 
a/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll b/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll --- a/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll @@ -302,14 +302,14 @@ ; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[DUMMY:%.*]] = add i32 [[X:%.*]], 5 +; CHECK-NEXT: store volatile i32 [[X]], ptr [[S:%.*]], align 4 ; CHECK-NEXT: br label [[IF_END:%.*]] ; CHECK: if.else: ; CHECK-NEXT: [[DUMMY1:%.*]] = add i32 [[X]], 6 -; CHECK-NEXT: [[GEPB:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[S:%.*]], i32 0, i32 1 +; CHECK-NEXT: [[GEPB:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[S]], i32 0, i32 1 +; CHECK-NEXT: store volatile i32 [[X]], ptr [[GEPB]], align 4 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[GEPB_SINK:%.*]] = phi ptr [ [[GEPB]], [[IF_ELSE]] ], [ [[S]], [[IF_THEN]] ] -; CHECK-NEXT: store volatile i32 [[X]], ptr [[GEPB_SINK]], align 4 ; CHECK-NEXT: ret i32 1 ; entry: @@ -472,12 +472,22 @@ define i32 @test14(i1 zeroext %flag, i32 %w, i32 %x, i32 %y, ptr %s) { ; CHECK-LABEL: @test14( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOT:%.*]] = select i1 [[FLAG:%.*]], i32 1, i32 4 -; CHECK-NEXT: [[DOT2:%.*]] = select i1 [[FLAG]], i32 56, i32 57 -; CHECK-NEXT: [[DUMMY2:%.*]] = add i32 [[X:%.*]], [[DOT]] -; CHECK-NEXT: [[GEPB:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[S:%.*]], i32 0, i32 1 +; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[DUMMY:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[S:%.*]], i32 0, i32 1 +; CHECK-NEXT: [[SV1:%.*]] = load i32, ptr [[GEPA]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[SV1]], 56 +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[DUMMY2:%.*]] = add i32 [[X]], 4 +; 
CHECK-NEXT: [[GEPB:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[S]], i32 0, i32 1 ; CHECK-NEXT: [[SV2:%.*]] = load i32, ptr [[GEPB]], align 4 -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[SV2]], [[DOT2]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[SV2]], 57 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata [[META7:![0-9]+]], metadata !DIExpression()), !dbg [[DBG9:![0-9]+]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[P:%.*]] = phi i1 [ [[CMP1]], [[IF_THEN]] ], [ [[CMP2]], [[IF_ELSE]] ] ; CHECK-NEXT: ret i32 1 ; entry: @@ -523,16 +533,17 @@ ; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[DUMMY:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[SV1:%.*]] = load i32, ptr [[S:%.*]], align 4 ; CHECK-NEXT: br label [[IF_END:%.*]] ; CHECK: if.else: ; CHECK-NEXT: [[DUMMY2:%.*]] = add i32 [[X]], 4 -; CHECK-NEXT: [[GEPB:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[S:%.*]], i32 0, i32 1 +; CHECK-NEXT: [[GEPB:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[S]], i32 0, i32 1 +; CHECK-NEXT: [[SV2:%.*]] = load i32, ptr [[GEPB]], align 4 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[GEPB_SINK:%.*]] = phi ptr [ [[GEPB]], [[IF_ELSE]] ], [ [[S]], [[IF_THEN]] ] +; CHECK-NEXT: [[SV2_SINK:%.*]] = phi i32 [ [[SV2]], [[IF_ELSE]] ], [ [[SV1]], [[IF_THEN]] ] ; CHECK-NEXT: [[DOTSINK:%.*]] = phi i64 [ 57, [[IF_ELSE]] ], [ 56, [[IF_THEN]] ] -; CHECK-NEXT: [[SV2:%.*]] = load i32, ptr [[GEPB_SINK]], align 4 -; CHECK-NEXT: [[EXT2:%.*]] = zext i32 [[SV2]] to i64 +; CHECK-NEXT: [[EXT2:%.*]] = zext i32 [[SV2_SINK]] to i64 ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[EXT2]], [[DOTSINK]] ; CHECK-NEXT: ret i32 1 ; @@ -1563,7 +1574,7 @@ define void @nontemporal(ptr %ptr, i1 %cond) { ; CHECK-LABEL: @nontemporal( ; CHECK-NEXT: entry: -; CHECK-NEXT: store i64 0, ptr [[PTR:%.*]], align 8, !nontemporal !7 +; CHECK-NEXT: store i64 0, ptr [[PTR:%.*]], align 8, 
!nontemporal !10 ; CHECK-NEXT: ret void ; entry: