Index: lib/Transforms/InstCombine/InstCombineSelect.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -300,12 +300,13 @@
                             TI->getType());
   }
 
-  // Only handle binary operators with one-use here. As with the cast case
-  // above, it may be possible to relax the one-use constraint, but that needs
-  // be examined carefully since it may not reduce the total number of
-  // instructions.
-  BinaryOperator *BO = dyn_cast<BinaryOperator>(TI);
-  if (!BO || !TI->hasOneUse() || !FI->hasOneUse())
+  // Only handle binary operators (including two-operand getelementptr) with
+  // one-use here. As with the cast case above, it may be possible to relax the
+  // one-use constraint, but that needs to be examined carefully since it may
+  // not reduce the total number of instructions.
+  if (TI->getNumOperands() != 2 || FI->getNumOperands() != 2 ||
+      (!isa<BinaryOperator>(TI) && !isa<GetElementPtrInst>(TI)) ||
+      !TI->hasOneUse() || !FI->hasOneUse())
     return nullptr;
 
   // Figure out if the operations have any operands in common.
@@ -342,7 +343,21 @@
                                        SI.getName() + ".v", &SI);
   Value *Op0 = MatchIsOpZero ? MatchOp : NewSI;
   Value *Op1 = MatchIsOpZero ? NewSI : MatchOp;
-  return BinaryOperator::Create(BO->getOpcode(), Op0, Op1);
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
+    return BinaryOperator::Create(BO->getOpcode(), Op0, Op1);
+  }
+  if (GetElementPtrInst *TGEP = dyn_cast<GetElementPtrInst>(TI)) {
+    // NOTE(review): assumes the caller only pairs TI/FI of the same opcode, so
+    // FI is a GEP here too; cast<> asserts that instead of yielding null.
+    GetElementPtrInst *FGEP = cast<GetElementPtrInst>(FI);
+    Type *ElementType = TGEP->getResultElementType();
+    // The merged GEP is only marked inbounds if both original GEPs were.
+    return TGEP->isInBounds() && FGEP->isInBounds()
+               ? GetElementPtrInst::CreateInBounds(ElementType, Op0, {Op1})
+               : GetElementPtrInst::Create(ElementType, Op0, {Op1});
+  }
+  llvm_unreachable("Expected BinaryOperator or GEP");
+  return nullptr;
 }
 
 static bool isSelect01(const APInt &C1I, const APInt &C2I) {
Index: test/Transforms/InstCombine/select-gep.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/select-gep.ll
@@ -0,0 +1,132 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Same constant index on different base pointers: select the base, then GEP.
+; CHECK-LABEL: @test1a
+; CHECK: [[CMP:%.*]] = icmp ugt i32* %p, %q
+; CHECK: [[SEL:%.*]] = select i1 [[CMP]], i32* %p, i32* %q
+; CHECK: [[GEP:%.*]] = getelementptr i32, i32* [[SEL]], i64 4
+; CHECK: ret i32* [[GEP]]
+define i32* @test1a(i32* %p, i32* %q) {
+  %gep1 = getelementptr i32, i32* %p, i64 4
+  %gep2 = getelementptr i32, i32* %q, i64 4
+  %cmp = icmp ugt i32* %p, %q
+  %select = select i1 %cmp, i32* %gep1, i32* %gep2
+  ret i32* %select
+}
+
+; Only one GEP is inbounds, so the merged GEP must not be inbounds.
+; CHECK-LABEL: @test1b
+; CHECK: [[CMP:%.*]] = icmp ugt i32* %p, %q
+; CHECK: [[SEL:%.*]] = select i1 [[CMP]], i32* %p, i32* %q
+; CHECK: [[GEP:%.*]] = getelementptr i32, i32* [[SEL]], i64 4
+; CHECK: ret i32* [[GEP]]
+define i32* @test1b(i32* %p, i32* %q) {
+  %gep1 = getelementptr inbounds i32, i32* %p, i64 4
+  %gep2 = getelementptr i32, i32* %q, i64 4
+  %cmp = icmp ugt i32* %p, %q
+  %select = select i1 %cmp, i32* %gep1, i32* %gep2
+  ret i32* %select
+}
+
+; As test1b, with the inbounds flag on the other GEP.
+; CHECK-LABEL: @test1c
+; CHECK: [[CMP:%.*]] = icmp ugt i32* %p, %q
+; CHECK: [[SEL:%.*]] = select i1 [[CMP]], i32* %p, i32* %q
+; CHECK: [[GEP:%.*]] = getelementptr i32, i32* [[SEL]], i64 4
+; CHECK: ret i32* [[GEP]]
+define i32* @test1c(i32* %p, i32* %q) {
+  %gep1 = getelementptr i32, i32* %p, i64 4
+  %gep2 = getelementptr inbounds i32, i32* %q, i64 4
+  %cmp = icmp ugt i32* %p, %q
+  %select = select i1 %cmp, i32* %gep1, i32* %gep2
+  ret i32* %select
+}
+
+; Both GEPs are inbounds, so the merged GEP keeps inbounds.
+; CHECK-LABEL: @test1d
+; CHECK: [[CMP:%.*]] = icmp ugt i32* %p, %q
+; CHECK: [[SEL:%.*]] = select i1 [[CMP]], i32* %p, i32* %q
+; CHECK: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[SEL]], i64 4
+; CHECK: ret i32* [[GEP]]
+define i32* @test1d(i32* %p, i32* %q) {
+  %gep1 = getelementptr inbounds i32, i32* %p, i64 4
+  %gep2 = getelementptr inbounds i32, i32* %q, i64 4
+  %cmp = icmp ugt i32* %p, %q
+  %select = select i1 %cmp, i32* %gep1, i32* %gep2
+  ret i32* %select
+}
+
+; Same base pointer with different indices: select the index, then GEP.
+; CHECK-LABEL: @test2
+; CHECK: [[CMP:%.*]] = icmp ugt i64 %x, %y
+; CHECK: [[SEL:%.*]] = select i1 [[CMP]], i64 %x, i64 %y
+; CHECK: [[GEP:%.*]] = getelementptr inbounds i32, i32* %p, i64 [[SEL]]
+; CHECK: ret i32* [[GEP]]
+define i32* @test2(i32* %p, i64 %x, i64 %y) {
+  %gep1 = getelementptr inbounds i32, i32* %p, i64 %x
+  %gep2 = getelementptr inbounds i32, i32* %p, i64 %y
+  %cmp = icmp ugt i64 %x, %y
+  %select = select i1 %cmp, i32* %gep1, i32* %gep2
+  ret i32* %select
+}
+
+; Three (or more) operand GEPs are currently expected to not be optimised,
+; though they could be in principle.
+; CHECK-LABEL: @test3a
+; CHECK: %gep1 = getelementptr inbounds [4 x i32], [4 x i32]* %p, i64 2, i64 %x
+; CHECK: %gep2 = getelementptr inbounds [4 x i32], [4 x i32]* %p, i64 2, i64 %y
+; CHECK: %cmp = icmp ugt i64 %x, %y
+; CHECK: %select = select i1 %cmp, i32* %gep1, i32* %gep2
+; CHECK: ret i32* %select
+define i32* @test3a([4 x i32]* %p, i64 %x, i64 %y) {
+  %gep1 = getelementptr inbounds [4 x i32], [4 x i32]* %p, i64 2, i64 %x
+  %gep2 = getelementptr inbounds [4 x i32], [4 x i32]* %p, i64 2, i64 %y
+  %cmp = icmp ugt i64 %x, %y
+  %select = select i1 %cmp, i32* %gep1, i32* %gep2
+  ret i32* %select
+}
+
+; A three-operand GEP paired with a two-operand GEP is also left alone.
+; CHECK-LABEL: @test3b
+; CHECK: %gep1 = getelementptr inbounds [4 x i32], [4 x i32]* %p, i64 %x, i64 2
+; CHECK: %gep2 = getelementptr inbounds i32, i32* %q, i64 %x
+; CHECK: %cmp = icmp ugt i64 %x, %y
+; CHECK: %select = select i1 %cmp, i32* %gep1, i32* %gep2
+; CHECK: ret i32* %select
+define i32* @test3b([4 x i32]* %p, i32* %q, i64 %x, i64 %y) {
+  %gep1 = getelementptr inbounds [4 x i32], [4 x i32]* %p, i64 %x, i64 2
+  %gep2 = getelementptr inbounds i32, i32* %q, i64 %x
+  %cmp = icmp ugt i64 %x, %y
+  %select = select i1 %cmp, i32* %gep1, i32* %gep2
+  ret i32* %select
+}
+
+; As test3b, with the three-operand GEP on the false side of the select.
+; CHECK-LABEL: @test3c
+; CHECK: %gep1 = getelementptr inbounds i32, i32* %p, i64 %x
+; CHECK: %gep2 = getelementptr inbounds [4 x i32], [4 x i32]* %q, i64 %x, i64 2
+; CHECK: %cmp = icmp ugt i64 %x, %y
+; CHECK: %select = select i1 %cmp, i32* %gep1, i32* %gep2
+; CHECK: ret i32* %select
+define i32* @test3c(i32* %p, [4 x i32]* %q, i64 %x, i64 %y) {
+  %gep1 = getelementptr inbounds i32, i32* %p, i64 %x
+  %gep2 = getelementptr inbounds [4 x i32], [4 x i32]* %q, i64 %x, i64 2
+  %cmp = icmp ugt i64 %x, %y
+  %select = select i1 %cmp, i32* %gep1, i32* %gep2
+  ret i32* %select
+}
+
+; Shouldn't be optimised as it would mean introducing an extra select
+; CHECK-LABEL: @test4
+; CHECK: %gep1 = getelementptr inbounds i32, i32* %p, i64 %x
+; CHECK: %gep2 = getelementptr inbounds i32, i32* %q, i64 %y
+; CHECK: %cmp = icmp ugt i64 %x, %y
+; CHECK: %select = select i1 %cmp, i32* %gep1, i32* %gep2
+; CHECK: ret i32* %select
+define i32* @test4(i32* %p, i32* %q, i64 %x, i64 %y) {
+  %gep1 = getelementptr inbounds i32, i32* %p, i64 %x
+  %gep2 = getelementptr inbounds i32, i32* %q, i64 %y
+  %cmp = icmp ugt i64 %x, %y
+  %select = select i1 %cmp, i32* %gep1, i32* %gep2
+  ret i32* %select
+}