Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -2677,6 +2677,7 @@ Value *BaseReg = nullptr; Value *ScaledReg = nullptr; Value *OriginalValue = nullptr; + User *AddrModeUser = nullptr; enum FieldName { NoField = 0x00, @@ -3347,7 +3348,11 @@ /// Are the AddrModes that we have all just equal to their original values? bool AllAddrModesTrivial = true; + const DataLayout &DL; + public: + AddressingModeCombiner(const DataLayout &DL) : DL(DL) { } + /// \brief Get the combined AddrMode const ExtAddrMode &getAddrMode() const { return AddrModes[0]; @@ -3415,9 +3420,140 @@ if (AllAddrModesTrivial) return false; - // TODO: Combine multiple AddrModes by inserting a select or phi for the + // If we have more than one AddrMode then insert a select or phi for the // field in which the AddrModes differ. - return false; + switch (DifferentField) { + default: + llvm_unreachable("Expected a single field to be different"); + return false; + + case ExtAddrMode::ScaleField: + // We can't cope with scale being different. + return false; + + case ExtAddrMode::BaseRegField: + AddrModes[0].BaseReg = + CreateSelectOrPHI([&](ExtAddrMode &AM) { return AM.BaseReg; }); + if (!AddrModes[0].BaseReg) + return false; + break; + + case ExtAddrMode::BaseGVField: + // Set BaseReg to be the select/phi of the BaseGV. + assert(!AddrModes[0].BaseReg); + AddrModes[0].BaseReg = + CreateSelectOrPHI([&](ExtAddrMode &AM) { return AM.BaseGV; }); + if (!AddrModes[0].BaseReg) + return false; + AddrModes[0].BaseGV = nullptr; + break; + + case ExtAddrMode::BaseOffsField: + { + // Handle differing offsets by setting ScaledReg to be the select/phi of + // the offset values, with a scale of 1. + Type *IntPtrTy = DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); + assert(AddrModes[0].Scale == 0); + assert(!AddrModes[0].ScaledReg); + AddrModes[0].Scale = 1; + AddrModes[0].ScaledReg = CreateSelectOrPHI([&](ExtAddrMode &AM) { + return ConstantInt::get(IntPtrTy, AM.BaseOffs); + }); + if (!AddrModes[0].ScaledReg) + return false; + AddrModes[0].BaseOffs = 0; + break; + } + + case ExtAddrMode::ScaledRegField: + AddrModes[0].ScaledReg = + CreateSelectOrPHI([&](ExtAddrMode &AM) { return AM.ScaledReg; }); + if (!AddrModes[0].ScaledReg) + return false; + // If we have a mix of scaled and unscaled addrmodes then we want scale + // to be the scale and not zero. + if (!AddrModes[0].Scale) + for (ExtAddrMode &AM : AddrModes) + if (AM.Scale) { + AddrModes[0].Scale = AM.Scale; + break; + } + break; + } + + return true; + } + + // Convert a null Value to a null value constant + static Value *NullValueToNullConstant(Value *V, Type *T) { + if (V) + return V; + else + return Constant::getNullValue(T); + } + + Value *CreateSelectOrPHI(std::function GetVal) { + Value *Addr = AddrModes[0].AddrModeUser; + // The select/phi has to be used only in the memory instruction the addrmode + // will be folded into. + if (!Addr->hasOneUse()) + return nullptr; + // Start off by figuring out the type + Type *T = nullptr; + for (ExtAddrMode &AM : AddrModes) { + Value *V = GetVal(AM); + if (V) { + Type *NewT = V->getType(); + if (!T) + T = NewT; + else if (T != NewT) + return nullptr; // We can't handle mismatched types + } + } + Value *Result = nullptr; + if (PHINode *OrigPHI = dyn_cast(Addr)) { + // We can't do anything if addrmodes doesn't match the number of incoming + // values + if (AddrModes.size() != OrigPHI->getNumIncomingValues()) + return nullptr; + PHINode *NewPHI = PHINode::Create(T, OrigPHI->getNumIncomingValues(), + OrigPHI->getName(), + OrigPHI->getParent()->getFirstNonPHI()); + for (BasicBlock *BB : OrigPHI->blocks()) { + Value *IV = OrigPHI->getIncomingValueForBlock(BB); + // Find the AddrMode this corresponds to + for (ExtAddrMode &AM : AddrModes) { + if (AM.OriginalValue == IV) { + Value *V = NullValueToNullConstant(GetVal(AM), T); + NewPHI->addIncoming(V, BB); + break; + } + } + } + // If we failed to add an incoming value for each in the original then we + // can't proceed + if (NewPHI->getNumIncomingValues() != OrigPHI->getNumIncomingValues()) { + NewPHI->eraseFromParent(); + return nullptr; + } + Result = NewPHI; + } else if (SelectInst *OrigSelect = dyn_cast(Addr)) { + // Can't do anything if we don't have exactly two addrmodes + if (AddrModes.size() != 2) + return nullptr; + // The two addrmodes we have must correspond to the operands of the select + if (AddrModes[0].OriginalValue != OrigSelect->getTrueValue()) + return nullptr; + if (AddrModes[1].OriginalValue != OrigSelect->getFalseValue()) + return nullptr; + Value *X = NullValueToNullConstant(GetVal(AddrModes[0]), T); + Value *Y = NullValueToNullConstant(GetVal(AddrModes[1]), T); + Result = SelectInst::Create(OrigSelect->getCondition(), X, Y, + OrigSelect->getName(), OrigSelect, OrigSelect); + } else { + llvm_unreachable("Addr should be PHI or Select"); + } + return Result; } }; @@ -4504,21 +4640,22 @@ // Try to collapse single-value PHI nodes. This is necessary to undo // unprofitable PRE transformations. - SmallVector worklist; + SmallVector, 8> worklist; SmallPtrSet Visited; - worklist.push_back(Addr); + worklist.push_back({Addr,MemoryInst}); // Use a worklist to iteratively look through PHI and select nodes, and // ensure that the addressing mode obtained from the non-PHI/select roots of // the graph are compatible. bool PhiOrSelectSeen = false; SmallVector AddrModeInsts; - AddressingModeCombiner AddrModes; + AddressingModeCombiner AddrModes(*DL); TypePromotionTransaction TPT(RemovedInsts); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); while (!worklist.empty()) { - Value *V = worklist.back(); + Value *V = worklist.back().first; + User *U = worklist.back().second; worklist.pop_back(); // We allow traversing cyclic Phi nodes. @@ -4536,14 +4673,14 @@ // For a PHI node, push all of its incoming values. if (PHINode *P = dyn_cast(V)) { for (Value *IncValue : P->incoming_values()) - worklist.push_back(IncValue); + worklist.push_back({IncValue,P}); PhiOrSelectSeen = true; continue; } // Similar for select. if (SelectInst *SI = dyn_cast(V)) { - worklist.push_back(SI->getFalseValue()); - worklist.push_back(SI->getTrueValue()); + worklist.push_back({SI->getFalseValue(),SI}); + worklist.push_back({SI->getTrueValue(),SI}); PhiOrSelectSeen = true; continue; } @@ -4556,6 +4693,7 @@ V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, InsertedInsts, PromotedInsts, TPT); NewAddrMode.OriginalValue = V; + NewAddrMode.AddrModeUser = U; if (!AddrModes.addNewAddrMode(NewAddrMode)) break; Index: test/CodeGen/X86/tail-merge-identical.ll =================================================================== --- test/CodeGen/X86/tail-merge-identical.ll +++ test/CodeGen/X86/tail-merge-identical.ll @@ -8,8 +8,8 @@ ; %else1 and %then2 end up lowering to identical blocks. These blocks should be ; merged during tail-merging. ; CHECK-LABEL: merge_identical_blocks -; CHECK: movl $data+4 -; CHECK-NOT: movl $data+4 +; CHECK: movl $4 +; CHECK-NOT: movl $4 ; CHECK: retq define void @merge_identical_blocks(i1 %a, i1 %b) { entry: Index: test/CodeGen/X86/x86-cmov-converter.ll =================================================================== --- test/CodeGen/X86/x86-cmov-converter.ll +++ test/CodeGen/X86/x86-cmov-converter.ll @@ -234,7 +234,9 @@ } ; CHECK-LABEL: BinarySearch -; CHECK: cmov +; CHECK-NOT: cmov +; CHECK: setae %[[REG:[a-z]]]l +; CHECK-NEXT: movq 8(%rdx,%r[[REG]]x,8) define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %struct.Node* nocapture readonly %Next) #0 { entry: Index: test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll =================================================================== --- /dev/null +++ test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll @@ -0,0 +1,367 @@ +; RUN: opt -S -codegenprepare -mtriple=thumbv7m < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +@gv1 = common global i32 0, align 4 +@gv2 = common global i32 0, align 4 + +; Phi selects between ptr and gep with ptr as base and constant offset +define void @test_phi_onegep_offset(i32* %ptr, i32 %value) { +; CHECK-LABEL: @test_phi_onegep_offset +; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if.then ] +; CHECK: phi i32 [ 0, %entry ], [ 4, %if.then ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %gep = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if.end + +if.end: + %phi = phi i32* [ %ptr, %entry ], [ %gep, %if.then ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between two geps with same base, different constant offsets +define void @test_phi_twogep_offset(i32* %ptr, i32 %value) { +; CHECK-LABEL: @test_phi_twogep_offset +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] +; CHECK: phi i32 [ 4, %if.then ], [ 8, %if.else ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if.end + +if.else: + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between ptr and gep with ptr as base and nonconstant offset +define void @test_phi_onegep_nonconst_offset(i32* %ptr, i32 %value, i32 %off) { +; CHECK-LABEL: @test_phi_onegep_nonconst_offset +; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if.then ] +; CHECK: phi i32 [ 0, %entry ], [ %off, %if.then ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %gep = getelementptr inbounds i32, i32* %ptr, i32 %off + br label %if.end + +if.end: + %phi = phi i32* [ %ptr, %entry ], [ %gep, %if.then ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between two geps with same base, different nonconstant offsets +define void @test_phi_twogep_nonconst_offset(i32* %ptr, i32 %value, i32 %off1, i32 %off2) { +; CHECK-LABEL: @test_phi_twogep_nonconst_offset +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] +; CHECK: phi i32 [ %off1, %if.then ], [ %off2, %if.else ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off1 + br label %if.end + +if.else: + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 %off2 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between two geps with different base, same constant offset +define void @test_phi_twogep_base(i32* %ptr1, i32* %ptr2, i32 %value) { +; CHECK-LABEL: @test_phi_twogep_base +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] +; CHECK: phi i32* [ %ptr1, %if.then ], [ %ptr2, %if.else ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr1, i32 1 + br label %if.end + +if.else: + %gep2 = getelementptr inbounds i32, i32* %ptr2, i32 1 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between two geps with different base global variables, same constant offset +define void @test_phi_twogep_base_gv(i32 %value) { +; CHECK-LABEL: @test_phi_twogep_base_gv +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] +; CHECK: phi i32* [ @gv1, %if.then ], [ @gv2, %if.else ] +entry: + %cmp = icmp sgt i32 %value, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* @gv1, i32 1 + br label %if.end + +if.else: + %gep2 = getelementptr inbounds i32, i32* @gv2, i32 1 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else ] + store i32 %value, i32* %phi, align 4 + ret void +} + +; Phi selects between ptr and gep with ptr as base and constant offset +define void @test_select_onegep_offset(i32* %ptr, i32 %value) { +; CHECK-LABEL: @test_select_onegep_offset +; CHECK-NOT: select i1 %cmp, i32* %ptr, i32* %gep +; CHECK: select i1 %cmp, i32 0, i32 4 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep = getelementptr inbounds i32, i32* %ptr, i32 1 + %select = select i1 %cmp, i32* %ptr, i32* %gep + store i32 %value, i32* %select, align 4 + ret void +} + +; Select between two geps with same base, different constant offsets +define void @test_select_twogep_offset(i32* %ptr, i32 %value) { +; CHECK-LABEL: @test_select_twogep_offset +; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp, i32 4, i32 8 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + %select = select i1 %cmp, i32* %gep1, i32* %gep2 + store i32 %value, i32* %select, align 4 + ret void +} + +; Select between ptr and gep with ptr as base and nonconstant offset +define void @test_select_onegep_nonconst_offset(i32* %ptr, i32 %value, i32 %off) { +; CHECK-LABEL: @test_select_onegep_nonconst_offset +; CHECK-NOT: select i1 %cmp, i32* %ptr, i32* %gep +; CHECK: select i1 %cmp, i32 0, i32 %off +entry: + %cmp = icmp sgt i32 %value, 0 + %gep = getelementptr inbounds i32, i32* %ptr, i32 %off + %select = select i1 %cmp, i32* %ptr, i32* %gep + store i32 %value, i32* %select, align 4 + ret void +} + +; Select between two geps with same base, different nonconstant offsets +define void @test_select_twogep_nonconst_offset(i32* %ptr, i32 %value, i32 %off1, i32 %off2) { +; CHECK-LABEL: @test_select_twogep_nonconst_offset +; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp, i32 %off1, i32 %off2 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off1 + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 %off2 + %select = select i1 %cmp, i32* %gep1, i32* %gep2 + store i32 %value, i32* %select, align 4 + ret void +} + +; Select between two geps with different base, same constant offset +define void @test_select_twogep_base(i32* %ptr1, i32* %ptr2, i32 %value) { +; CHECK-LABEL: @test_select_twogep_base +; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp, i32* %ptr1, i32* %ptr2 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep1 = getelementptr inbounds i32, i32* %ptr1, i32 1 + %gep2 = getelementptr inbounds i32, i32* %ptr2, i32 1 + %select = select i1 %cmp, i32* %gep1, i32* %gep2 + store i32 %value, i32* %select, align 4 + ret void +} + +; Select between two geps with different base global variables, same constant offset +define void @test_select_twogep_base_gv(i32 %value) { +; CHECK-LABEL: @test_select_twogep_base_gv +; CHECK-NOT: select i1 %cmp, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp, i32* @gv1, i32* @gv2 +entry: + %cmp = icmp sgt i32 %value, 0 + %gep1 = getelementptr inbounds i32, i32* @gv1, i32 1 + %gep2 = getelementptr inbounds i32, i32* @gv2, i32 1 + %select = select i1 %cmp, i32* %gep1, i32* %gep2 + store i32 %value, i32* %select, align 4 + ret void +} + +; If the phi is in a different block to where the gep will be, the phi goes where +; the original phi was bit where the gep is. +; CHECK-LABEL: @test_phi_different_block +; CHECK-LABEL: if1.end +; CHECK-NOT: phi i32* [ %ptr, %entry ], [ %gep, %if1.then ] +; CHECK: phi i32 [ 0, %entry ], [ 4, %if1.then ] +define void @test_phi_different_block(i32* %ptr, i32 %value1, i32 %value2) { +entry: + %cmp1 = icmp sgt i32 %value1, 0 + br i1 %cmp1, label %if1.then, label %if1.end + +if1.then: + %gep = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if1.end + +if1.end: + %phi = phi i32* [ %ptr, %entry ], [ %gep, %if1.then ] + %cmp2 = icmp sgt i32 %value2, 0 + br i1 %cmp2, label %if2.then, label %if2.end + +if2.then: + store i32 %value1, i32* %ptr, align 4 + br label %if2.end + +if2.end: + store i32 %value2, i32* %phi, align 4 + ret void +} + +; A phi with three incoming values should be optimised +; CHECK-LABEL: @test_phi_threegep +; CHECK-NOT: phi i32* [ %gep1, %if.then ], [ %gep2, %if.else.then ], [ %gep3, %if.else.else ] +; CHECK: phi i32 [ 4, %if.then ], [ 8, %if.else.then ], [ 12, %if.else.else ] +define void @test_phi_threegep(i32* %ptr, i32 %value1, i32 %value2) { +entry: + %cmp1 = icmp sgt i32 %value1, 0 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if.end + +if.else: + %cmp2 = icmp sgt i32 %value2, 0 + br i1 %cmp2, label %if.else.then, label %if.else.else + +if.else.then: + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + br label %if.end + +if.else.else: + %gep3 = getelementptr inbounds i32, i32* %ptr, i32 3 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep2, %if.else.then ], [ %gep3, %if.else.else ] + store i32 %value1, i32* %phi, align 4 + ret void +} + +; A phi with two incoming values but three geps due to nesting shouldn't be +; optimised +; CHECK-LABEL: @test_phi_threegep_nested +; CHECK: phi i32* [ %gep2, %if.else.then ], [ %gep3, %if.else.else ] +define void @test_phi_threegep_nested(i32* %ptr, i32 %value1, i32 %value2) { +entry: + %cmp1 = icmp sgt i32 %value1, 0 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + br label %if.end + +if.else: + %cmp2 = icmp sgt i32 %value2, 0 + br i1 %cmp2, label %if.else.then, label %if.else.else + +if.else.then: + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + br label %if.else.end + +if.else.else: + %gep3 = getelementptr inbounds i32, i32* %ptr, i32 3 + br label %if.else.end + +if.else.end: + %gep4 = phi i32* [ %gep2, %if.else.then ], [ %gep3, %if.else.else ] + store i32 %value2, i32* %ptr, align 4 + br label %if.end + +if.end: + %phi = phi i32* [ %gep1, %if.then ], [ %gep4, %if.else.end ] + store i32 %value1, i32* %phi, align 4 + ret void +} + +; A nested select is expected to not be optimised +; CHECK-LABEL: @test_nested_select +; CHECK: select i1 %cmp2, i32* %gep1, i32* %gep2 +; CHECK: select i1 %cmp1, i32* %gep1, i32* %select1 +define void @test_nested_select(i32* %ptr, i32 %value1, i32 %value2) { +entry: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 1 + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 2 + %cmp1 = icmp sgt i32 %value1, 0 + %cmp2 = icmp sgt i32 %value2, 0 + %select1 = select i1 %cmp2, i32* %gep1, i32* %gep2 + %select2 = select i1 %cmp1, i32* %gep1, i32* %select1 + store i32 %value1, i32* %select2, align 4 + ret void +} + +; Scaling the offset by a different amount is expected not to be optimised +; CHECK-LABEL: @test_select_different_scale +; CHECK: select i1 %cmp, i32* %gep1, i32* %castgep +define void @test_select_different_scale(i32* %ptr, i32 %value, i32 %off) { +entry: + %cmp = icmp sgt i32 %value, 0 + %castptr = bitcast i32* %ptr to i16* + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 %off + %gep2 = getelementptr inbounds i16, i16* %castptr, i32 %off + %castgep = bitcast i16* %gep2 to i32* + %select = select i1 %cmp, i32* %gep1, i32* %castgep + store i32 %value, i32* %select, align 4 + ret void +} + +; A select between two values is already the best we can do +; CHECK-LABEL: @test_select_trivial +; CHECK: select i1 %cmp, i32* %ptr1, i32* %ptr2 +define void @test_select_trivial(i32* %ptr1, i32* %ptr2, i32 %value) { +entey: + %cmp = icmp sgt i32 %value, 0 + %select = select i1 %cmp, i32* %ptr1, i32* %ptr2 + store i32 %value, i32* %select, align 4 + ret void +} + +; A select between two global variables is already the best we can do +; CHECK-LABEL: @test_select_trivial_gv +; CHECK: select i1 %cmp, i32* @gv1, i32* @gv2 +define void @test_select_trivial_gv(i32 %value) { +entey: + %cmp = icmp sgt i32 %value, 0 + %select = select i1 %cmp, i32* @gv1, i32* @gv2 + store i32 %value, i32* %select, align 4 + ret void +}