Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -537,6 +537,9 @@ bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const; + /// \return True if the extension instruction \p I is foldable in all users. + bool isExtFoldableInAllUsers(const Instruction &I) const; + /// \return The size of a cache line in bytes. unsigned getCacheLineSize() const; @@ -818,6 +821,7 @@ virtual unsigned getRegisterBitWidth(bool Vector) = 0; virtual bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; + virtual bool isExtFoldableInAllUsers(const Instruction &I) = 0; virtual unsigned getCacheLineSize() = 0; virtual unsigned getPrefetchDistance() = 0; virtual unsigned getMinPrefetchStride() = 0; @@ -1054,6 +1058,9 @@ return Impl.shouldConsiderAddressTypePromotion( I, AllowPromotionWithoutCommonHeader); } + bool isExtFoldableInAllUsers(const Instruction &I) override { + return Impl.isExtFoldableInAllUsers(I); + } unsigned getCacheLineSize() override { return Impl.getCacheLineSize(); } Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -312,6 +312,10 @@ return false; } + bool isExtFoldableInAllUsers(const Instruction &I) { + return false; + } + unsigned getCacheLineSize() { return 0; } unsigned getPrefetchDistance() { return 0; } Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -279,6 +279,10 @@ I, AllowPromotionWithoutCommonHeader); } +bool TargetTransformInfo::isExtFoldableInAllUsers(const 
Instruction &I) const { + return TTIImpl->isExtFoldableInAllUsers(I); +} + unsigned TargetTransformInfo::getCacheLineSize() const { return TTIImpl->getCacheLineSize(); } Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -1058,7 +1058,7 @@ } /// SinkCast - Sink the specified cast instruction into its user blocks -static bool SinkCast(CastInst *CI) { +static bool SinkCast(CastInst *&CI, SetOfInstrs &InsertedInsts) { BasicBlock *DefBB = CI->getParent(); /// InsertedCasts - Only insert a cast in each block once. @@ -1102,6 +1102,7 @@ assert(InsertPt != UserBB->end()); InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", &*InsertPt); + InsertedInsts.insert(InsertedCast); } // Replace a use of the cast with a use of the new cast. @@ -1113,6 +1114,7 @@ // If we removed all uses, nuke the cast. if (CI->use_empty()) { CI->eraseFromParent(); + CI = nullptr; MadeChange = true; } @@ -1126,7 +1128,7 @@ /// Return true if any changes are made. /// static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, - const DataLayout &DL) { + const DataLayout &DL, SetOfInstrs &InsertedInsts) { // Sink only "cheap" (or nop) address-space casts. This is a weaker condition // than sinking only nop casts, but is helpful on some platforms. if (auto *ASC = dyn_cast(CI)) { @@ -1161,7 +1163,7 @@ if (SrcVT != DstVT) return false; - return SinkCast(CI); + return SinkCast(CI, InsertedInsts); } /// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if @@ -4756,11 +4758,25 @@ /// \p Inst[in/out] the extension may be modified during the process if some /// promotions apply. bool CodeGenPrepare::optimizeExt(Instruction *&Inst) { + bool Changed = false; // ExtLoad formation and address type promotion infrastructure requires TLI to // be effective. 
if (!TLI) return false; + if (Inst->getOpcode() == Instruction::SExt) { + if (TTI->isExtFoldableInAllUsers(*Inst)) { + // Sink the current extension into user blocks if foldable with all users. + CastInst *SI = cast(Inst); + Changed = SinkCast(SI, InsertedInsts); + // Check if the instruction is erased. + if (!SI) { + Inst = nullptr; + return true; + } + } + } + bool AllowPromotionWithoutCommonHeader = false; /// See if it is an interesting sext operations for the address type /// promotion before trying to promote it, e.g., the ones with the right @@ -4808,7 +4824,7 @@ return true; TPT.rollback(LastKnownGood); - return false; + return Changed; } // Perform address type promotion if doing so is profitable. @@ -4882,6 +4898,9 @@ } bool CodeGenPrepare::optimizeExtUses(Instruction *I) { + if (!I) + return false; + BasicBlock *DefBB = I->getParent(); // If the result of a {s|z}ext and its source are both live out, rewrite all @@ -5978,7 +5997,7 @@ if (isa(CI->getOperand(0))) return false; - if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL)) + if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL, InsertedInsts)) return true; if (isa(I) || isa(I)) { @@ -5988,7 +6007,7 @@ TLI->getTypeAction(CI->getContext(), TLI->getValueType(*DL, CI->getType())) == TargetLowering::TypeExpandInteger) { - return SinkCast(CI); + return SinkCast(CI, InsertedInsts); } else { bool MadeChange = optimizeExt(I); return MadeChange | optimizeExtUses(I); Index: lib/Target/AArch64/AArch64TargetTransformInfo.h =================================================================== --- lib/Target/AArch64/AArch64TargetTransformInfo.h +++ lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -127,6 +127,8 @@ shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader); + bool isExtFoldableInAllUsers(const Instruction &I); + unsigned getCacheLineSize(); unsigned getPrefetchDistance(); Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp 
=================================================================== --- lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -660,6 +660,14 @@ return Considerable; } +bool AArch64TTIImpl::isExtFoldableInAllUsers(const Instruction &I) { + // FIXME: we handle only SExt for now. + if (I.getOpcode() != Instruction::SExt) + return false; + // SExt will be free when it is foldable in all users. + return TLI->isExtFree(&I); +} + unsigned AArch64TTIImpl::getCacheLineSize() { return ST->getCacheLineSize(); } Index: test/CodeGen/AArch64/aarch64-address-type-promotion-sink.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/aarch64-address-type-promotion-sink.ll @@ -0,0 +1,122 @@ +; RUN: llc < %s -o - | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +%struct.16B = type { i16, i16 , i16, i16, i16, i16, i16, i16} +define i32 @func_16B(i16 %c, i16 %c2, i16* %base, i32 %i, i16 %v16, %struct.16B* %P) { +; CHECK-LABEL: @func_16B + +entry: + %s_ext = sext i32 %i to i64 + +; CHECK-LABEL: %entry +; CHECK: ldrh w{{[0-9]+}}, [x[[ADDR0:[0-9]+]]] + + %addr0 = getelementptr inbounds %struct.16B, %struct.16B* %P, i64 %s_ext, i32 0 + %cc = load i16, i16* %addr0 + %cmp = icmp eq i16 %cc, %c + br i1 %cmp, label %if.then, label %out + +if.then: +; CHECK-LABEL: %if.then +; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}} +; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw #1] + %addr1 = getelementptr inbounds i16, i16* %base, i64 %s_ext + %v = load i16, i16* %addr1 + %cmp2 = icmp eq i16 %v, %c2 + br i1 %cmp2, label %if.then2, label %out + +if.then2: +; CHECK-LABEL: %if.then2 +; CHECK-NOT: add x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]]] +; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]], #2] + + %addr2 = getelementptr inbounds %struct.16B, %struct.16B* %P, i64 %s_ext, i32 1 + store 
i16 %v16, i16* %addr0 + store i16 %v16, i16* %addr2 + ret i32 0 + +out: + ret i32 0 +} + +%struct.8B = type { i16, i16 , i16, i16} +define i32 @func_8B(i16 %c, i16 %c2, i16* %base, i32 %i, i16 %v16, %struct.8B* %P) { +; CHECK-LABEL: @func_8B + +entry: + %s_ext = sext i32 %i to i64 + +; CHECK-LABEL: %entry +; CHECK: ldrh w{{[0-9]+}}, [x[[ADDR0:[0-9]+]]] + + %addr0 = getelementptr inbounds %struct.8B, %struct.8B* %P, i64 %s_ext, i32 0 + %cc = load i16, i16* %addr0 + %cmp = icmp eq i16 %cc, %c + br i1 %cmp, label %if.then, label %out + +if.then: +; CHECK-LABEL: %if.then +; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}} +; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw #1] + %addr1 = getelementptr inbounds i16, i16* %base, i64 %s_ext + %v = load i16, i16* %addr1 + %cmp2 = icmp eq i16 %v, %c2 + br i1 %cmp2, label %if.then2, label %out + +if.then2: +; CHECK-LABEL: %if.then2 +; CHECK-NOT: add x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]]] +; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]], #2] + + %addr2 = getelementptr inbounds %struct.8B, %struct.8B* %P, i64 %s_ext, i32 1 + store i16 %v16, i16* %addr0 + store i16 %v16, i16* %addr2 + ret i32 0 + +out: + ret i32 0 +} + +%struct.4B = type { i16, i16 } +define i32 @func_4B(i16 %c, i16 %c2, i16* %base, i32 %i, i16 %v16, %struct.4B* %P) { +; CHECK-LABEL: @func_4B + +entry: + %s_ext = sext i32 %i to i64 + +; CHECK-LABEL: %entry +; CHECK: ldrh w{{[0-9]+}}, [x[[ADDR0:[0-9]+]]] + + %addr0 = getelementptr inbounds %struct.4B, %struct.4B* %P, i64 %s_ext, i32 0 + %cc = load i16, i16* %addr0 + %cmp = icmp eq i16 %cc, %c + br i1 %cmp, label %if.then, label %out + +if.then: +; CHECK-LABEL: %if.then +; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}} +; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw #1] + %addr1 = getelementptr inbounds i16, i16* %base, i64 %s_ext + %v = load i16, i16* %addr1 + %cmp2 = icmp eq i16 %v, %c2 + br i1 %cmp2, label %if.then2, label %out + +if.then2: +; CHECK-LABEL: %if.then2 +; 
CHECK-NOT: add x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]]] +; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]], #2] + + %addr2 = getelementptr inbounds %struct.4B, %struct.4B* %P, i64 %s_ext, i32 1 + store i16 %v16, i16* %addr0 + store i16 %v16, i16* %addr2 + ret i32 0 + +out: + ret i32 0 +} +