[AArch64] AddressTypePromotion: keep foldable sign extensions next to their GEPs.

mergeSExts() now skips a sext when every user is a GEP in which all other
index operands are constant integers and the type indexed by the sext has an
alloc size that is a power of two no larger than 16 bytes. For such GEPs that
live in a different block than the sext, the sext is cloned right before the
GEP so that it can be folded into the address calculation.

Index: lib/Target/AArch64/AArch64AddressTypePromotion.cpp
===================================================================
--- lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -32,8 +32,10 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
@@ -80,6 +82,7 @@
   /// Filter out all sexts that does not have this type.
   /// Currently initialized with Int64Ty.
   Type *ConsideredSExtType;
+  const DataLayout *DL;
 
   // This transformation requires dominator info.
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -128,6 +131,14 @@
   /// Merge redundant sign extension operations in common dominator.
   void mergeSExts(ValueToInsts &ValToSExtendedUses,
                   SetOfInstructions &ToRemove);
+
+  /// Check if the sign extension is foldable as a part of address calculation
+  /// in all GEPs which take the sign extension as an index.
+  bool isSExtFoldableInAllUsers(Instruction *SExtInst,
+                                Instructions &GEPsInOtherBlock);
+
+  /// Sink a foldable sign extension to user blocks.
+  void sinkFoldableSExt(Instruction *SExtInst, Instructions &GEPsInOtherBlock);
 };
 } // end anonymous namespace.
 
@@ -363,6 +374,48 @@
   return LocalChange;
 }
 
+bool AArch64AddressTypePromotion::isSExtFoldableInAllUsers(
+    Instruction *SExtInst, Instructions &GEPsInOtherBlock) {
+  BasicBlock *SExtBB = SExtInst->getParent();
+  for (User *U : SExtInst->users()) {
+    // FIXME: To be simple, for now, we handle sign extensions used only by GEPs
+    // directly. We could also sink the promoted instructions forming a chain
+    // between SExt and GEP if foldable all together.
+    Instruction *GEPInst = dyn_cast<GetElementPtrInst>(U);
+    if (!GEPInst)
+      return false;
+
+    // FIXME: For foldability check in GEP, we simply see if all operands except
+    // SExtInst are constants (e.g., %base + TypeSize * sext + constant), and
+    // check if the type size indexed by the sign extension is the foldable
+    // amount as sign extension in add/sub instruction.
+    gep_type_iterator GTI = gep_type_begin(GEPInst);
+    for (unsigned i = 1, e = GEPInst->getNumOperands(); i != e; ++i, ++GTI) {
+      if (GEPInst->getOperand(i) == SExtInst) {
+        uint64_t TypeSize = DL->getTypeAllocSize(GTI.getIndexedType());
+        if (TypeSize > 16 || !isPowerOf2_64(TypeSize))
+          return false;
+      } else if (!isa<ConstantInt>(GEPInst->getOperand(i)))
+        return false;
+    }
+    if (GEPInst->getParent() != SExtBB)
+      GEPsInOtherBlock.push_back(GEPInst);
+  }
+  return true;
+}
+
+void AArch64AddressTypePromotion::sinkFoldableSExt(
+    Instruction *SExtInst, Instructions &GEPsInOtherBlock) {
+  for (auto *GEPInst : GEPsInOtherBlock) {
+    assert(isa<GetElementPtrInst>(GEPInst) && "Expect only GEP as a user.");
+    assert(GEPInst->getParent() != SExtInst->getParent() &&
+           "Expect it to be in different block.");
+    Instruction *SExtInstSunk = SExtInst->clone();
+    SExtInstSunk->insertBefore(GEPInst);
+    GEPInst->replaceUsesOfWith(SExtInst, SExtInstSunk);
+  }
+}
+
 void AArch64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses,
                                              SetOfInstructions &ToRemove) {
   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -373,6 +426,16 @@
     for (Instruction *Inst : Insts) {
       if (ToRemove.count(Inst))
         continue;
+
+      // Sink sign extension operations if foldable into address calculation.
+      Instructions GEPsInOtherBlock;
+      if (isSExtFoldableInAllUsers(Inst, GEPsInOtherBlock)) {
+        if (!GEPsInOtherBlock.empty())
+          sinkFoldableSExt(Inst, GEPsInOtherBlock);
+        // No need to be merged if foldable as a part of address calculation.
+        continue;
+      }
+
       bool inserted = false;
       for (auto &Pt : CurPts) {
         if (DT.dominates(Inst, Pt)) {
@@ -475,6 +538,7 @@
     return false;
   Func = &F;
   ConsideredSExtType = Type::getInt64Ty(Func->getContext());
+  DL = &F.getParent()->getDataLayout();
 
   DEBUG(dbgs() << "*** " << getPassName() << ": " << Func->getName() << '\n');
 
Index: test/CodeGen/AArch64/aarch64-address-type-promotion-sink.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/aarch64-address-type-promotion-sink.ll
@@ -0,0 +1,257 @@
+; RUN: llc < %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%struct.16B = type { i16, i16 , i16, i16, i16, i16, i16, i16}
+define i32 @func_16B(i16 %c, i16 %c2, i16* %base, i32 %i, i16 %v16, %struct.16B* %P) {
+; CHECK-LABEL: @func_16B
+
+entry:
+  %s_ext = sext i32 %i to i64
+
+; CHECK-LABEL: %entry
+; CHECK: ldrh w{{[0-9]+}}, [x[[ADDR0:[0-9]+]]]
+
+  %addr0 = getelementptr inbounds %struct.16B, %struct.16B* %P, i64 %s_ext, i32 0
+  %cc = load i16, i16* %addr0
+  %cmp = icmp eq i16 %cc, %c
+  br i1 %cmp, label %if.then, label %out
+
+if.then:
+; CHECK-LABEL: %if.then
+; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw #1]
+  %addr1 = getelementptr inbounds i16, i16* %base, i64 %s_ext
+  %v = load i16, i16* %addr1
+  %cmp2 = icmp eq i16 %v, %c2
+  br i1 %cmp2, label %if.then2, label %out
+
+if.then2:
+; CHECK-LABEL: %if.then2
+; CHECK-NOT: add x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]]]
+; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]], #2]
+
+  %addr2 = getelementptr inbounds %struct.16B, %struct.16B* %P, i64 %s_ext, i32 1
+  store i16 %v16, i16* %addr0
+  store i16 %v16, i16* %addr2
+  ret i32 0
+
+out:
+  ret i32 0
+}
+
+%struct.8B = type { i16, i16 , i16, i16}
+define i32 @func_8B(i16 %c, i16 %c2, i16* %base, i32 %i, i16 %v16, %struct.8B* %P) {
+; CHECK-LABEL: @func_8B
+
+entry:
+  %s_ext = sext i32 %i to i64
+
+; CHECK-LABEL: %entry
+; CHECK: ldrh w{{[0-9]+}}, [x[[ADDR0:[0-9]+]]]
+
+  %addr0 = getelementptr inbounds %struct.8B, %struct.8B* %P, i64 %s_ext, i32 0
+  %cc = load i16, i16* %addr0
+  %cmp = icmp eq i16 %cc, %c
+  br i1 %cmp, label %if.then, label %out
+
+if.then:
+; CHECK-LABEL: %if.then
+; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw #1]
+  %addr1 = getelementptr inbounds i16, i16* %base, i64 %s_ext
+  %v = load i16, i16* %addr1
+  %cmp2 = icmp eq i16 %v, %c2
+  br i1 %cmp2, label %if.then2, label %out
+
+if.then2:
+; CHECK-LABEL: %if.then2
+; CHECK-NOT: add x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]]]
+; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]], #2]
+
+  %addr2 = getelementptr inbounds %struct.8B, %struct.8B* %P, i64 %s_ext, i32 1
+  store i16 %v16, i16* %addr0
+  store i16 %v16, i16* %addr2
+  ret i32 0
+
+out:
+  ret i32 0
+}
+
+%struct.4B = type { i16, i16 }
+define i32 @func_4B(i16 %c, i16 %c2, i16* %base, i32 %i, i16 %v16, %struct.4B* %P) {
+; CHECK-LABEL: @func_4B
+
+entry:
+  %s_ext = sext i32 %i to i64
+
+; CHECK-LABEL: %entry
+; CHECK: ldrh w{{[0-9]+}}, [x[[ADDR0:[0-9]+]]]
+
+  %addr0 = getelementptr inbounds %struct.4B, %struct.4B* %P, i64 %s_ext, i32 0
+  %cc = load i16, i16* %addr0
+  %cmp = icmp eq i16 %cc, %c
+  br i1 %cmp, label %if.then, label %out
+
+if.then:
+; CHECK-LABEL: %if.then
+; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw #1]
+  %addr1 = getelementptr inbounds i16, i16* %base, i64 %s_ext
+  %v = load i16, i16* %addr1
+  %cmp2 = icmp eq i16 %v, %c2
+  br i1 %cmp2, label %if.then2, label %out
+
+if.then2:
+; CHECK-LABEL: %if.then2
+; CHECK-NOT: add x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]]]
+; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]], #2]
+
+  %addr2 = getelementptr inbounds %struct.4B, %struct.4B* %P, i64 %s_ext, i32 1
+  store i16 %v16, i16* %addr0
+  store i16 %v16, i16* %addr2
+  ret i32 0
+
+out:
+  ret i32 0
+}
+
+%struct.2B = type { i8, i8 }
+define i32 @func_2B(i8 %c, i8 %c2, i8* %base, i32 %i, i8 %v16, %struct.2B* %P) {
+; CHECK-LABEL: @func_2B
+
+entry:
+; CHECK-LABEL: %entry
+; CHECK: ldrb w{{[0-9]+}}, [x[[ADDR0:[0-9]+]]]
+  %s_ext = sext i32 %i to i64
+  %addr0 = getelementptr inbounds %struct.2B, %struct.2B* %P, i64 %s_ext, i32 0
+  %cc = load i8, i8* %addr0
+  %cmp = icmp eq i8 %cc, %c
+  br i1 %cmp, label %if.then, label %out
+
+if.then:
+
+; CHECK-LABEL: %if.then
+; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw]
+
+  %addr1 = getelementptr inbounds i8, i8* %base, i64 %s_ext
+  %v = load i8, i8* %addr1
+  %cmp2 = icmp eq i8 %v, %c2
+  br i1 %cmp2, label %if.then2, label %out
+
+if.then2:
+
+; CHECK-LABEL: %if.then2
+; CHECK-NOT: add x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: strb w{{[0-9]+}}, [x[[ADDR0]]]
+; CHECK: strb w{{[0-9]+}}, [x[[ADDR0]], #1]
+  %addr2 = getelementptr inbounds %struct.2B, %struct.2B* %P, i64 %s_ext, i32 1
+  store i8 %v16, i8* %addr0
+  store i8 %v16, i8* %addr2
+  ret i32 0
+
+out:
+  ret i32 0
+}
+
+%struct.1B = type { i8 }
+define i32 @func_1B(i8 %c, i8 %c2, i8* %base, i32 %i, i8 %v16, %struct.1B* %P) {
+; CHECK-LABEL: @func_1B
+
+entry:
+; CHECK-LABEL: %entry
+; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw]
+
+  %s_ext = sext i32 %i to i64
+  %addr0 = getelementptr inbounds %struct.1B, %struct.1B* %P, i64 %s_ext, i32 0
+  %cc = load i8, i8* %addr0
+  %cmp = icmp eq i8 %cc, %c
+  br i1 %cmp, label %if.then, label %out
+
+if.then:
+
+; CHECK-LABEL: %if.then
+; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw]
+
+  %addr1 = getelementptr inbounds i8, i8* %base, i64 %s_ext
+  %v = load i8, i8* %addr1
+  %cmp2 = icmp eq i8 %v, %c2
+  br i1 %cmp2, label %if.then2, label %out
+
+if.then2:
+
+; CHECK-LABEL: %if.then2
+; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw]
+  %addr2 = getelementptr inbounds %struct.1B, %struct.1B* %P, i64 %s_ext, i32 0
+  store i8 %v16, i8* %addr2
+  ret i32 0
+
+out:
+  ret i32 0
+}
+
+%struct.6B = type { i16, i16 , i16}
+define i32 @func_6B(i16 %c, i16 %c2, i16* %base, i32 %i, i16 %v16, %struct.6B* %P) {
+; CHECK-LABEL: @func_6B
+
+entry:
+  %s_ext = sext i32 %i to i64
+  %addr0 = getelementptr inbounds %struct.6B, %struct.6B* %P, i64 %s_ext, i32 0
+  %cc = load i16, i16* %addr0
+  %cmp = icmp eq i16 %cc, %c
+  br i1 %cmp, label %if.then, label %out
+
+if.then:
+; CHECK-LABEL: %if.then
+; CHECK: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+  %addr1 = getelementptr inbounds i16, i16* %base, i64 %s_ext
+  %v = load i16, i16* %addr1
+  %cmp2 = icmp eq i16 %v, %c2
+  br i1 %cmp2, label %if.then2, label %out
+
+if.then2:
+; CHECK-LABEL: %if.then2
+; CHECK: madd
+  %addr2 = getelementptr inbounds %struct.6B, %struct.6B* %P, i64 %s_ext, i32 1
+  store i16 %v16, i16* %addr0
+  store i16 %v16, i16* %addr2
+  ret i32 0
+
+out:
+  ret i32 0
+}
+
+%struct.nomergeup = type { i16, i16 }
+define i16 @func_no_mergeup(i32 %i, i16 %v16, %struct.nomergeup* %P) {
+; CHECK-LABEL: @func_no_mergeup
+entry:
+; CHECK-LABEL: %entry
+; CHECK: add x[[ADDR:[0-9]+]], x{{[0-9]+}}, w{{[0-9]+}}, sxtw #2
+
+  %add1 = add nsw i32 %i, 1
+  %s_ext1 = sext i32 %add1 to i64
+  %addr2 = getelementptr inbounds %struct.nomergeup, %struct.nomergeup* %P, i64 %s_ext1, i32 0
+
+  %lv = load i16, i16* %addr2
+  %cmp = icmp eq i16 %lv, %v16
+  br i1 %cmp, label %if.then, label %if.then2
+
+if.then:
+; CHECK-LABEL: %if.then
+; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: ldrh w{{[0-9]+}}, [x[[ADDR]], #2]
+
+  %s_ext2 = sext i32 %i to i64
+  %addr = getelementptr inbounds %struct.nomergeup, %struct.nomergeup* %P, i64 %s_ext2, i32 1
+  %v = load i16, i16* %addr
+  ret i16 %v
+
+if.then2:
+  ret i16 0
+}