Index: aarch64-address-type-promotion-sink.ll
===================================================================
--- /dev/null
+++ aarch64-address-type-promotion-sink.ll
@@ -0,0 +1,268 @@
+; RUN: llc < %s -o - | FileCheck %s
+
+; The functions below index memory through sign-extended i32 values from
+; several basic blocks; the FileCheck patterns pin the expected AArch64 code.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; 16-byte element: no separate sxtw in if.then and no add in if.then2.
+%struct.16B = type { i16, i16 , i16, i16, i16, i16, i16, i16}
+define i32 @func_16B(i16 %c, i16 %c2, i16* %base, i32 %i, i16 %v16, %struct.16B* %P) {
+; CHECK-LABEL: @func_16B
+
+entry:
+  %s_ext = sext i32 %i to i64
+
+; CHECK-LABEL: %entry
+; CHECK: ldrh w{{[0-9]+}}, [x[[ADDR0:[0-9]+]]]
+
+  %addr0 = getelementptr inbounds %struct.16B, %struct.16B* %P, i64 %s_ext, i32 0
+  %cc = load i16, i16* %addr0
+  %cmp = icmp eq i16 %cc, %c
+  br i1 %cmp, label %if.then, label %out
+
+if.then:
+; CHECK-LABEL: %if.then
+; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw #1]
+  %addr1 = getelementptr inbounds i16, i16* %base, i64 %s_ext
+  %v = load i16, i16* %addr1
+  %cmp2 = icmp eq i16 %v, %c2
+  br i1 %cmp2, label %if.then2, label %out
+
+if.then2:
+; CHECK-LABEL: %if.then2
+; CHECK-NOT: add x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]]]
+; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]], #2]
+
+  %addr2 = getelementptr inbounds %struct.16B, %struct.16B* %P, i64 %s_ext, i32 1
+  store i16 %v16, i16* %addr0
+  store i16 %v16, i16* %addr2
+  ret i32 0
+
+out:
+  ret i32 0
+}
+
+; 8-byte element: same expectations as func_16B.
+%struct.8B = type { i16, i16 , i16, i16}
+define i32 @func_8B(i16 %c, i16 %c2, i16* %base, i32 %i, i16 %v16, %struct.8B* %P) {
+; CHECK-LABEL: @func_8B
+
+entry:
+  %s_ext = sext i32 %i to i64
+
+; CHECK-LABEL: %entry
+; CHECK: ldrh w{{[0-9]+}}, [x[[ADDR0:[0-9]+]]]
+
+  %addr0 = getelementptr inbounds %struct.8B, %struct.8B* %P, i64 %s_ext, i32 0
+  %cc = load i16, i16* %addr0
+  %cmp = icmp eq i16 %cc, %c
+  br i1 %cmp, label %if.then, label %out
+
+if.then:
+; CHECK-LABEL: %if.then
+; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw #1]
+  %addr1 = getelementptr inbounds i16, i16* %base, i64 %s_ext
+  %v = load i16, i16* %addr1
+  %cmp2 = icmp eq i16 %v, %c2
+  br i1 %cmp2, label %if.then2, label %out
+
+if.then2:
+; CHECK-LABEL: %if.then2
+; CHECK-NOT: add x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]]]
+; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]], #2]
+
+  %addr2 = getelementptr inbounds %struct.8B, %struct.8B* %P, i64 %s_ext, i32 1
+  store i16 %v16, i16* %addr0
+  store i16 %v16, i16* %addr2
+  ret i32 0
+
+out:
+  ret i32 0
+}
+
+; 4-byte element: same expectations as func_16B.
+%struct.4B = type { i16, i16 }
+define i32 @func_4B(i16 %c, i16 %c2, i16* %base, i32 %i, i16 %v16, %struct.4B* %P) {
+; CHECK-LABEL: @func_4B
+
+entry:
+  %s_ext = sext i32 %i to i64
+
+; CHECK-LABEL: %entry
+; CHECK: ldrh w{{[0-9]+}}, [x[[ADDR0:[0-9]+]]]
+
+  %addr0 = getelementptr inbounds %struct.4B, %struct.4B* %P, i64 %s_ext, i32 0
+  %cc = load i16, i16* %addr0
+  %cmp = icmp eq i16 %cc, %c
+  br i1 %cmp, label %if.then, label %out
+
+if.then:
+; CHECK-LABEL: %if.then
+; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw #1]
+  %addr1 = getelementptr inbounds i16, i16* %base, i64 %s_ext
+  %v = load i16, i16* %addr1
+  %cmp2 = icmp eq i16 %v, %c2
+  br i1 %cmp2, label %if.then2, label %out
+
+if.then2:
+; CHECK-LABEL: %if.then2
+; CHECK-NOT: add x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]]]
+; CHECK: strh w{{[0-9]+}}, [x[[ADDR0]], #2]
+
+  %addr2 = getelementptr inbounds %struct.4B, %struct.4B* %P, i64 %s_ext, i32 1
+  store i16 %v16, i16* %addr0
+  store i16 %v16, i16* %addr2
+  ret i32 0
+
+out:
+  ret i32 0
+}
+
+; 2-byte element (i8 fields): same expectations, using byte loads and stores.
+%struct.2B = type { i8, i8 }
+define i32 @func_2B(i8 %c, i8 %c2, i8* %base, i32 %i, i8 %v16, %struct.2B* %P) {
+; CHECK-LABEL: @func_2B
+
+entry:
+; CHECK-LABEL: %entry
+; CHECK: ldrb w{{[0-9]+}}, [x[[ADDR0:[0-9]+]]]
+  %s_ext = sext i32 %i to i64
+  %addr0 = getelementptr inbounds %struct.2B, %struct.2B* %P, i64 %s_ext, i32 0
+  %cc = load i8, i8* %addr0
+  %cmp = icmp eq i8 %cc, %c
+  br i1 %cmp, label %if.then, label %out
+
+if.then:
+
+; CHECK-LABEL: %if.then
+; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw]
+
+  %addr1 = getelementptr inbounds i8, i8* %base, i64 %s_ext
+  %v = load i8, i8* %addr1
+  %cmp2 = icmp eq i8 %v, %c2
+  br i1 %cmp2, label %if.then2, label %out
+
+if.then2:
+
+; CHECK-LABEL: %if.then2
+; CHECK-NOT: add x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: strb w{{[0-9]+}}, [x[[ADDR0]]]
+; CHECK: strb w{{[0-9]+}}, [x[[ADDR0]], #1]
+  %addr2 = getelementptr inbounds %struct.2B, %struct.2B* %P, i64 %s_ext, i32 1
+  store i8 %v16, i8* %addr0
+  store i8 %v16, i8* %addr2
+  ret i32 0
+
+out:
+  ret i32 0
+}
+
+; 1-byte element: each access is expected in the [xN, wM, sxtw] addressing form.
+%struct.1B = type { i8 }
+define i32 @func_1B(i8 %c, i8 %c2, i8* %base, i32 %i, i8 %v16, %struct.1B* %P) {
+; CHECK-LABEL: @func_1B
+
+entry:
+; CHECK-LABEL: %entry
+; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw]
+
+  %s_ext = sext i32 %i to i64
+  %addr0 = getelementptr inbounds %struct.1B, %struct.1B* %P, i64 %s_ext, i32 0
+  %cc = load i8, i8* %addr0
+  %cmp = icmp eq i8 %cc, %c
+  br i1 %cmp, label %if.then, label %out
+
+if.then:
+
+; CHECK-LABEL: %if.then
+; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw]
+
+  %addr1 = getelementptr inbounds i8, i8* %base, i64 %s_ext
+  %v = load i8, i8* %addr1
+  %cmp2 = icmp eq i8 %v, %c2
+  br i1 %cmp2, label %if.then2, label %out
+
+if.then2:
+
+; CHECK-LABEL: %if.then2
+; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}, w{{[0-9]+}}, sxtw]
+  %addr2 = getelementptr inbounds %struct.1B, %struct.1B* %P, i64 %s_ext, i32 0
+  store i8 %v16, i8* %addr2
+  ret i32 0
+
+out:
+  ret i32 0
+}
+
+; 6-byte element: a separate sxtw in if.then and a madd in if.then2 are expected.
+%struct.6B = type { i16, i16 , i16}
+define i32 @func_6B(i16 %c, i16 %c2, i16* %base, i32 %i, i16 %v16, %struct.6B* %P) {
+; CHECK-LABEL: @func_6B
+
+entry:
+  %s_ext = sext i32 %i to i64
+  %addr0 = getelementptr inbounds %struct.6B, %struct.6B* %P, i64 %s_ext, i32 0
+  %cc = load i16, i16* %addr0
+  %cmp = icmp eq i16 %cc, %c
+  br i1 %cmp, label %if.then, label %out
+
+if.then:
+; CHECK-LABEL: %if.then
+; CHECK: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+  %addr1 = getelementptr inbounds i16, i16* %base, i64 %s_ext
+  %v = load i16, i16* %addr1
+  %cmp2 = icmp eq i16 %v, %c2
+  br i1 %cmp2, label %if.then2, label %out
+
+if.then2:
+; CHECK-LABEL: %if.then2
+; CHECK:madd
+  %addr2 = getelementptr inbounds %struct.6B, %struct.6B* %P, i64 %s_ext, i32 1
+  store i16 %v16, i16* %addr0
+  store i16 %v16, i16* %addr2
+  ret i32 0
+
+out:
+  ret i32 0
+}
+
+; Two sexts (of %i + 1 and of %i): entry forms the address with an add using
+; sxtw #2, and if.then loads from that register at #2 without a separate sxtw.
+%struct.nomergeup = type { i16, i16 }
+define i16 @func_no_mergeup(i32 %i, i16 %v16, %struct.nomergeup* %P) {
+; CHECK-LABEL: @func_no_mergeup
+entry:
+; CHECK-LABEL: %entry
+; CHECK: add x[[ADDR:[0-9]+]], x{{[0-9]+}}, w{{[0-9]+}}, sxtw #2
+
+  %add1 = add nsw i32 %i, 1
+  %s_ext1 = sext i32 %add1 to i64
+  %addr2 = getelementptr inbounds %struct.nomergeup, %struct.nomergeup* %P, i64 %s_ext1, i32 0
+
+  %lv = load i16, i16* %addr2
+  %cmp = icmp eq i16 %lv, %v16
+  br i1 %cmp, label %if.then, label %if.then2
+
+if.then:
+; CHECK-LABEL: %if.then
+; CHECK-NOT: sxtw x{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: ldrh w{{[0-9]+}}, [x[[ADDR]], #2]
+
+  %s_ext2 = sext i32 %i to i64
+  %addr = getelementptr inbounds %struct.nomergeup, %struct.nomergeup* %P, i64 %s_ext2, i32 1
+  %v = load i16, i16* %addr
+  ret i16 %v
+
+if.then2:
+  ret i16 0
+}
NOTE(review): the template arguments in the C++ hunks below (<Instruction *>,
<GetElementPtrInst>, <ConstantInt> and the SmallVector inline size) were missing
from the text this patch was recovered from and have been reconstructed from
how the values are used; confirm them against the original change. The
indentation of the context lines is likewise reconstructed.
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -234,6 +234,10 @@
                           SmallVectorImpl<Instruction *> &LastMovedExts,
                           unsigned CreatedInstsCost = 0);
     bool mergeSExts(Function &F);
+    bool isSExtFoldableInAllUsers(
+        Instruction *SExtInst, SmallVectorImpl<Instruction *> &GEPsInOtherBlock);
+    void sinkFoldableSExt(Instruction *SExtInst,
+                          SmallVectorImpl<Instruction *> &GEPsInOtherBlock);
     bool splitBranchCondition(Function &F);
     bool simplifyOffsetableRelocate(Instruction &I);
   };
@@ -4363,6 +4367,58 @@
   return Promoted;
 }
 
+/// Give every GEP in \p GEPsInOtherBlock its own copy of \p SExtInst, placed
+/// immediately before that GEP, and rewrite the GEP to use the copy.
+/// Every listed GEP must live in a block other than \p SExtInst's block; the
+/// copies are recorded in InsertedInsts.
+void CodeGenPrepare::sinkFoldableSExt(
+    Instruction *SExtInst, SmallVectorImpl<Instruction *> &GEPsInOtherBlock) {
+  for (auto *GEPInst : GEPsInOtherBlock) {
+    assert(isa<GetElementPtrInst>(GEPInst) && "Expect only GEP as a user.");
+    assert(GEPInst->getParent() != SExtInst->getParent() &&
+           "Expect it to be in different block.");
+    Instruction *SExtInstSunk = SExtInst->clone();
+    SExtInstSunk->insertBefore(GEPInst);
+    GEPInst->replaceUsesOfWith(SExtInst, SExtInstSunk);
+    InsertedInsts.insert(SExtInstSunk);
+  }
+}
+
+/// Return true if every user of \p SExtInst is a GEP whose other index
+/// operands are constants and for which the type indexed by \p SExtInst has a
+/// power-of-two alloc size no larger than 16. GEP users located outside
+/// \p SExtInst's block are appended to \p GEPsInOtherBlock (entries appended
+/// before a failing user is met are left in place when false is returned).
+bool CodeGenPrepare::isSExtFoldableInAllUsers(
+    Instruction *SExtInst, SmallVectorImpl<Instruction *> &GEPsInOtherBlock) {
+  BasicBlock *SExtBB = SExtInst->getParent();
+  for (User *U : SExtInst->users()) {
+    // FIXME: To be simple, for now, we handle sign extensions used only by GEPs
+    // directly. We could also sink the promoted instructions forming a chain
+    // between SExt and GEP if foldable all together.
+    Instruction *GEPInst = dyn_cast<GetElementPtrInst>(U);
+    if (!GEPInst)
+      return false;
+
+    // FIXME: For foldability check in GEP, we simply see if all operands except
+    // SExtInst are constants (i.e., %base + TypeSize * sext + constant), and
+    // check if the type size indexed by the sign extension is the foldable
+    // amount as sign extension in add/sub instruction.
+    gep_type_iterator GTI = gep_type_begin(GEPInst);
+    for (unsigned i = 1, e = GEPInst->getNumOperands(); i != e; ++i, ++GTI) {
+      if (GEPInst->getOperand(i) == SExtInst) {
+        uint64_t TypeSize = DL->getTypeAllocSize(GTI.getIndexedType());
+        if (TypeSize > 16 || !isPowerOf2_64(TypeSize))
+          return false;
+      } else if (!isa<ConstantInt>(GEPInst->getOperand(i)))
+        return false;
+    }
+    if (GEPInst->getParent() != SExtBB)
+      GEPsInOtherBlock.push_back(GEPInst);
+  }
+  return true;
+}
+
 bool CodeGenPrepare::mergeSExts(Function &F) {
   DominatorTree DT(F);
   bool Changed = false;
@@ -4374,6 +4430,19 @@
           Inst->getOperand(0) != Entry.first)
         continue;
 
+      // Sink sign extension operations if foldable into address calculation.
+      SmallVector<Instruction *, 4> GEPsInOtherBlock;
+      if (isSExtFoldableInAllUsers(Inst, GEPsInOtherBlock)) {
+        if (!GEPsInOtherBlock.empty()) {
+          sinkFoldableSExt(Inst, GEPsInOtherBlock);
+          // Sinking inserts instructions and rewrites GEP operands, so the
+          // function has been modified and that must be reported.
+          Changed = true;
+        }
+        // No need to be merged if foldable as a part of address calculation.
+        continue;
+      }
+
       bool inserted = false;
       for (auto &Pt : CurPts) {
         if (DT.dominates(Inst, Pt)) {