Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -395,7 +395,9 @@
 //
 // SeparateConstOffsetFromGEP - Split GEPs for better CSE
 //
-FunctionPass *createSeparateConstOffsetFromGEPPass();
+FunctionPass *
+createSeparateConstOffsetFromGEPPass(const TargetMachine *TM = nullptr,
+                                     bool LowerGEP = false);
 
 //===----------------------------------------------------------------------===//
 //
Index: lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetMachine.cpp
+++ lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -80,6 +80,11 @@
                 cl::desc("Work around Cortex-A53 erratum 835769"),
                 cl::init(false));
 
+static cl::opt<bool>
+EnableGEPOpt("aarch64-gep-opt", cl::Hidden,
+             cl::desc("Enable optimizations on complex GEPs"),
+             cl::init(true));
+
 extern "C" void LLVMInitializeAArch64Target() {
   // Register the target.
   RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
@@ -193,6 +198,19 @@
     addPass(createCFGSimplificationPass());
 
   TargetPassConfig::addIRPasses();
+
+  if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
+    // Call the SeparateConstOffsetFromGEP pass to extract constants within
+    // indices and lower a GEP with multiple indices to either arithmetic
+    // operations or multiple GEPs with a single index.
+    addPass(createSeparateConstOffsetFromGEPPass(TM, true));
+    // Call the EarlyCSE pass to find and remove subexpressions in the lowered
+    // result.
+    addPass(createEarlyCSEPass());
+    // Do loop invariant code motion in case part of the lowered result is
+    // invariant.
+    addPass(createLICMPass());
+  }
 }
 
 // Pass Pipeline Configuration
Index: lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
===================================================================
--- lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -79,6 +79,81 @@
 //   ld.global.f32   %f3, [%rl6+128]; // much better
 //   ld.global.f32   %f4, [%rl6+132]; // much better
 //
+// Another improvement enabled by the LowerGEP flag is to lower a GEP with
+// multiple indices to either multiple GEPs with a single index or arithmetic
+// operations (depending on whether the target uses alias analysis in codegen).
+// Such a transformation has the following benefits:
+// (1) It can always extract constants from indices of struct type.
+// (2) After such lowering, there are more optimization opportunities such as
+//     CSE, LICM and CGP.
+//
+// E.g. The following GEPs have multiple indices:
+//  BB1:
+//    %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
+//    load %p
+//    ...
+//  BB2:
+//    %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
+//    load %p2
+//    ...
+//
+// We cannot do CSE on the common part related to index "i64 %i". Lowering
+// GEPs can achieve this goal.
+// If the target does not use alias analysis in codegen, this pass will
+// lower a GEP with multiple indices into arithmetic operations:
+//  BB1:
+//    %1 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
+//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
+//    %3 = add i64 %1, %2                          ; CSE opportunity
+//    %4 = mul i64 %j1, length_of_struct
+//    %5 = add i64 %3, %4
+//    %6 = add i64 %5, struct_field_3              ; Constant offset
+//    %p = inttoptr i64 %6 to i32*
+//    load %p
+//    ...
+//  BB2:
+//    %7 = ptrtoint [10 x %struct]* %ptr to i64    ; CSE opportunity
+//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
+//    %9 = add i64 %7, %8                          ; CSE opportunity
+//    %10 = mul i64 %j2, length_of_struct
+//    %11 = add i64 %9, %10
+//    %12 = add i64 %11, struct_field_2            ; Constant offset
+//    %p2 = inttoptr i64 %12 to i32*
+//    load %p2
+//    ...
+//
+// If the target uses alias analysis in codegen, this pass will lower a GEP
+// with multiple indices into multiple GEPs with a single index:
+//  BB1:
+//    %1 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
+//    %2 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
+//    %3 = getelementptr i8* %1, i64 %2            ; CSE opportunity
+//    %4 = mul i64 %j1, length_of_struct
+//    %5 = getelementptr i8* %3, i64 %4
+//    %6 = getelementptr i8* %5, struct_field_3    ; Constant offset
+//    %p = bitcast i8* %6 to i32*
+//    load %p
+//    ...
+//  BB2:
+//    %7 = bitcast [10 x %struct]* %ptr to i8*     ; CSE opportunity
+//    %8 = mul i64 %i, length_of_10xstruct         ; CSE opportunity
+//    %9 = getelementptr i8* %7, i64 %8            ; CSE opportunity
+//    %10 = mul i64 %j2, length_of_struct
+//    %11 = getelementptr i8* %9, i64 %10
+//    %12 = getelementptr i8* %11, struct_field_2  ; Constant offset
+//    %p2 = bitcast i8* %12 to i32*
+//    load %p2
+//    ...
+//
+// Lowering GEPs can also benefit other passes such as LICM and CGP.
+// LICM (Loop Invariant Code Motion) cannot hoist/sink a GEP with multiple
+// indices if any of its indices is loop-variant. If we lower such a GEP into
+// invariant and variant parts, LICM can hoist/sink the invariant parts.
+// CGP (CodeGen Prepare) tries to sink address calculations that match the
+// target's addressing modes. A GEP with multiple indices may not match and
+// will not be sunk. If we lower such a GEP into smaller parts, CGP may sink
+// some of them, so we end up with better addressing modes.
+//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -92,6 +167,9 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/IR/IRBuilder.h"
 
 using namespace llvm;
 
@@ -117,18 +195,17 @@
 /// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
 class ConstantOffsetExtractor {
 public:
-  /// Extracts a constant offset from the given GEP index. It outputs the
-  /// numeric value of the extracted constant offset (0 if failed), and a
+  /// Extracts a constant offset from the given GEP index. It returns the
   /// new index representing the remainder (equal to the original index minus
-  /// the constant offset).
+  /// the constant offset), or nullptr if we cannot extract a constant offset.
   /// \p Idx    The given GEP index
-  /// \p NewIdx The new index to replace (output)
   /// \p DL     The datalayout of the module
   /// \p GEP    The given GEP
-  static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL,
-                         GetElementPtrInst *GEP);
-  /// Looks for a constant offset without extracting it. The meaning of the
-  /// arguments and the return value are the same as Extract.
+  static Value *Extract(Value *Idx, const DataLayout *DL,
+                        GetElementPtrInst *GEP);
+  /// Looks for a constant offset from the given GEP index without extracting
+  /// it. It returns the numeric value of the found constant offset (0 on
+  /// failure). The meanings of the arguments are the same as in Extract.
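+  /// For example, given the GEP index (a + (b + 5)), Find would return 5 and
+  /// Extract would return a new index computing a + b, assuming the chain of
+  /// adds is traceable per CanTraceInto.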
   static int64_t Find(Value *Idx, const DataLayout *DL,
                       GetElementPtrInst *GEP);
 
 private:
@@ -228,7 +305,9 @@
 class SeparateConstOffsetFromGEP : public FunctionPass {
 public:
   static char ID;
-  SeparateConstOffsetFromGEP() : FunctionPass(ID) {
+  SeparateConstOffsetFromGEP(const TargetMachine *TM = nullptr,
+                             bool LowerGEP = false)
+      : FunctionPass(ID), TM(TM), LowerGEP(LowerGEP) {
     initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
   }
 
@@ -251,10 +330,22 @@
   /// Tries to split the given GEP into a variadic base and a constant offset,
   /// and returns true if the splitting succeeds.
   bool splitGEP(GetElementPtrInst *GEP);
-  /// Finds the constant offset within each index, and accumulates them. This
-  /// function only inspects the GEP without changing it. The output
-  /// NeedsExtraction indicates whether we can extract a non-zero constant
-  /// offset from any index.
+  /// Lowers a GEP with multiple indices into multiple GEPs with a single
+  /// index.
+  /// \p GEP                      The given GEP, whose constant offset has
+  ///                             already been extracted.
+  /// \p AccumulativeByteOffset   The extracted constant offset.
+  void lowerToSingleIndexGEPs(GetElementPtrInst *GEP,
+                              int64_t AccumulativeByteOffset);
+  /// Lowers a GEP with multiple indices into the ptrtoint+arithmetic+inttoptr
+  /// form.
+  /// \p GEP                      The given GEP, whose constant offset has
+  ///                             already been extracted.
+  /// \p AccumulativeByteOffset   The extracted constant offset.
+  void lowerToArithmetics(GetElementPtrInst *GEP,
+                          int64_t AccumulativeByteOffset);
+  /// Finds the constant offset within each index and accumulates them. If
+  /// LowerGEP is true, it searches indices of both sequential and struct
+  /// types; otherwise it searches only sequential indices. The output
+  /// NeedsExtraction indicates whether we found a non-zero constant offset.
   int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
   /// Canonicalize array indices to pointer-size integers. This helps to
   /// simplify the logic of splitting a GEP. For example, if a + b is a
@@ -274,6 +365,10 @@
   bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
 
   const DataLayout *DL;
+  const TargetMachine *TM;
+  /// Whether to lower a GEP with multiple indices into arithmetic operations
+  /// or multiple GEPs with a single index.
+  bool LowerGEP;
 };
 }  // anonymous namespace
 
@@ -289,8 +384,10 @@
     "Split GEPs to a variadic base and a constant offset for better CSE",
     false, false)
 
-FunctionPass *llvm::createSeparateConstOffsetFromGEPPass() {
-  return new SeparateConstOffsetFromGEP();
+FunctionPass *
+llvm::createSeparateConstOffsetFromGEPPass(const TargetMachine *TM,
+                                           bool LowerGEP) {
+  return new SeparateConstOffsetFromGEP(TM, LowerGEP);
 }
 
 bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
@@ -542,19 +639,17 @@
   return BO;
 }
 
-int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx,
-                                         const DataLayout *DL,
-                                         GetElementPtrInst *GEP) {
+Value *ConstantOffsetExtractor::Extract(Value *Idx, const DataLayout *DL,
+                                        GetElementPtrInst *GEP) {
   ConstantOffsetExtractor Extractor(DL, GEP);
   // Find a non-zero constant offset first.
   APInt ConstantOffset = Extractor.find(Idx, /* SignExtended */ false,
                                         /* ZeroExtended */ false,
                                         GEP->isInBounds());
-  if (ConstantOffset != 0) {
-    // Separates the constant offset from the GEP index.
-    NewIdx = Extractor.rebuildWithoutConstOffset();
-  }
-  return ConstantOffset.getSExtValue();
+  if (ConstantOffset == 0)
+    return nullptr;
+  // Separates the constant offset from the GEP index.
+  return Extractor.rebuildWithoutConstOffset();
 }
 
 int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL,
                                       GetElementPtrInst *GEP) {
@@ -620,11 +715,116 @@
         AccumulativeByteOffset +=
             ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
       }
+    } else if (LowerGEP) {
+      StructType *StTy = cast<StructType>(*GTI);
+      uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
+      // Skip field 0 as the offset is always 0.
+      if (Field != 0) {
+        NeedsExtraction = true;
+        AccumulativeByteOffset +=
+            DL->getStructLayout(StTy)->getElementOffset(Field);
+      }
     }
   }
   return AccumulativeByteOffset;
 }
 
+void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
+    GetElementPtrInst *GEP, int64_t AccumulativeByteOffset) {
+  IRBuilder<> Builder(GEP);
+  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+
+  Type *I8PtrTy =
+      Builder.getInt8PtrTy(GEP->getType()->getPointerAddressSpace());
+  Value *ResultPtr = GEP->getOperand(0);
+  if (ResultPtr->getType() != I8PtrTy)
+    ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+
+  gep_type_iterator GTI = gep_type_begin(*GEP);
+  // Create an ugly GEP for each sequential index. We don't create GEPs for
+  // structure indices, as they are accumulated in the constant offset index.
+  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+    if (isa<SequentialType>(*GTI)) {
+      Value *Idx = GEP->getOperand(I);
+      // Skip zero indices.
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
+        if (CI->isZero())
+          continue;
+
+      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
+                                DL->getTypeAllocSize(GTI.getIndexedType()));
+      // Scale the index by element size.
+      if (ElementSize != 1) {
+        if (ElementSize.isPowerOf2()) {
+          Idx = Builder.CreateShl(
+              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
+        } else {
+          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
+        }
+      }
+      // Create an ugly GEP with a single index for each index.
+      ResultPtr = Builder.CreateGEP(ResultPtr, Idx, "uglygep");
+    }
+  }
+
+  // Create a GEP with the constant offset index.
+  if (AccumulativeByteOffset != 0) {
+    Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
+    ResultPtr = Builder.CreateGEP(ResultPtr, Offset, "uglygep");
+  }
+  if (ResultPtr->getType() != GEP->getType())
+    ResultPtr = Builder.CreateBitCast(ResultPtr, GEP->getType());
+
+  GEP->replaceAllUsesWith(ResultPtr);
+  GEP->eraseFromParent();
+}
+
+void
+SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *GEP,
+                                               int64_t AccumulativeByteOffset) {
+  IRBuilder<> Builder(GEP);
+  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+
+  Value *ResultPtr = Builder.CreatePtrToInt(GEP->getOperand(0), IntPtrTy);
+  gep_type_iterator GTI = gep_type_begin(*GEP);
+  // Create ADD/SHL/MUL arithmetic operations for each sequential index. We
+  // don't create arithmetic operations for structure indices, as they are
+  // accumulated in the constant offset index.
+  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+    if (isa<SequentialType>(*GTI)) {
+      Value *Idx = GEP->getOperand(I);
+      // Skip zero indices.
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
+        if (CI->isZero())
+          continue;
+
+      APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
+                                DL->getTypeAllocSize(GTI.getIndexedType()));
+      // Scale the index by element size.
+      if (ElementSize != 1) {
+        if (ElementSize.isPowerOf2()) {
+          Idx = Builder.CreateShl(
+              Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
+        } else {
+          Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
+        }
+      }
+      // Create an ADD for each index.
+      ResultPtr = Builder.CreateAdd(ResultPtr, Idx);
+    }
+  }
+
+  // Create an ADD for the constant offset index.
+  if (AccumulativeByteOffset != 0) {
+    ResultPtr = Builder.CreateAdd(
+        ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset));
+  }
+
+  ResultPtr = Builder.CreateIntToPtr(ResultPtr, GEP->getType());
+  GEP->replaceAllUsesWith(ResultPtr);
+  GEP->eraseFromParent();
+}
+
 bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   // Skip vector GEPs.
   if (GEP->getType()->isVectorTy())
@@ -642,32 +842,42 @@
   if (!NeedsExtraction)
     return Changed;
 
-  // Before really splitting the GEP, check whether the backend supports the
-  // addressing mode we are about to produce. If no, this splitting probably
-  // won't be beneficial.
-  TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
-  if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
-                                 /*BaseGV=*/nullptr, AccumulativeByteOffset,
-                                 /*HasBaseReg=*/true, /*Scale=*/0)) {
-    return Changed;
+  // If LowerGEP is disabled, before really splitting the GEP, check whether
+  // the backend supports the addressing mode we are about to produce. If not,
+  // this splitting probably won't be beneficial.
+  // If LowerGEP is enabled, even if the extracted constant offset cannot
+  // match the addressing mode, we can still optimize the other lowered parts
+  // of the variable indices, so we skip this check.
+  if (!LowerGEP) {
+    TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+    if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
+                                   /*BaseGV=*/nullptr, AccumulativeByteOffset,
+                                   /*HasBaseReg=*/true, /*Scale=*/0)) {
+      return Changed;
+    }
   }
 
-  // Remove the constant offset in each GEP index. The resultant GEP computes
-  // the variadic base.
+  // Remove the constant offset in each sequential index. The resultant GEP
+  // computes the variadic base.
+  // Notice that we don't remove struct field indices here. If LowerGEP is
+  // disabled, a structure index is not accumulated and we still use the old
+  // one. If LowerGEP is enabled, a structure index is accumulated in the
+  // constant offset, and the following lowerToSingleIndexGEPs or
+  // lowerToArithmetics call will handle the constant offset and won't need a
+  // new structure index.
   gep_type_iterator GTI = gep_type_begin(*GEP);
   for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
     if (isa<SequentialType>(*GTI)) {
-      Value *NewIdx = nullptr;
-      // Tries to extract a constant offset from this GEP index.
-      int64_t ConstantOffset =
-          ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP);
-      if (ConstantOffset != 0) {
-        assert(NewIdx != nullptr &&
-               "ConstantOffset != 0 implies NewIdx is set");
+      // Splits this GEP index into a variadic part and a constant offset, and
+      // uses the variadic part as the new index.
+      Value *NewIdx =
+          ConstantOffsetExtractor::Extract(GEP->getOperand(I), DL, GEP);
+      if (NewIdx != nullptr) {
         GEP->setOperand(I, NewIdx);
       }
     }
   }
+
   // Clear the inbounds attribute because the new index may be off-bound.
   // e.g.,
   //
@@ -689,6 +899,21 @@
   // possible. GEPs with inbounds are more friendly to alias analysis.
   GEP->setIsInBounds(false);
 
+  // Lowers a GEP to either GEPs with a single index or arithmetic operations.
+  if (LowerGEP) {
+    // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
+    // arithmetic operations if the target uses alias analysis in codegen.
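+    // For example, with the AArch64 tests added in this patch, cortex-a53
+    // reports useAA() and thus takes the single-index-GEP form, while cyclone
+    // takes the ptrtoint/add/inttoptr form.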
+    if (TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())
+      lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
+    else
+      lowerToArithmetics(GEP, AccumulativeByteOffset);
+    return true;
+  }
+
+  // No need to create another GEP if the accumulative byte offset is 0.
+  if (AccumulativeByteOffset == 0)
+    return true;
+
   // Offsets the base with the accumulative byte offset.
   //
   //   %gep                        ; the base
Index: test/CodeGen/AArch64/aarch64-gep-opt.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -0,0 +1,162 @@
+; RUN: llc -O3 -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -O3 -print-after=codegenprepare -mcpu=cyclone < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
+; RUN: llc -O3 -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-linux-gnueabi"
+
+; The following test cases check that enabling the SeparateConstOffsetFromGEP
+; pass in the AArch64 backend works as intended. If useAA() returns true, the
+; pass transforms a complex GEP into simpler GEPs with a single index;
+; otherwise it transforms the GEP into a ptrtoint/arithmetic/inttoptr form.
+
+%struct = type { i32, i32, i32, i32, [20 x i32] }
+
+; Check that when two complex GEPs are used in two basic blocks, LLVM can
+; eliminate the common subexpression for the second use.
+define void @test_GEP_CSE([240 x %struct]* %string, i32* %adj, i32 %lib, i64 %idxprom) {
+  %liberties = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3
+  %1 = load i32* %liberties, align 4
+  %cmp = icmp eq i32 %1, %lib
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %origin = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 2
+  %2 = load i32* %origin, align 4
+  store i32 %2, i32* %adj, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; CHECK-LABEL: test_GEP_CSE:
+; CHECK: madd
+; CHECK: ldr
+; CHECK-NOT: madd
+; CHECK: ldr
+
+; CHECK-NoAA-LABEL: @test_GEP_CSE(
+; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint [240 x %struct]* %string to i64
+; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
+; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]]
+; CHECK-NoAA: add i64 [[PTR2]], 23052
+; CHECK-NoAA: inttoptr
+; CHECK-NoAA: if.then:
+; CHECK-NoAA-NOT: ptrtoint
+; CHECK-NoAA-NOT: mul
+; CHECK-NoAA: add i64 [[PTR2]], 23048
+; CHECK-NoAA: inttoptr
+
+; CHECK-UseAA-LABEL: @test_GEP_CSE(
+; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = bitcast [240 x %struct]* %string to i8*
+; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
+; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8* [[PTR0]], i64 [[IDX]]
+; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23052
+; CHECK-UseAA: bitcast
+; CHECK-UseAA: if.then:
+; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23048
+; CHECK-UseAA: bitcast
+
+%class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]}
+%struct.pt = type { %struct.point*, i32, i32 }
+%struct.point = type { i32, i32 }
+
+; Check that when a GEP is used across two basic blocks, LLVM can sink the
+; address calculation and codegen can generate a better addressing mode for
+; the second use.
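+; In %class.my, the [256 x %struct.pt] member starts at byte offset 520
+; (4 + 512 + 4 bytes of leading fields), and each %struct.pt is 16 bytes, so
+; fields 1 and 2 of element %idx sit at constant offsets 528 and 532 from the
+; variadic part (%this + 16 * %idx); those are the constants checked below.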
+define void @test_GEP_across_BB(%class.my* %this, i64 %idx) {
+  %1 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 1
+  %2 = load i32* %1, align 4
+  %3 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 2
+  %4 = load i32* %3, align 4
+  %5 = icmp eq i32 %2, %4
+  br i1 %5, label %if.true, label %exit
+
+if.true:
+  %6 = shl i32 %4, 1
+  store i32 %6, i32* %3, align 4
+  br label %exit
+
+exit:
+  %7 = add nsw i32 %4, 1
+  store i32 %7, i32* %1, align 4
+  ret void
+}
+; CHECK-LABEL: test_GEP_across_BB:
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #528]
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #532]
+; CHECK-NOT: add
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #532]
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #528]
+
+; CHECK-NoAA-LABEL: test_GEP_across_BB(
+; CHECK-NoAA: add i64 [[TMP:%[a-zA-Z0-9]+]], 528
+; CHECK-NoAA: add i64 [[TMP]], 532
+; CHECK-NoAA: if.true:
+; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 532
+; CHECK-NoAA: exit:
+; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 528
+
+; CHECK-UseAA-LABEL: test_GEP_across_BB(
+; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = getelementptr
+; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 528
+; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 532
+; CHECK-UseAA: if.true:
+; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 532
+; CHECK-UseAA: exit:
+; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 528
+
+%struct.S = type { float, double }
+@struct_array = global [1024 x %struct.S] zeroinitializer, align 16
+
+; The following two test cases check that we can extract constants from
+; indices of struct type.
+; The constant offsets are from the indices "i64 %idxprom" and "i32 1". As the
+; alloc size of %struct.S is 16 and "i32 1" selects the 2nd field, whose
+; offset is 8, the total constant offset is (5 * 16 + 8) = 88.
+define double* @test-struct_1(i32 %i) {
+entry:
+  %add = add nsw i32 %i, 5
+  %idxprom = sext i32 %add to i64
+  %p = getelementptr [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1
+  ret double* %p
+}
+; CHECK-NoAA-LABEL: @test-struct_1(
+; CHECK-NoAA-NOT: getelementptr
+; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, 88
+
+; CHECK-UseAA-LABEL: @test-struct_1(
+; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 88
+
+%struct3 = type { i64, i32 }
+%struct2 = type { %struct3, i32 }
+%struct1 = type { i64, %struct2 }
+%struct0 = type { i32, i32, i64*, [100 x %struct1] }
+
+; The constant offsets are from the indices "i32 3", "i64 %arrayidx" and
+; "i32 1". "i32 3" selects the 4th field, whose offset is 16. The alloc size
+; of %struct1 is 32. "i32 1" selects the 2nd field, whose offset is 8. So the
+; total constant offset is 16 + (-2 * 32) + 8 = -40.
+define %struct2* @test-struct_2(%struct0* %ptr, i64 %idx) {
+entry:
+  %arrayidx = add nsw i64 %idx, -2
+  %ptr2 = getelementptr %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
+  ret %struct2* %ptr2
+}
+; CHECK-NoAA-LABEL: @test-struct_2(
+; CHECK-NoAA-NOT: = getelementptr
+; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, -40
+
+; CHECK-UseAA-LABEL: @test-struct_2(
+; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 -40
+
+; Test that when an index is the sum of two constants, the
+; SeparateConstOffsetFromGEP pass does not generate an incorrect result.
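+; Here %inc folds to the constant 3, so the address is %in + 3 * 12 + 2 * 4 =
+; %in + 44 bytes, which matches the "str wzr, [x0, #44]" check below.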
+define void @test_const_add([3 x i32]* %in) {
+  %inc = add nsw i32 2, 1
+  %idxprom = sext i32 %inc to i64
+  %arrayidx = getelementptr [3 x i32]* %in, i64 %idxprom, i64 2
+  store i32 0, i32* %arrayidx, align 4
+  ret void
+}
+; CHECK-LABEL: test_const_add:
+; CHECK: str wzr, [x0, #44]
Index: test/CodeGen/AArch64/arm64-addr-mode-folding.ll
===================================================================
--- test/CodeGen/AArch64/arm64-addr-mode-folding.ll
+++ test/CodeGen/AArch64/arm64-addr-mode-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -mtriple arm64-apple-ios3 %s -o - | FileCheck %s
+; RUN: llc -O3 -mtriple arm64-apple-ios3 -aarch64-gep-opt=false %s -o - | FileCheck %s
 ;
 @block = common global i8* null, align 8
Index: test/CodeGen/AArch64/arm64-cse.ll
===================================================================
--- test/CodeGen/AArch64/arm64-cse.ll
+++ test/CodeGen/AArch64/arm64-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 -aarch64-gep-opt=false | FileCheck %s
 target triple = "arm64-apple-ios"
 
 ; rdar://12462006