Skip to content

Commit c399830

Browse files
author
Hal Finkel
committed Apr 12, 2014
Add the ability to use GEPs for address sinking in CGP
The current memory-instruction optimization logic in CGP, which sinks parts of the address computation that can be absorbed by the addressing mode, does this by explicitly converting the relevant part of the address computation into IR-level integer operations (making use of ptrtoint and inttoptr). For most targets this is currently not a problem, but for targets wishing to make use of IR-level aliasing analysis during CodeGen, the use of ptrtoint/inttoptr is a problem for two reasons: 1. BasicAA becomes less powerful in the face of the ptrtoint/inttoptr. 2. In cases where type-punning was used, and BasicAA was used to override TBAA, BasicAA may no longer do so. (this had forced us to disable all use of TBAA in CodeGen; something which we can now enable again) This (use of GEPs instead of ptrtoint/inttoptr) is not currently enabled by default (except for those targets that use AA during CodeGen), and so aside from some PowerPC subtargets and SystemZ, there should be no change in behavior. We may be able to switch completely away from the ptrtoint/inttoptr sinking on all targets, but further testing is required. I've doubled-up on a number of existing tests that are sensitive to the address sinking behavior (including some store-merging tests that are sensitive to the order of the resulting ADD operations at the SDAG level). llvm-svn: 206092
1 parent b196629 commit c399830

12 files changed

+164
-2
lines changed
 

‎llvm/lib/CodeGen/CodeGenPrepare.cpp

+126
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "llvm/Support/raw_ostream.h"
4040
#include "llvm/Target/TargetLibraryInfo.h"
4141
#include "llvm/Target/TargetLowering.h"
42+
#include "llvm/Target/TargetSubtargetInfo.h"
4243
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
4344
#include "llvm/Transforms/Utils/BuildLibCalls.h"
4445
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
@@ -70,6 +71,10 @@ static cl::opt<bool> DisableSelectToBranch(
7071
"disable-cgp-select2branch", cl::Hidden, cl::init(false),
7172
cl::desc("Disable select to branch conversion."));
7273

74+
static cl::opt<bool> AddrSinkUsingGEPs(
75+
"addr-sink-using-gep", cl::Hidden, cl::init(false),
76+
cl::desc("Address sinking in CGP using GEPs."));
77+
7378
static cl::opt<bool> EnableAndCmpSinking(
7479
"enable-andcmp-sinking", cl::Hidden, cl::init(true),
7580
cl::desc("Enable sinkinig and/cmp into branches."));
@@ -2423,6 +2428,127 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
24232428
<< *MemoryInst);
24242429
if (SunkAddr->getType() != Addr->getType())
24252430
SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
2431+
} else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
2432+
TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) {
2433+
// By default, we use the GEP-based method when AA is used later. This
2434+
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
2435+
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
2436+
<< *MemoryInst);
2437+
Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
2438+
Value *ResultPtr = 0, *ResultIndex = 0;
2439+
2440+
// First, find the pointer.
2441+
if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
2442+
ResultPtr = AddrMode.BaseReg;
2443+
AddrMode.BaseReg = 0;
2444+
}
2445+
2446+
if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
2447+
// We can't add more than one pointer together, nor can we scale a
2448+
// pointer (both of which seem meaningless).
2449+
if (ResultPtr || AddrMode.Scale != 1)
2450+
return false;
2451+
2452+
ResultPtr = AddrMode.ScaledReg;
2453+
AddrMode.Scale = 0;
2454+
}
2455+
2456+
if (AddrMode.BaseGV) {
2457+
if (ResultPtr)
2458+
return false;
2459+
2460+
ResultPtr = AddrMode.BaseGV;
2461+
}
2462+
2463+
// If the real base value actually came from an inttoptr, then the matcher
2464+
// will look through it and provide only the integer value. In that case,
2465+
// use it here.
2466+
if (!ResultPtr && AddrMode.BaseReg) {
2467+
ResultPtr =
2468+
Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
2469+
AddrMode.BaseReg = 0;
2470+
} else if (!ResultPtr && AddrMode.Scale == 1) {
2471+
ResultPtr =
2472+
Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
2473+
AddrMode.Scale = 0;
2474+
}
2475+
2476+
if (!ResultPtr &&
2477+
!AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
2478+
SunkAddr = Constant::getNullValue(Addr->getType());
2479+
} else if (!ResultPtr) {
2480+
return false;
2481+
} else {
2482+
Type *I8PtrTy =
2483+
Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
2484+
2485+
// Start with the base register. Do this first so that subsequent address
2486+
// matching finds it last, which will prevent it from trying to match it
2487+
// as the scaled value in case it happens to be a mul. That would be
2488+
// problematic if we've sunk a different mul for the scale, because then
2489+
// we'd end up sinking both muls.
2490+
if (AddrMode.BaseReg) {
2491+
Value *V = AddrMode.BaseReg;
2492+
if (V->getType() != IntPtrTy)
2493+
V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
2494+
2495+
ResultIndex = V;
2496+
}
2497+
2498+
// Add the scale value.
2499+
if (AddrMode.Scale) {
2500+
Value *V = AddrMode.ScaledReg;
2501+
if (V->getType() == IntPtrTy) {
2502+
// done.
2503+
} else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
2504+
cast<IntegerType>(V->getType())->getBitWidth()) {
2505+
V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
2506+
} else {
2507+
// It is only safe to sign extend the BaseReg if we know that the math
2508+
// required to create it did not overflow before we extend it. Since
2509+
// the original IR value was tossed in favor of a constant back when
2510+
// the AddrMode was created we need to bail out gracefully if widths
2511+
// do not match instead of extending it.
2512+
Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
2513+
if (I && (ResultIndex != AddrMode.BaseReg))
2514+
I->eraseFromParent();
2515+
return false;
2516+
}
2517+
2518+
if (AddrMode.Scale != 1)
2519+
V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
2520+
"sunkaddr");
2521+
if (ResultIndex)
2522+
ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
2523+
else
2524+
ResultIndex = V;
2525+
}
2526+
2527+
// Add in the Base Offset if present.
2528+
if (AddrMode.BaseOffs) {
2529+
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
2530+
if (ResultIndex) {
2531+
// We need to add this separately from the scale above to help with
2532+
// SDAG consecutive load/store merging.
2533+
if (ResultPtr->getType() != I8PtrTy)
2534+
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
2535+
ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
2536+
}
2537+
2538+
ResultIndex = V;
2539+
}
2540+
2541+
if (!ResultIndex) {
2542+
SunkAddr = ResultPtr;
2543+
} else {
2544+
if (ResultPtr->getType() != I8PtrTy)
2545+
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
2546+
SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
2547+
}
2548+
2549+
if (SunkAddr->getType() != Addr->getType())
2550+
SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
2551+
}
24262552
} else {
24272553
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
24282554
<< *MemoryInst);

‎llvm/test/CodeGen/ARM/phi.ll

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=arm-eabi -mattr=+v4t -addr-sink-using-gep=1 %s -o - | FileCheck %s
23

34
; <rdar://problem/8686347>
45

‎llvm/test/CodeGen/ARM64/dagcombiner-indexed-load.ll

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc -O3 < %s | FileCheck %s
2+
; RUN: llc -O3 -addr-sink-using-gep=1 < %s | FileCheck %s
23
; Test case for a DAG combiner bug where we combined an indexed load
34
; with an extension (sext, zext, or any) into a regular extended load,
45
; i.e., dropping the indexed value.

‎llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc -mcpu=g5 < %s | FileCheck %s
2+
; RUN: llc -mcpu=g5 -addr-sink-using-gep=1 < %s | FileCheck %s
23
;; Formerly crashed, see PR 1508
34
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
45
target triple = "powerpc64-apple-darwin8"

‎llvm/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
1-
; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \
2-
; RUN: grep push | count 3
1+
; RUN: llc < %s -march=x86 -mtriple=i686-darwin | FileCheck %s
2+
; RUN: llc < %s -march=x86 -mtriple=i686-darwin -addr-sink-using-gep=1 | FileCheck %s
33

44
define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) nounwind {
55
entry:
6+
; CHECK-LABEL: @foo
7+
; CHECK: push
8+
; CHECK: push
9+
; CHECK: push
10+
; CHECK-NOT: push
11+
612
icmp sgt i32 %size, 0 ; <i1>:0 [#uses=1]
713
br i1 %0, label %bb.preheader, label %return
814

‎llvm/test/CodeGen/X86/MergeConsecutiveStores.ll

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
2+
; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s
23

34
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
45
target triple = "x86_64-apple-macosx10.8.0"

‎llvm/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll

+20
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: opt -S -codegenprepare %s -o - | FileCheck %s
2+
; RUN: opt -S -codegenprepare -addr-sink-using-gep=1 %s -o - | FileCheck -check-prefix=CHECK-GEP %s
23
; This file tests the different cases that are involved when codegen prepare
34
; tries to get sign extension out of the way of addressing mode.
45
; These tests require an actual target as addressing mode decisions depend
@@ -281,6 +282,25 @@ define i8 @twoArgsNoPromotionRemove(i1 %arg1, i8 %arg2, i8* %base) {
281282
; CHECK: [[ADDR2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[BASE2]] to i32*
282283
; CHECK: load i32* [[ADDR2]]
283284
; CHECK: ret
285+
; CHECK-GEP-LABEL: @checkProfitability
286+
; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg1 to i64
287+
; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg2 to i64
288+
; CHECK-GEP: [[SHL:%[a-zA-Z_0-9-]+]] = shl nsw i32 %arg1, 1
289+
; CHECK-GEP: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SHL]], %arg2
290+
; CHECK-GEP: [[SEXTADD:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
291+
; BB then
292+
; CHECK-GEP: [[BASE1:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
293+
; CHECK-GEP: [[BCC1:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE1]] to i8*
294+
; CHECK-GEP: [[FULL1:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC1]], i64 48
295+
; CHECK-GEP: [[ADDR1:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL1]] to i32*
296+
; CHECK-GEP: load i32* [[ADDR1]]
297+
; BB else
298+
; CHECK-GEP: [[BASE2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
299+
; CHECK-GEP: [[BCC2:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE2]] to i8*
300+
; CHECK-GEP: [[FULL2:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC2]], i64 48
301+
; CHECK-GEP: [[ADDR2:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL2]] to i32*
302+
; CHECK-GEP: load i32* [[ADDR2]]
303+
; CHECK-GEP: ret
284304
define i32 @checkProfitability(i32 %arg1, i32 %arg2, i1 %test) {
285305
%shl = shl nsw i32 %arg1, 1
286306
%add1 = add nsw i32 %shl, %arg2

‎llvm/test/CodeGen/X86/codegen-prepare.ll

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
2+
; RUN: llc < %s -mtriple=x86_64-pc-linux -addr-sink-using-gep=1 | FileCheck %s
23

34
; Check that the CodeGenPrepare Pass
45
; does not wrongly rewrite the address computed by Instruction %4

‎llvm/test/CodeGen/X86/isel-sink.ll

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc < %s -march=x86 | FileCheck %s
2+
; RUN: llc < %s -march=x86 -addr-sink-using-gep=1 | FileCheck %s
23

34
define i32 @test(i32* %X, i32 %B) {
45
; CHECK-LABEL: test:

‎llvm/test/CodeGen/X86/merge_store.ll

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -addr-sink-using-gep=1 | FileCheck %s
23

34
define void @merge_store(i32* nocapture %a) {
45
; CHECK-LABEL: merge_store:

‎llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9
2+
; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 -addr-sink-using-gep=1 %s -o - | FileCheck %s -check-prefix=A9
23

34
; @simple is the most basic chain of address induction variables. Chaining
45
; saves at least one register and avoids complex addressing and setup

‎llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll

+2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64
22
; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32
3+
; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -addr-sink-using-gep=1 | FileCheck %s -check-prefix=X64
4+
; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -addr-sink-using-gep=1 | FileCheck %s -check-prefix=X32
35

46
; @simple is the most basic chain of address induction variables. Chaining
57
; saves at least one register and avoids complex addressing and setup

0 commit comments

Comments
 (0)
Please sign in to comment.