
[GVN] Clobber partially aliased loads.
ClosedPublic

Authored by dfukalov on Jan 27 2021, 10:17 AM.

Details

Summary

Use the offsets stored in AliasResult, as implemented in D98718.
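The effect of the change can be sketched outside of LLVM. The helper below is an illustrative model, not LLVM code (the name `forward_narrow_load` is made up for this sketch): given the integer value of an earlier, wider load, it extracts the bytes that a later, partially overlapping narrow load at a known byte offset would observe, mirroring the lshr+trunc forwarding GVN emits in the test checks further down.

```python
def forward_narrow_load(wide_value, wide_size, offset, narrow_size, big_endian=False):
    """Return the integer a `narrow_size`-byte load at byte `offset`
    (relative to the wide load's address) would observe, or None when
    the narrow load is not fully contained in the wide one (e.g. a
    negative offset), in which case nothing can be forwarded."""
    if offset < 0 or offset + narrow_size > wide_size:
        return None
    # Little-endian: the byte at offset k occupies bit range [8k, 8k+8).
    # Big-endian: byte 0 is the most significant byte of the wide value.
    shift = offset if not big_endian else wide_size - narrow_size - offset
    mask = (1 << (narrow_size * 8)) - 1
    return (wide_value >> (shift * 8)) & mask
```

For example, with a 4-byte value 0x11223344, a 1-byte load at offset 1 sees 0x33 on a little-endian target (`lshr` by 8 bits, then `trunc`) and 0x22 on a big-endian one, matching the LE/BE check lines in the tests below.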

Diff Detail

Event Timeline

dfukalov requested review of this revision.Jan 27 2021, 10:17 AM
dfukalov created this revision.
Herald added a project: Restricted Project.Jan 27 2021, 10:17 AM
dfukalov planned changes to this revision.Jan 27 2021, 10:18 AM
dfukalov updated this revision to Diff 327811.Mar 3 2021, 8:33 AM

Implementation.

dfukalov retitled this revision from [WIP][GVN] Clobber partially aliased loads. to [GVN] Clobber partially aliased loads..Mar 3 2021, 8:39 AM
dfukalov edited the summary of this revision.
dfukalov added reviewers: nikic, asbirlea, fhahn.
dfukalov added a subscriber: fvrmatteo.
bmahjour removed a subscriber: bmahjour.Mar 3 2021, 8:40 AM
dfukalov planned changes to this revision.Mar 16 2021, 9:19 AM

The patch should be updated after converting AliasResult.

dfukalov updated this revision to Diff 335902.Wed, Apr 7, 12:26 PM

Rebased to use the reworked AliasResult from D98718.

dfukalov edited the summary of this revision.Wed, Apr 7, 12:27 PM

Performance-wise this looks good.
Please wait to see if the other reviewers have comments on this.

dfukalov updated this revision to Diff 336377.Fri, Apr 9, 3:33 AM

Rebased after the preparation changes (D98027 and D98718) were committed.

dfukalov updated this revision to Diff 336379.Fri, Apr 9, 3:38 AM

Removed unneeded change in MemorySSATest.cpp.

nikic added inline comments.Sun, Apr 11, 2:10 PM
llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
518

Do you have any information on why this is not a concern anymore? The comment was introduced in https://github.com/llvm/llvm-project/commit/a471751c24324e7ba6ac5c612dbedb16c644fc44, unfortunately without a test case :(

519

Say we have a load from X, then a load from a PartialAlias of X without an offset, then another load from X. I think that after your change we will no longer forward from the first load, because we'll return a clobber for the PartialAlias in between, even though it cannot be used.

Can you please test this situation? Maybe we should not return a clobber if no offset is available?

llvm/test/Transforms/GVN/clobber-partial-alias.ll
1 ↗(On Diff #336379)

Please regenerate the checks with update_test_checks.py.

dfukalov updated this revision to Diff 336899.Mon, Apr 12, 10:47 AM

Addressing comments.

dfukalov marked 2 inline comments as done.Mon, Apr 12, 10:59 AM
dfukalov added inline comments.
llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
518

Yes, I saw this change, and because of this comment I added a VisitedPhiBBs check when we return PartialAlias with an offset in BasicAAResult::aliasGEP.

519

Yes, you're right: there is no reason to return PartialAlias here if we don't know the offset.

dfukalov updated this revision to Diff 339022.Tue, Apr 20, 3:54 PM
dfukalov marked an inline comment as done.

Added test for partially aliased loads with phi translation in address.

dfukalov marked an inline comment as done.Tue, Apr 20, 3:56 PM
dfukalov added inline comments.
llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
518

I added test load_load_partial_alias_cross_block_phi_trans to check this.

nikic added inline comments.Thu, Apr 22, 2:27 PM
llvm/test/Transforms/GVN/PRE/rle.ll
1010

I believe that for phi translation to work, you need to have the translated GEPs in the predecessor. This test case worked for me:

define i32 @load_load_partial_alias_cross_block_phi_trans(i8* %P) nounwind {
; LE-LABEL: @load_load_partial_alias_cross_block_phi_trans(
; LE-NEXT:  entry:
; LE-NEXT:    [[XX:%.*]] = bitcast i8* [[P:%.*]] to i32*
; LE-NEXT:    [[X1:%.*]] = load i32, i32* [[XX]], align 4
; LE-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X1]], 127
; LE-NEXT:    [[TMP0:%.*]] = lshr i32 [[X1]], 16
; LE-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
; LE-NEXT:    [[TMP2:%.*]] = lshr i32 [[X1]], 8
; LE-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
; LE-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[ELSE:%.*]]
; LE:       if:
; LE-NEXT:    br label [[JOIN:%.*]]
; LE:       else: 
; LE-NEXT:    br label [[JOIN]]
; LE:       join: 
; LE-NEXT:    [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ]
; LE-NEXT:    [[CONV6:%.*]] = zext i8 [[TMP5]] to i32
; LE-NEXT:    ret i32 [[CONV6]]
; LE:       if.end:
; LE-NEXT:    ret i32 52 
; 
; BE-LABEL: @load_load_partial_alias_cross_block_phi_trans(
; BE-NEXT:  entry:
; BE-NEXT:    [[XX:%.*]] = bitcast i8* [[P:%.*]] to i32*
; BE-NEXT:    [[X1:%.*]] = load i32, i32* [[XX]], align 4
; BE-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X1]], 127
; BE-NEXT:    [[TMP0:%.*]] = lshr i32 [[X1]], 8
; BE-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
; BE-NEXT:    [[TMP2:%.*]] = lshr i32 [[X1]], 16
; BE-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
; BE-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[ELSE:%.*]]
; BE:       if:
; BE-NEXT:    br label [[JOIN:%.*]]
; BE:       else: 
; BE-NEXT:    br label [[JOIN]]
; BE:       join: 
; BE-NEXT:    [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ]
; BE-NEXT:    [[CONV6:%.*]] = zext i8 [[TMP5]] to i32
; BE-NEXT:    ret i32 [[CONV6]]
; BE:       if.end:
; BE-NEXT:    ret i32 52
; 
entry:
  %xx = bitcast i8* %P to i32*
  %x1 = load i32, i32* %xx, align 4
  %cmp = icmp eq i32 %x1, 127
  br i1 %cmp, label %if, label %else

if:
  %arrayidx.if = getelementptr inbounds i8, i8* %P, i64 1
  br label %join

else:
  %arrayidx.else = getelementptr inbounds i8, i8* %P, i64 2
  br label %join
  
join: 
  %idx = phi i64 [ 1, %if ], [ 2, %else ]
  %arrayidx4 = getelementptr inbounds i8, i8* %P, i64 %idx
  %tmp5 = load i8, i8* %arrayidx4, align 1
  %conv6 = zext i8 %tmp5 to i32
  ret i32 %conv6

if.end:
  ret i32 52
}

I think the actually problematic case probably has something to do with phi translation in loops (where you translate into the same block, but in a different iteration). I'll try to play with that tomorrow, but if I don't find anything, let's just land this and see if any miscompiles turn up :)

nikic accepted this revision.Fri, Apr 23, 10:02 AM

LGTM, you might want to pick up the additional test cases.

llvm/lib/Transforms/Scalar/GVN.cpp
1010

I don't think we can really do anything with negative offsets, unless we want to do something like shorten the second load and combine it with the value of the first load. That's unlikely to be profitable.
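The reasoning about negative offsets can be stated as a simple containment check. This is an illustrative model, not GVN's actual code (the name `fully_covered` is made up here): shift-and-truncate forwarding requires every byte of the later load to lie inside the earlier one, and a negative offset always violates that.

```python
def fully_covered(wide_size, offset, narrow_size):
    # A narrow load at byte `offset` (relative to the wide load's start)
    # can be forwarded with lshr+trunc only if all of its bytes fall
    # inside the wide load's [0, wide_size) byte range.
    return 0 <= offset and offset + narrow_size <= wide_size

# A 1-byte load at offset 1 inside a 4-byte load: forwardable.
assert fully_covered(4, 1, 1)
# A negative offset reads a byte the wide value never contained, so the
# second load would have to be shortened and merged with the first
# load's value instead, which is the unprofitable case described above.
assert not fully_covered(4, -1, 2)
```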

llvm/test/Transforms/GVN/PRE/rle.ll
1010

I constructed a case that involves a loop, but everything seems to work fine in that case as well:

define void @load_load_partial_alias_loop(i8* %P) {
; LE-LABEL: @load_load_partial_alias_loop(
; LE-NEXT:  entry:
; LE-NEXT:    [[P_1:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 1
; LE-NEXT:    [[V_1:%.*]] = load i8, i8* [[P_1]], align 1
; LE-NEXT:    call void @use.i8(i8 [[V_1]])
; LE-NEXT:    [[P_1_32:%.*]] = bitcast i8* [[P_1]] to i32*
; LE-NEXT:    [[V_1_32:%.*]] = load i32, i32* [[P_1_32]], align 4
; LE-NEXT:    call void @use.i32(i32 [[V_1_32]])
; LE-NEXT:    [[TMP0:%.*]] = trunc i32 [[V_1_32]] to i8
; LE-NEXT:    br label [[LOOP:%.*]]
; LE:       loop:
; LE-NEXT:    [[V_I:%.*]] = phi i8 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ]
; LE-NEXT:    [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ]
; LE-NEXT:    [[P_I:%.*]] = getelementptr i8, i8* [[P]], i64 [[I]]
; LE-NEXT:    call void @use.i8(i8 [[V_I]])
; LE-NEXT:    [[P_I_32:%.*]] = bitcast i8* [[P_I]] to i32*
; LE-NEXT:    [[V_I_32:%.*]] = load i32, i32* [[P_I_32]], align 4
; LE-NEXT:    call void @use.i32(i32 [[V_I_32]])
; LE-NEXT:    [[I_INC]] = add i64 [[I]], 1
; LE-NEXT:    [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64
; LE-NEXT:    [[TMP1:%.*]] = lshr i32 [[V_I_32]], 8
; LE-NEXT:    [[TMP2]] = trunc i32 [[TMP1]] to i8
; LE-NEXT:    br i1 [[CMP]], label [[LOOP_LOOP_CRIT_EDGE]], label [[EXIT:%.*]]
; LE:       loop.loop_crit_edge:
; LE-NEXT:    br label [[LOOP]]
; LE:       exit:
; LE-NEXT:    ret void
;
; BE-LABEL: @load_load_partial_alias_loop(
; BE-NEXT:  entry:
; BE-NEXT:    [[P_1:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 1
; BE-NEXT:    [[V_1:%.*]] = load i8, i8* [[P_1]], align 1
; BE-NEXT:    call void @use.i8(i8 [[V_1]])
; BE-NEXT:    [[P_1_32:%.*]] = bitcast i8* [[P_1]] to i32*
; BE-NEXT:    [[V_1_32:%.*]] = load i32, i32* [[P_1_32]], align 4
; BE-NEXT:    call void @use.i32(i32 [[V_1_32]])
; BE-NEXT:    [[TMP0:%.*]] = lshr i32 [[V_1_32]], 24
; BE-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
; BE-NEXT:    br label [[LOOP:%.*]]
; BE:       loop:
; BE-NEXT:    [[V_I:%.*]] = phi i8 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ]
; BE-NEXT:    [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ]
; BE-NEXT:    [[P_I:%.*]] = getelementptr i8, i8* [[P]], i64 [[I]]
; BE-NEXT:    call void @use.i8(i8 [[V_I]])
; BE-NEXT:    [[P_I_32:%.*]] = bitcast i8* [[P_I]] to i32*
; BE-NEXT:    [[V_I_32:%.*]] = load i32, i32* [[P_I_32]], align 4
; BE-NEXT:    call void @use.i32(i32 [[V_I_32]])
; BE-NEXT:    [[I_INC]] = add i64 [[I]], 1
; BE-NEXT:    [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64
; BE-NEXT:    [[TMP2:%.*]] = lshr i32 [[V_I_32]], 16
; BE-NEXT:    [[TMP3]] = trunc i32 [[TMP2]] to i8
; BE-NEXT:    br i1 [[CMP]], label [[LOOP_LOOP_CRIT_EDGE]], label [[EXIT:%.*]]
; BE:       loop.loop_crit_edge:
; BE-NEXT:    br label [[LOOP]]
; BE:       exit:
; BE-NEXT:    ret void
;
entry:
  %P.1 = getelementptr i8, i8* %P, i64 1
  %v.1 = load i8, i8* %P.1
  call void @use.i8(i8 %v.1)
  %P.1.32 = bitcast i8* %P.1 to i32*
  %v.1.32 = load i32, i32* %P.1.32
  call void @use.i32(i32 %v.1.32)
  br label %loop

loop:
  %i = phi i64 [ 1, %entry ], [ %i.inc, %loop ]
  %P.i = getelementptr i8, i8* %P, i64 %i
  %v.i = load i8, i8* %P.i
  call void @use.i8(i8 %v.i)
  %P.i.32 = bitcast i8* %P.i to i32*
  %v.i.32 = load i32, i32* %P.i.32
  call void @use.i32(i32 %v.i.32)
  %i.inc = add i64 %i, 1
  %cmp = icmp ne i64 %i.inc, 64
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}

declare void @use.i8(i8) readnone
declare void @use.i32(i32) readnone

Now, I have some doubts that these transforms are profitable, but at least they're not wrong.

This revision is now accepted and ready to land.Fri, Apr 23, 10:02 AM
This revision was automatically updated to reflect the committed changes.