Index: llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1569,6 +1569,14 @@ !UsedRegUnits.available(getLdStRegOp(MI).getReg())) && !mayAlias(MI, MemInsns, AA)) { + // If the BaseReg has been modified, then we cannot do the optimization. + // For example, in the following pattern + // ldr x1 [x2] + // ldr x2 [x3] + // ldr x4 [x2, #8], + // the first and third ldr cannot be converted to ldp x1, x4, [x2] + if (!ModifiedRegUnits.available(BaseReg)) + return E; Flags.setMergeForward(false); Flags.clearRenameReg(); return MBBI; @@ -1583,6 +1591,8 @@ !mayAlias(FirstMI, MemInsns, AA)) { if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) { + if (!ModifiedRegUnits.available(BaseReg)) + return E; Flags.setMergeForward(true); Flags.clearRenameReg(); return MBBI; Index: llvm/test/CodeGen/AArch64/aarch64-ldst-modified-baseReg.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/aarch64-ldst-modified-baseReg.mir @@ -0,0 +1,120 @@ +# RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -run-pass=aarch64-ldst-opt %s -o - | FileCheck %s +# +# The test below checks that when the AArch64 Load Store Optimization pass tries to +# convert two load instructions into a ldp instruction, and when the base register of +# the second ldr instruction has been modified in between these two ldr instructions, +# the conversion should not occur. +# +# For example, for the following pattern: +# ldr x9 [x10] +# ldr x10 [x11] +# ldr x10 [x10, 8], +# the first and third ldr instructions cannot be converted to ldp x9, x10, [x10]. 
+# +# The pattern we check in this test file is the following pattern in function main: +# renamable $x9 = LDRXui killed renamable $x10, 1 :: (dereferenceable load 8 from `i32** getelementptr inbounds (<{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>, <{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>* @_MergedGlobals.1, i32 0, i32 5)`, !tbaa !0) +# renamable $x10 = LDURXi renamable $x8, 4 :: (dereferenceable load 8 from `i32**** getelementptr inbounds (<{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>, <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>* @_MergedGlobals, i32 0, i32 7)`, !tbaa !0) +# renamable $x10 = LDRXui killed renamable $x10, 0 :: (load 8 from %ir.3, !tbaa !0) + +# The first and third LDR instructions cannot be converted into a LDP instruction. +# +# CHECK-NOT: LDP + +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-unknown-linux-gnu" + + @o = internal global i32* undef, align 8 + @_MergedGlobals = private global <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }> <{ i8 -1, [3 x i8] zeroinitializer, i8 104, [3 x i8] zeroinitializer, i8 19, [3 x i8] zeroinitializer, i32 5, i32*** bitcast (i32** @o to i32***), i32** getelementptr inbounds (<{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>, <{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>* @_MergedGlobals.1, i32 0, i32 5) }>, align 8 + @_MergedGlobals.1 = private global <{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }> zeroinitializer, align 8 + + ; Function Attrs: nofree nounwind + define dso_local i32 @main() local_unnamed_addr #0 { + ae.exit.i: + %0 = load volatile i32**, i32*** getelementptr inbounds (<{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>, <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>* @_MergedGlobals, i32 0, i32 8), align 8, !tbaa !0 + store i32* getelementptr inbounds (<{ i1, [3 x i8], [1 x [1 x i32]], 
i32, [4 x i8], i32* }>, <{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>* @_MergedGlobals.1, i32 0, i32 3), i32** %0, align 8, !tbaa !0 + %1 = load i32*, i32** getelementptr inbounds (<{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>, <{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>* @_MergedGlobals.1, i32 0, i32 5), align 8, !tbaa !0 + %2 = load i32, i32* %1, align 4, !tbaa !4 + %3 = load i32***, i32**** getelementptr inbounds (<{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>, <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>* @_MergedGlobals, i32 0, i32 7), align 8, !tbaa !0 + %4 = load i32**, i32*** %3, align 8, !tbaa !0 + %5 = load i32*, i32** %4, align 8, !tbaa !0 + store i32 %2, i32* %5, align 4, !tbaa !4 + %l.promoted = load i32, i32* getelementptr inbounds (<{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>, <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>* @_MergedGlobals, i32 0, i32 6), align 4, !tbaa !4 + br label %for.body.i + + for.body.i: ; preds = %for.body.i, %ae.exit.i + %and.i3 = phi i32 [ %l.promoted, %ae.exit.i ], [ %and.i, %for.body.i ] + %and.i = and i32 %and.i3, 9 + %tobool.i = icmp eq i32 undef, 0 + br i1 %tobool.i, label %ac.exit, label %for.body.i + + ac.exit: ; preds = %for.body.i + store i32 %and.i, i32* getelementptr inbounds (<{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>, <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>* @_MergedGlobals, i32 0, i32 6), align 4, !tbaa !4 + store i1 true, i1* getelementptr inbounds (<{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>, <{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>* @_MergedGlobals.1, i32 0, i32 0), align 4 + br i1 undef, label %if.else.i, label %ag.exit + + if.else.i: ; preds = %ac.exit + %6 = load i32, i32* getelementptr inbounds ([1 x [1 x i32]], [1 x [1 x i32]]* getelementptr inbounds (<{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* 
}>, <{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>* @_MergedGlobals.1, i32 0, i32 2), i64 6, i64 0, i64 0), align 4, !tbaa !4 + store i8 undef, i8* getelementptr inbounds (<{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>, <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>* @_MergedGlobals, i32 0, i32 0), align 4, !tbaa !6 + br label %ag.exit + + ag.exit: ; preds = %if.else.i, %ac.exit + %7 = load i8, i8* getelementptr inbounds (<{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>, <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>* @_MergedGlobals, i32 0, i32 4), align 4, !tbaa !6 + %8 = load volatile i8, i8* getelementptr inbounds (<{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>, <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>* @_MergedGlobals, i32 0, i32 2), align 4, !tbaa !6 + ret i32 0 + } + + attributes #0 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="tsv110" "target-features"="+aes,+crc,+crypto,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rdm,+sha2,+spe,+v8.2a" "unsafe-fp-math"="false" "use-soft-float"="false" } + + !0 = !{!1, !1, i64 0} + !1 = !{!"any pointer", !2, i64 0} + !2 = !{!"omnipotent char", !3, i64 0} + !3 = !{!"Simple C/C++ TBAA"} + !4 = !{!5, !5, i64 0} + !5 = !{!"int", !2, i64 0} + !6 = !{!2, !2, i64 0} + +... 
+--- +name: main +tracksRegLiveness: true +body: | + bb.0.ae.exit.i: + successors: %bb.1(0x80000000) + + $x8 = ADRP target-flags(aarch64-page) @_MergedGlobals + 12 + renamable $x8 = ADDXri killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @_MergedGlobals + 12, 0 + renamable $x9 = LDURXi renamable $x8, 12 :: (volatile dereferenceable load 8 from `i32*** getelementptr inbounds (<{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>, <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>* @_MergedGlobals, i32 0, i32 8)`, !tbaa !0) + $x10 = ADRP target-flags(aarch64-page) @_MergedGlobals.1 + 8 + renamable $x10 = ADDXri killed $x10, target-flags(aarch64-pageoff, aarch64-nc) @_MergedGlobals.1 + 8, 0 + STRXui renamable $x10, killed renamable $x9, 0 :: (store 8 into %ir.0, !tbaa !0) + renamable $x9 = LDRXui killed renamable $x10, 1 :: (dereferenceable load 8 from `i32** getelementptr inbounds (<{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>, <{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>* @_MergedGlobals.1, i32 0, i32 5)`, !tbaa !0) + renamable $x10 = LDURXi renamable $x8, 4 :: (dereferenceable load 8 from `i32**** getelementptr inbounds (<{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>, <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>* @_MergedGlobals, i32 0, i32 7)`, !tbaa !0) + renamable $x10 = LDRXui killed renamable $x10, 0 :: (load 8 from %ir.3, !tbaa !0) + renamable $w9 = LDRWui killed renamable $x9, 0 :: (load 4 from %ir.1, !tbaa !4) + renamable $x10 = LDRXui killed renamable $x10, 0 :: (load 8 from %ir.4, !tbaa !0) + STRWui killed renamable $w9, killed renamable $x10, 0 :: (store 4 into %ir.5, !tbaa !4) + renamable $w8 = LDRWui killed renamable $x8, 0 :: (dereferenceable load 4 from `i32* getelementptr inbounds (<{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>, <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>* @_MergedGlobals, i32 0, i32 6)`, 
!tbaa !4) + renamable $w9 = MOVZWi 9, 0 + + bb.1.for.body.i (align 4): + successors: %bb.2(0x04000000), %bb.1(0x7c000000) + liveins: $w8, $w9 + + $w8 = ANDWrs killed renamable $w8, renamable $w9, 0 + CBNZW $wzr, %bb.1 + + bb.2.ac.exit: + liveins: $w8 + + renamable $x9 = ADRP target-flags(aarch64-page) @_MergedGlobals + 12 + STRWui killed renamable $w8, killed renamable $x9, target-flags(aarch64-pageoff, aarch64-nc) @_MergedGlobals + 12 :: (store 4 into `i32* getelementptr inbounds (<{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>, <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>* @_MergedGlobals, i32 0, i32 6)`, !tbaa !4) + renamable $x8 = ADRP target-flags(aarch64-page) @_MergedGlobals.1 + renamable $w9 = MOVZWi 1, 0 + STRBBui killed renamable $w9, killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @_MergedGlobals.1 :: (store 1 into `i1* getelementptr inbounds (<{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>, <{ i1, [3 x i8], [1 x [1 x i32]], i32, [4 x i8], i32* }>* @_MergedGlobals.1, i32 0, i32 0)`, align 8) + renamable $x8 = ADRP target-flags(aarch64-page) @_MergedGlobals + 4 + dead $wzr = LDRBBui killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @_MergedGlobals + 4 :: (volatile dereferenceable load 1 from `i8* getelementptr inbounds (<{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>, <{ i8, [3 x i8], i8, [3 x i8], i8, [3 x i8], i32, i32***, i32** }>* @_MergedGlobals, i32 0, i32 2)`, align 4, !tbaa !6) + $w0 = ORRWrs $wzr, $wzr, 0 + RET undef $lr, implicit killed $w0 + +...