Index: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp =================================================================== --- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1342,28 +1342,28 @@ if (Update->getOpcode() == AArch64::SUBXri) Value = -Value; - unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode()) - : getPostIndexedOpcode(I->getOpcode()); - MachineInstrBuilder MIB; - if (!isPairedLdSt(*I)) { - // Non-paired instruction. - MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .add(getLdStRegOp(*Update)) - .add(getLdStRegOp(*I)) - .add(getLdStBaseOp(*I)) - .addImm(Value) - .setMemRefs(I->memoperands_begin(), I->memoperands_end()); + // We need to use writeback only when the base register is used afterwards. + bool UseWriteback = !getLdStBaseOp(*I).isKill(); + unsigned NewOpc; + if (UseWriteback) { + NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode()) + : getPostIndexedOpcode(I->getOpcode()); } else { - // Paired instruction. - int Scale = getMemScale(*I); - MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .add(getLdStRegOp(*Update)) - .add(getLdStRegOp(*I, 0)) - .add(getLdStRegOp(*I, 1)) - .add(getLdStBaseOp(*I)) - .addImm(Value / Scale) - .setMemRefs(I->memoperands_begin(), I->memoperands_end()); + assert(IsPreIdx); + NewOpc = I->getOpcode(); } + int Scale = (!UseWriteback || isPairedLdSt(*I)) ? getMemScale(*I) : 1; + MachineInstrBuilder MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), + TII->get(NewOpc)); + if (UseWriteback) + MIB.add(getLdStRegOp(*Update)); + if (isPairedLdSt(*I)) + MIB.add(getLdStRegOp(*I, 0)).add(getLdStRegOp(*I, 1)); + else + MIB.add(getLdStRegOp(*I)); + MIB.add(getLdStBaseOp(*I)) + .addImm(Value / Scale) + .setMemRefs(I->memoperands_begin(), I->memoperands_end()); (void)MIB; if (IsPreIdx) { @@ -1503,18 +1503,21 @@ unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); int Offset = getLdStOffsetOp(MemMI).getImm(); + bool UseWriteback = !getLdStBaseOp(MemMI).isKill(); // If the load/store is the first instruction in the block, there's obviously // not any matching update. Ditto if the memory offset isn't zero. if (MBBI == B || Offset != 0) return E; - // If the base register overlaps a destination register, we can't - // merge the update. + // If the base register overlaps a destination register, and we need to use + // writeback, then we can't merge the update. bool IsPairedInsn = isPairedLdSt(MemMI); - for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { - unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; + if (UseWriteback) { + for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { + unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); + if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) + return E; + } } // Track which registers have been modified and used between the first insn Index: test/CodeGen/AArch64/ldst-opt.ll =================================================================== --- test/CodeGen/AArch64/ldst-opt.ll +++ test/CodeGen/AArch64/ldst-opt.ll @@ -273,7 +273,7 @@ ; ... ; ldr X, [x8] ; -> -; ldr X, [x8, #16]! +; ldr X, [x8, #16] ; ; with X being either w0, x0, s0, d0 or q0. @@ -286,7 +286,7 @@ define i32 @load-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond, %pre.struct.i32* %load2) nounwind { ; CHECK-LABEL: load-pre-indexed-word2 -; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #4]! +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #4] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.i32*, %pre.struct.i32** %this @@ -304,7 +304,7 @@ define i64 @load-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond, %pre.struct.i64* %load2) nounwind { ; CHECK-LABEL: load-pre-indexed-doubleword2 -; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #8]! +; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #8] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.i64*, %pre.struct.i64** %this @@ -322,7 +322,7 @@ define <2 x i64> @load-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond, %pre.struct.i128* %load2) nounwind { ; CHECK-LABEL: load-pre-indexed-quadword2 -; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #16]! +; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #16] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.i128*, %pre.struct.i128** %this @@ -340,7 +340,7 @@ define float @load-pre-indexed-float2(%pre.struct.float** %this, i1 %cond, %pre.struct.float* %load2) nounwind { ; CHECK-LABEL: load-pre-indexed-float2 -; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #4]! +; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #4] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.float*, %pre.struct.float** %this @@ -358,7 +358,7 @@ define double @load-pre-indexed-double2(%pre.struct.double** %this, i1 %cond, %pre.struct.double* %load2) nounwind { ; CHECK-LABEL: load-pre-indexed-double2 -; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #8]! +; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #8] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.double*, %pre.struct.double** %this @@ -376,7 +376,7 @@ define i32 @load-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond, %pre.struct.i32* %load2) nounwind { ; CHECK-LABEL: load-pre-indexed-word3 -; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #12]! +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #12] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.i32*, %pre.struct.i32** %this @@ -394,7 +394,7 @@ define i64 @load-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond, %pre.struct.i64* %load2) nounwind { ; CHECK-LABEL: load-pre-indexed-doubleword3 -; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #16]! +; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #16] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.i64*, %pre.struct.i64** %this @@ -412,7 +412,7 @@ define <2 x i64> @load-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond, %pre.struct.i128* %load2) nounwind { ; CHECK-LABEL: load-pre-indexed-quadword3 -; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]! +; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.i128*, %pre.struct.i128** %this @@ -430,7 +430,7 @@ define float @load-pre-indexed-float3(%pre.struct.float** %this, i1 %cond, %pre.struct.float* %load2) nounwind { ; CHECK-LABEL: load-pre-indexed-float3 -; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #8]! +; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #8] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.float*, %pre.struct.float** %this @@ -448,7 +448,7 @@ define double @load-pre-indexed-double3(%pre.struct.double** %this, i1 %cond, %pre.struct.double* %load2) nounwind { ; CHECK-LABEL: load-pre-indexed-double3 -; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #16]! +; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #16] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.double*, %pre.struct.double** %this @@ -469,7 +469,7 @@ ; ... ; str X, [x8] ; -> -; str X, [x8, #16]! +; str X, [x8, #16] ; ; with X being either w0, x0, s0, d0 or q0. @@ -477,7 +477,7 @@ %pre.struct.i32* %load2, i32 %val) nounwind { ; CHECK-LABEL: store-pre-indexed-word2 -; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]! +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.i32*, %pre.struct.i32** %this @@ -496,7 +496,7 @@ %pre.struct.i64* %load2, i64 %val) nounwind { ; CHECK-LABEL: store-pre-indexed-doubleword2 -; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #8]! +; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #8] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.i64*, %pre.struct.i64** %this @@ -515,7 +515,7 @@ %pre.struct.i128* %load2, <2 x i64> %val) nounwind { ; CHECK-LABEL: store-pre-indexed-quadword2 -; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #16]! +; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #16] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.i128*, %pre.struct.i128** %this @@ -534,7 +534,7 @@ %pre.struct.float* %load2, float %val) nounwind { ; CHECK-LABEL: store-pre-indexed-float2 -; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #4]! +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #4] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.float*, %pre.struct.float** %this @@ -553,7 +553,7 @@ %pre.struct.double* %load2, double %val) nounwind { ; CHECK-LABEL: store-pre-indexed-double2 -; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #8]! +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #8] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.double*, %pre.struct.double** %this @@ -572,7 +572,7 @@ %pre.struct.i32* %load2, i32 %val) nounwind { ; CHECK-LABEL: store-pre-indexed-word3 -; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #12]! +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #12] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.i32*, %pre.struct.i32** %this @@ -591,7 +591,7 @@ %pre.struct.i64* %load2, i64 %val) nounwind { ; CHECK-LABEL: store-pre-indexed-doubleword3 -; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #24]! +; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #24] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.i64*, %pre.struct.i64** %this @@ -610,7 +610,7 @@ %pre.struct.i128* %load2, <2 x i64> %val) nounwind { ; CHECK-LABEL: store-pre-indexed-quadword3 -; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]! +; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.i128*, %pre.struct.i128** %this @@ -629,7 +629,7 @@ %pre.struct.float* %load2, float %val) nounwind { ; CHECK-LABEL: store-pre-indexed-float3 -; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #8]! +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #8] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.float*, %pre.struct.float** %this @@ -648,7 +648,7 @@ %pre.struct.double* %load2, double %val) nounwind { ; CHECK-LABEL: store-pre-indexed-double3 -; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #16]! +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #16] br i1 %cond, label %if.then, label %if.end if.then: %load1 = load %pre.struct.double*, %pre.struct.double** %this Index: test/CodeGen/AArch64/ldst-opt.mir =================================================================== --- test/CodeGen/AArch64/ldst-opt.mir +++ test/CodeGen/AArch64/ldst-opt.mir @@ -181,3 +181,33 @@ # CHECK-NOT: LDRXui # CHECK-NOT: ORR # CHECK: BL $bar, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit-def %sp +--- +name: pre-index-overlap-killed-base +tracksRegLiveness: true +body: | + bb.0: + liveins: %x0 + + %x0 = ADDXri killed %x0, 4, 0 + %w0 = LDRWui killed %x0, 0 +... +# When the base register overlaps the load register we can optimise by not +# using writeback when the base is killed +# CHECK-LABEL: name: pre-index-overlap-killed-base +# CHECK: %w0 = LDRWui killed %x0, 1 +--- +name: pre-index-overlap-live-base +tracksRegLiveness: true +body: | + bb.0: + liveins: %x0 + + %x0 = ADDXri killed %x0, 4, 0 + %w0 = LDRWui %x0, 0 +... +# When the base register overlaps the load register we can't optimise if the +# base register is live +# CHECK-LABEL: name: pre-index-overlap-live-base +# CHECK-NOT: %w0 = LDRWui killed %x0, 1 +# CHECK: %x0 = ADDXri killed %x0, 4, 0 +# CHECK: %w0 = LDRWui %x0, 0