Index: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
===================================================================
--- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -795,6 +795,7 @@
   int LoadSize = getMemScale(*LoadI);
   int StoreSize = getMemScale(*StoreI);
   unsigned LdRt = getLdStRegOp(*LoadI).getReg();
+  const MachineOperand &StMO = getLdStRegOp(*StoreI);
   unsigned StRt = getLdStRegOp(*StoreI).getReg();
   bool IsStoreXReg =
       TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
@@ -819,7 +820,7 @@
         BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
             .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
-            .addReg(StRt)
+            .add(StMO)
             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   } else {
     // FIXME: Currently we disable this transformation in big-endian targets as
@@ -860,23 +861,31 @@
           BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                   TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                   DestReg)
-              .addReg(StRt)
+              .add(StMO)
               .addImm(AndMaskEncoded);
     } else {
       BitExtMI =
           BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                   TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                   DestReg)
-              .addReg(StRt)
+              .add(StMO)
               .addImm(Immr)
               .addImm(Imms);
     }
   }
 
-  // Clear kill flags between store and load.
+  // Clear kill flags between store and load and also propagate the kill flag
+  // to the promoted operand.
   for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                      BitExtMI->getIterator()))
-    MI.clearRegisterKills(StRt, TRI);
+    if (MI.killsRegister(StRt, TRI)) {
+      MI.clearRegisterKills(StRt, TRI);
+      // Load-store promotion is only legal if StRt is not redefined between
+      // the store and load instruction, so we can just propagate the kill flag
+      // and break after we find one.
+      BitExtMI->addRegisterKilled(StRt, TRI);
+      break;
+    }
 
   DEBUG(dbgs() << "Promoting load by replacing :\n ");
   DEBUG(StoreI->print(dbgs()));
Index: test/CodeGen/AArch64/ldst-opt.mir
===================================================================
--- test/CodeGen/AArch64/ldst-opt.mir
+++ test/CodeGen/AArch64/ldst-opt.mir
@@ -34,7 +34,7 @@
 # Don't count transient instructions towards search limits.
 # CHECK-LABEL: name: promote-load-from-store
 # CHECK: STRWui %w1
-# CHECK: UBFMWri %w1
+# CHECK: UBFMWri killed %w1
 ---
 name: store-pair
 tracksRegLiveness: true
@@ -137,10 +137,29 @@
     HINT 0, implicit %w11 ; some use of %w11
 ...
 # When replaceing the load of a store-load pair with a copy the kill flags
-# along the way need to be cleared.
+# along the way need to be cleared and the kill flag needs to be set on the
+# promoted operand.
 # CHECK-LABEL: name: store-load-clearkill
 # CHECK: STRWui %w1, %sp, 0 :: (store 4)
 # CHECK-NOT: COPY killed %w1
 # CHECK: %wzr = COPY %w1
-# CHECK: %w11 = ORRWrs %wzr, %w1, 0
+# CHECK: %w11 = ORRWrs %wzr, killed %w1, 0
 # CHECK: HINT 0, implicit %w11
+---
+name: promote-load-from-store-undef
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %w1, %x0, %x2, %lr
+
+    STRWui undef %w1, %x0, 0 :: (store 4)
+    %w0 = LDRBBui %x0, 1 :: (load 2)
+    STRHHui undef %w3, %x2, 0 :: (store 4)
+    %w1 = LDRBBui %x2, 0 :: (load 4)
+    RET %lr, implicit %w0
+...
+# CHECK-LABEL: name: promote-load-from-store-undef
+# CHECK: STRWui undef %w1
+# CHECK: UBFMWri undef %w1
+# CHECK: STRHHui undef %w3
+# CHECK: ANDWri undef %w3