Index: llvm/trunk/include/llvm/CodeGen/MachineInstr.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineInstr.h +++ llvm/trunk/include/llvm/CodeGen/MachineInstr.h @@ -1317,6 +1317,11 @@ /// modify the memrefs of the this MachineInstr. std::pair<mmo_iterator, unsigned> mergeMemRefsWith(const MachineInstr& Other); + /// Return the MIFlags which represent both MachineInstrs. This + /// should be used when merging two MachineInstrs into one. This routine does + /// not modify the MIFlags of this MachineInstr. + uint8_t mergeFlagsWith(const MachineInstr& Other) const; + /// Clear this MachineInstr's memory reference descriptor list. This resets /// the memrefs to their most conservative state. This should be used only /// as a last resort since it greatly pessimizes our knowledge of the memory Index: llvm/trunk/lib/CodeGen/MachineInstr.cpp =================================================================== --- llvm/trunk/lib/CodeGen/MachineInstr.cpp +++ llvm/trunk/lib/CodeGen/MachineInstr.cpp @@ -381,6 +381,12 @@ return std::make_pair(MemBegin, CombinedNumMemRefs); } +uint8_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { + // For now, just return the union of the flags. If the flags get more + // complicated over time, we might need more logic here. + return getFlags() | Other.getFlags(); +} + bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { assert(!isBundledWithPred() && "Must be called on bundle header"); for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) { Index: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -702,7 +702,8 @@ .addReg(isNarrowStore(Opc) ? 
AArch64::WZR : AArch64::XZR) .add(BaseRegOp) .addImm(OffsetImm) - .setMemRefs(I->mergeMemRefsWith(*MergeMI)); + .setMemRefs(I->mergeMemRefsWith(*MergeMI)) + .setMIFlags(I->mergeFlagsWith(*MergeMI)); (void)MIB; DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n "); @@ -818,7 +819,8 @@ .add(RegOp1) .add(BaseRegOp) .addImm(OffsetImm) - .setMemRefs(I->mergeMemRefsWith(*Paired)); + .setMemRefs(I->mergeMemRefsWith(*Paired)) + .setMIFlags(I->mergeFlagsWith(*Paired)); (void)MIB; @@ -913,7 +915,8 @@ TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt) .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR) .add(StMO) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) + .setMIFlags(LoadI->getFlags()); } else { // FIXME: Currently we disable this transformation in big-endian targets as // performance and correctness are verified only in little-endian. @@ -954,7 +957,8 @@ TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri), DestReg) .add(StMO) - .addImm(AndMaskEncoded); + .addImm(AndMaskEncoded) + .setMIFlags(LoadI->getFlags()); } else { BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), @@ -962,7 +966,8 @@ DestReg) .add(StMO) .addImm(Immr) - .addImm(Imms); + .addImm(Imms) + .setMIFlags(LoadI->getFlags()); } } @@ -1352,7 +1357,8 @@ .add(getLdStRegOp(*I)) .add(getLdStBaseOp(*I)) .addImm(Value) - .setMemRefs(I->memoperands_begin(), I->memoperands_end()); + .setMemRefs(I->memoperands_begin(), I->memoperands_end()) + .setMIFlags(I->mergeFlagsWith(*Update)); } else { // Paired instruction. 
int Scale = getMemScale(*I); @@ -1362,7 +1368,8 @@ .add(getLdStRegOp(*I, 1)) .add(getLdStBaseOp(*I)) .addImm(Value / Scale) - .setMemRefs(I->memoperands_begin(), I->memoperands_end()); + .setMemRefs(I->memoperands_begin(), I->memoperands_end()) + .setMIFlags(I->mergeFlagsWith(*Update)); } (void)MIB; Index: llvm/trunk/test/CodeGen/AArch64/ldst-miflags.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/ldst-miflags.mir +++ llvm/trunk/test/CodeGen/AArch64/ldst-miflags.mir @@ -0,0 +1,99 @@ +# RUN: llc -run-pass=aarch64-ldst-opt -o - -mtriple=aarch64-- %s | FileCheck %s +# Check that we merge the MIFlags from both the instructions in the final +# instruction. +--- +name: case11 +# CHECK-LABEL: name: case11 +body: | + bb.0: + frame-setup STRWui $w1, $x0, 1 :: (store 4) + $w1 = frame-destroy LDRWui $x0, 1 :: (load 4) + + ; CHECK: frame-setup STRWui + ; CHECK-NOT: frame-setup + ; CHECK-NEXT: frame-destroy ORRWrs + ; No merging happening here, make sure we keep the flags of the previous + ; instruction. + RET_ReallyLR + +... +--- +name: case12 +# CHECK-LABEL: name: case12 +body: | + bb.0: + frame-setup STRWui $w1, $x0, 1 :: (store 4) + $w2 = frame-destroy LDRHHui $x0, 2 :: (load 2) + + ; CHECK: frame-setup STRWui + ; CHECK-NOT: frame-setup + ; CHECK-NEXT: frame-destroy ANDWri + ; No merging happening here, make sure we keep the flags of the previous + ; instruction. + RET_ReallyLR + +... +--- +name: case13 +# CHECK-LABEL: name: case13 +body: | + bb.0: + frame-setup STRWui $w1, $x0, 1 :: (store 4) + $w2 = frame-destroy LDRHHui $x0, 3 :: (load 2) + + ; CHECK: frame-setup STRWui + ; CHECK-NOT: frame-setup + ; CHECK-NEXT: frame-destroy UBFMWri + ; No merging happening here, make sure we keep the flags of the previous + ; instruction. + RET_ReallyLR + +... 
+--- +name: case2 +# CHECK-LABEL: name: case2 +body: | + bb.0: + frame-setup STRHHui $wzr, $x0, 0 :: (store 4) + frame-destroy STRHHui $wzr, $x0, 1 :: (store 4) + + ; CHECK: frame-setup frame-destroy STRWui + RET_ReallyLR + +... +--- +name: case3 +# CHECK-LABEL: name: case3 +body: | + bb.0: + + $x0 = frame-setup LDRXui $x2, 0 :: (load 8) + $x1 = frame-destroy LDRXui $x2, 1 :: (load 8) + + ; CHECK: frame-setup frame-destroy LDPXi + RET_ReallyLR +... +--- +name: case4 +# CHECK-LABEL: name: case4 +body: | + bb.0: + $x26, $x25 = frame-setup LDPXi $sp, 0 + $sp = frame-destroy ADDXri $sp, 64, 0 + + ; CHECK: = frame-setup frame-destroy LDPXpost + RET_ReallyLR + +... +--- +name: case41 +# CHECK-LABEL: name: case41 +body: | + bb.0: + $x26 = frame-setup LDRXui $sp, 0 + $sp = frame-destroy ADDXri $sp, 64, 0 + + ; CHECK: = frame-setup frame-destroy LDRXpost + RET_ReallyLR + +...