diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -873,11 +873,13 @@ MachineInstrBuilder MIB1 = BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) .addReg(RegX, getKillRegState(KillX)) - .addReg(RegY, getKillRegState(KillY)); + .addReg(RegY, getKillRegState(KillY)) + .setMIFlags(Prev.getFlags()); MachineInstrBuilder MIB2 = BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) .addReg(RegA, getKillRegState(KillA)) - .addReg(NewVR, getKillRegState(true)); + .addReg(NewVR, getKillRegState(true)) + .setMIFlags(Root.getFlags()); setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6214,6 +6214,14 @@ if (MUL) DelInstrs.push_back(MUL); DelInstrs.push_back(&Root); + + // Set the flags on the inserted instructions to be the merged flags of the + // instructions that we have combined. 
+ uint16_t Flags = Root.getFlags(); + if (MUL) + Flags = Root.mergeFlagsWith(*MUL); + for (auto *MI : InsInstrs) + MI->setFlags(Flags); } /// Replace csincr-branch sequence by simple conditional branch diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir --- a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir @@ -6,7 +6,7 @@ # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] +# CHECK-NEXT: :fpr32 = nnan ninf nsz arcp contract afn reassoc FMADDSrrr [[B]], [[A]], [[C]] --- name: scalar_fmadd_fast alignment: 4 @@ -45,7 +45,7 @@ # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] +# CHECK-NEXT: :fpr32 = contract FMADDSrrr [[B]], [[A]], [[C]] --- name: scalar_fmadd_contract @@ -125,7 +125,7 @@ # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] +# CHECK-NEXT: :fpr32 = contract FMADDSrrr [[B]], [[A]], [[C]] --- name: scalar_fmadd_contract_op1 @@ -206,7 +206,7 @@ # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] +# CHECK-NEXT: fpr128 = nnan ninf nsz arcp contract afn reassoc FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_fast alignment: 4 @@ -245,7 +245,7 @@ # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] +# CHECK-NEXT: fpr128 = contract FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_contract alignment: 4 @@ -324,7 
+324,7 @@ # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] +# CHECK-NEXT: fpr128 = contract FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_contract_op1 diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir @@ -0,0 +1,132 @@ +# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SAFE +# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu -enable-unsafe-fp-math %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE + +# fadd without the reassoc flag can be reassociated only when unsafe fp math is +# enabled. +# CHECK-LABEL: name: fadd_no_reassoc +# CHECK: %4:fpr32 = FADDSrr %0, %1 +# CHECK-SAFE-NEXT: %5:fpr32 = FADDSrr killed %4, %2 +# CHECK-SAFE-NEXT: %6:fpr32 = FADDSrr killed %5, %3 +# CHECK-UNSAFE-NEXT: %9:fpr32 = FADDSrr %2, %3 +# CHECK-UNSAFE-NEXT: %6:fpr32 = FADDSrr killed %4, killed %9 +--- +name: fadd_no_reassoc +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: fpr32 } + - { id: 1, class: fpr32 } + - { id: 2, class: fpr32 } + - { id: 3, class: fpr32 } + - { id: 4, class: fpr32 } + - { id: 5, class: fpr32 } + - { id: 6, class: fpr32 } +liveins: + - { reg: '$s0', virtual-reg: '%0' } + - { reg: '$s1', virtual-reg: '%1' } + - { reg: '$s2', virtual-reg: '%2' } + - { reg: '$s3', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $s0, $s1, $s2, $s3 + + %3:fpr32 = COPY $s3 + %2:fpr32 = COPY $s2 + %1:fpr32 = COPY $s1 + %0:fpr32 = COPY $s0 + %4:fpr32 = FADDSrr %0, %1 + %5:fpr32 = FADDSrr killed %4, %2 + %6:fpr32 = FADDSrr killed %5, %3 + $s0 = COPY %6 + RET_ReallyLR 
implicit $s0 + + +# FIXME: We should be able to reassociate without unsafe fp math, but currently +# the reassoc flag is ignored. +# CHECK-LABEL: name: fadd_reassoc +# CHECK: %4:fpr32 = reassoc FADDSrr %0, %1 +# CHECK-SAFE-NEXT: %5:fpr32 = reassoc FADDSrr killed %4, %2 +# CHECK-SAFE-NEXT: %6:fpr32 = reassoc FADDSrr killed %5, %3 +# CHECK-UNSAFE-NEXT: %9:fpr32 = reassoc FADDSrr %2, %3 +# CHECK-UNSAFE-NEXT: %6:fpr32 = reassoc FADDSrr killed %4, killed %9 +--- +name: fadd_reassoc +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: fpr32 } + - { id: 1, class: fpr32 } + - { id: 2, class: fpr32 } + - { id: 3, class: fpr32 } + - { id: 4, class: fpr32 } + - { id: 5, class: fpr32 } + - { id: 6, class: fpr32 } +liveins: + - { reg: '$s0', virtual-reg: '%0' } + - { reg: '$s1', virtual-reg: '%1' } + - { reg: '$s2', virtual-reg: '%2' } + - { reg: '$s3', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $s0, $s1, $s2, $s3 + + %3:fpr32 = COPY $s3 + %2:fpr32 = COPY $s2 + %1:fpr32 = COPY $s1 + %0:fpr32 = COPY $s0 + %4:fpr32 = reassoc FADDSrr %0, %1 + %5:fpr32 = reassoc FADDSrr killed %4, %2 + %6:fpr32 = reassoc FADDSrr killed %5, %3 + $s0 = COPY %6 + RET_ReallyLR implicit $s0 + + +# Check that flags on the instructions are preserved after reassociation. 
+# CHECK-LABEL: name: fadd_flags +# CHECK: %4:fpr32 = nsz FADDSrr %0, %1 +# CHECK-SAFE-NEXT: %5:fpr32 = nnan FADDSrr killed %4, %2 +# CHECK-SAFE-NEXT: %6:fpr32 = ninf FADDSrr killed %5, %3 +# CHECK-UNSAFE-NEXT: %9:fpr32 = nnan FADDSrr %2, %3 +# CHECK-UNSAFE-NEXT: %6:fpr32 = ninf FADDSrr killed %4, killed %9 +--- +name: fadd_flags +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: fpr32 } + - { id: 1, class: fpr32 } + - { id: 2, class: fpr32 } + - { id: 3, class: fpr32 } + - { id: 4, class: fpr32 } + - { id: 5, class: fpr32 } + - { id: 6, class: fpr32 } +liveins: + - { reg: '$s0', virtual-reg: '%0' } + - { reg: '$s1', virtual-reg: '%1' } + - { reg: '$s2', virtual-reg: '%2' } + - { reg: '$s3', virtual-reg: '%3' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $s0, $s1, $s2, $s3 + + %3:fpr32 = COPY $s3 + %2:fpr32 = COPY $s2 + %1:fpr32 = COPY $s1 + %0:fpr32 = COPY $s0 + %4:fpr32 = nsz FADDSrr %0, %1 + %5:fpr32 = nnan FADDSrr killed %4, %2 + %6:fpr32 = ninf FADDSrr killed %5, %3 + $s0 = COPY %6 + RET_ReallyLR implicit $s0