diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1113,6 +1113,7 @@ bool CombinerHelper::matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) { + OtherMI = nullptr; unsigned Opcode = MI.getOpcode(); bool IsDiv, IsSigned; @@ -1167,11 +1168,44 @@ matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) && matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) { OtherMI = &UseMI; - return true; + break; } } - - return false; + if (!OtherMI) + return false; + + // We may have a situation like this: + // %4:_(s32) = G_SEXT %2:_(s1) + // %5:_(s32) = G_SEXT %2:_(s1) + // %6:_(s32) = G_UDIV %4:_, %5:_ + // %8:_(s32) = G_SEXT %2:_(s1) + // %9:_(s32) = G_UREM %5:_, %8:_ + // and choosing the insertion point as the G_UDIV will cause it to use %8 + // before the def. We check here if any of the operands of the later + // instruction (i.e. one of DIV/REM that is the second in the block) are + // dominated by the first instruction. In this case we check if %8 is + // dominated by the G_UDIV and bail out if so. + + SmallSet RegsToCheck; + MachineInstr *First, *Second; + if (dominates(MI, *OtherMI)) { + First = &MI; + Second = OtherMI; + } else { + First = OtherMI; + Second = &MI; + } + RegsToCheck.insert(Second->getOperand(1).getReg()); + RegsToCheck.insert(Second->getOperand(2).getReg()); + for (MachineBasicBlock::iterator II = std::next(First->getIterator()); + II != Second->getIterator(); ++II) { + for (auto &MO : II->operands()) { + if (MO.isReg() && MO.isDef() && RegsToCheck.count(MO.getReg()) && + dominates(*First, *II)) + return false; + } + } + return true; } void CombinerHelper::applyCombineDivRem(MachineInstr &MI, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-conflict.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-conflict.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-conflict.mir @@ -0,0 +1,121 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -global-isel -verify-machineinstrs %s -o - | FileCheck %s + +# Check that we don't combine to G_UDIVREM if it would cause a use-before-def +--- +name: test +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: test + ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1) + ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[SEXT]], [[SEXT1]] + ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1) + ; CHECK-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[SEXT1]], [[SEXT2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UREM]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UDIV]](s32) + ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s8) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SEXT3]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[OR]](s64) + ; CHECK-NEXT: $w0 = COPY [[TRUNC1]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:_(s16) = G_CONSTANT i16 0 + %2:_(s1) = G_CONSTANT i1 true + %1:_(s1) = G_ICMP intpred(sge), %0(s16), %0 + %3:_(s8) = G_SEXT %2(s1) + %4:_(s32) = G_SEXT %3(s8) + %5:_(s32) = G_SEXT %1(s1) + %6:_(s32) = G_UDIV %4, %5 + %7:_(s32) = COPY %5(s32) + %8:_(s32) = G_SEXT %2(s1) + %9:_(s32) = G_UREM %7, %8 + %10:_(s8) = G_TRUNC %9(s32) + %11:_(s64) = G_ZEXT %6(s32) + %12:_(s64) = G_SEXT %10(s8) + %13:_(s64) = G_OR %11, %12 + %14:_(s32) = G_TRUNC %13(s64) + $w0 = COPY %14(s32) + RET_ReallyLR implicit $w0 + +... + +# Check with the div and rem the other way around +--- +name: test_inverted_div_rem +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: test_inverted_div_rem + ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1) + ; CHECK-NEXT: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[SEXT]], [[SEXT1]] + ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1) + ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[SEXT1]], [[SEXT2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UDIV]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UREM]](s32) + ; CHECK-NEXT: [[SEXT3:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s8) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SEXT3]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[OR]](s64) + ; CHECK-NEXT: $w0 = COPY [[TRUNC1]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:_(s16) = G_CONSTANT i16 0 + %2:_(s1) = G_CONSTANT i1 true + %1:_(s1) = G_ICMP intpred(sge), %0(s16), %0 + %3:_(s8) = G_SEXT %2(s1) + %4:_(s32) = G_SEXT %3(s8) + %5:_(s32) = G_SEXT %1(s1) + %6:_(s32) = G_UREM %4, %5 + %7:_(s32) = COPY %5(s32) + %8:_(s32) = G_SEXT %2(s1) + %9:_(s32) = G_UDIV %7, %8 + %10:_(s8) = G_TRUNC %9(s32) + %11:_(s64) = G_ZEXT %6(s32) + %12:_(s64) = G_SEXT %10(s8) + %13:_(s64) = G_OR %11, %12 + %14:_(s32) = G_TRUNC %13(s64) + $w0 = COPY %14(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: ok_before_first +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: ok_before_first + ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1) + ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1) + ; CHECK-NEXT: [[UDIVREM:%[0-9]+]]:_(s32), [[UDIVREM1:%[0-9]+]]:_ = G_UDIVREM [[SEXT]], [[SEXT1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UDIVREM1]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UDIVREM]](s32) + ; CHECK-NEXT: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s8) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SEXT2]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[OR]](s64) + ; CHECK-NEXT: $w0 = COPY [[TRUNC1]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:_(s16) = G_CONSTANT i16 0 + %2:_(s1) = G_CONSTANT i1 true + %1:_(s1) = G_ICMP intpred(sge), %0(s16), %0 + %3:_(s8) = G_SEXT %2(s1) + %4:_(s32) = G_SEXT %3(s8) + %5:_(s32) = G_SEXT %1(s1) + %8:_(s32) = G_SEXT %2(s1) + %6:_(s32) = G_UDIV %4, %5 + %7:_(s32) = COPY %5(s32) + %9:_(s32) = G_UREM %7, %8 + %10:_(s8) = G_TRUNC %9(s32) + %11:_(s64) = G_ZEXT %6(s32) + %12:_(s64) = G_SEXT %10(s8) + %13:_(s64) = G_OR %11, %12 + %14:_(s32) = G_TRUNC %13(s64) + $w0 = COPY %14(s32) + RET_ReallyLR implicit $w0 + +...