diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -712,8 +712,8 @@
   if (Op2.isReg() && Register::isVirtualRegister(Op2.getReg()))
     MI2 = MRI.getUniqueVRegDef(Op2.getReg());
 
-  // And they need to be in the trace (otherwise, they won't have a depth).
-  return MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB;
+  // And at least one operand must be defined in MBB.
+  return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB);
 }
 
 bool TargetInstrInfo::areOpcodesEqualOrInverse(unsigned Opcode1,
@@ -1042,6 +1042,10 @@
     break;
   }
 
+  // Don't reassociate if Prev and Root are in different blocks.
+  if (Prev->getParent() != Root.getParent())
+    return;
+
   assert(Prev && "Unknown pattern for machine combiner");
 
   reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate-ops-in-different-blocks.mir b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate-ops-in-different-blocks.mir
--- a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate-ops-in-different-blocks.mir
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate-ops-in-different-blocks.mir
@@ -44,8 +44,8 @@
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: bb.1:
 ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
- ; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
- ; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], [[LDRQui1]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 killed [[FADDv4f32_1]], killed [[FADDv4f32_]], implicit $fpcr
 ; CHECK-NEXT: [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -3326,14 +3326,14 @@
 ; RV32IF-NEXT: seqz a6, a1
 ; RV32IF-NEXT: .LBB47_7: # %entry
 ; RV32IF-NEXT: neg a6, a6
-; RV32IF-NEXT: and a3, a6, a3
 ; RV32IF-NEXT: xori a1, a1, 1
 ; RV32IF-NEXT: or a1, a1, a0
 ; RV32IF-NEXT: seqz a1, a1
 ; RV32IF-NEXT: addi a1, a1, -1
 ; RV32IF-NEXT: and a3, a1, a3
-; RV32IF-NEXT: and a4, a6, a4
+; RV32IF-NEXT: and a3, a3, a6
 ; RV32IF-NEXT: and a1, a1, a4
+; RV32IF-NEXT: and a1, a1, a6
 ; RV32IF-NEXT: neg a4, a5
 ; RV32IF-NEXT: and a4, a4, a0
 ; RV32IF-NEXT: mv a0, a3
@@ -3376,11 +3376,11 @@
 ; RV64-NEXT: .LBB47_2: # %entry
 ; RV64-NEXT: slti a3, a1, 1
 ; RV64-NEXT: neg a3, a3
-; RV64-NEXT: and a0, a3, a0
 ; RV64-NEXT: addi a1, a1, -1
 ; RV64-NEXT: seqz a1, a1
 ; RV64-NEXT: addi a1, a1, -1
 ; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a3
 ; RV64-NEXT: beqz a2, .LBB47_4
 ; RV64-NEXT: # %bb.3: # %entry
 ; RV64-NEXT: sgtz a1, a2
@@ -3424,14 +3424,14 @@
 ; RV32IFD-NEXT: seqz a6, a1
 ; RV32IFD-NEXT: .LBB47_7: # %entry
 ; RV32IFD-NEXT: neg a6, a6
-;
RV32IFD-NEXT: and a3, a6, a3 ; RV32IFD-NEXT: xori a1, a1, 1 ; RV32IFD-NEXT: or a1, a1, a0 ; RV32IFD-NEXT: seqz a1, a1 ; RV32IFD-NEXT: addi a1, a1, -1 ; RV32IFD-NEXT: and a3, a1, a3 -; RV32IFD-NEXT: and a4, a6, a4 +; RV32IFD-NEXT: and a3, a3, a6 ; RV32IFD-NEXT: and a1, a1, a4 +; RV32IFD-NEXT: and a1, a1, a6 ; RV32IFD-NEXT: neg a4, a5 ; RV32IFD-NEXT: and a4, a4, a0 ; RV32IFD-NEXT: mv a0, a3 @@ -3660,14 +3660,14 @@ ; RV32-NEXT: seqz a6, a1 ; RV32-NEXT: .LBB50_7: # %entry ; RV32-NEXT: neg a6, a6 -; RV32-NEXT: and a3, a6, a3 ; RV32-NEXT: xori a1, a1, 1 ; RV32-NEXT: or a1, a1, a0 ; RV32-NEXT: seqz a1, a1 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a3, a1, a3 -; RV32-NEXT: and a4, a6, a4 +; RV32-NEXT: and a3, a3, a6 ; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: and a1, a1, a6 ; RV32-NEXT: neg a4, a5 ; RV32-NEXT: and a4, a4, a0 ; RV32-NEXT: mv a0, a3 @@ -3710,11 +3710,11 @@ ; RV64-NEXT: .LBB50_2: # %entry ; RV64-NEXT: slti a3, a1, 1 ; RV64-NEXT: neg a3, a3 -; RV64-NEXT: and a0, a3, a0 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: seqz a1, a1 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: and a0, a0, a3 ; RV64-NEXT: beqz a2, .LBB50_4 ; RV64-NEXT: # %bb.3: # %entry ; RV64-NEXT: sgtz a1, a2 @@ -3975,14 +3975,14 @@ ; RV32-NEXT: seqz a6, a1 ; RV32-NEXT: .LBB53_7: # %entry ; RV32-NEXT: neg a6, a6 -; RV32-NEXT: and a3, a6, a3 ; RV32-NEXT: xori a1, a1, 1 ; RV32-NEXT: or a1, a1, a0 ; RV32-NEXT: seqz a1, a1 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a3, a1, a3 -; RV32-NEXT: and a4, a6, a4 +; RV32-NEXT: and a3, a3, a6 ; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: and a1, a1, a6 ; RV32-NEXT: neg a4, a5 ; RV32-NEXT: and a4, a4, a0 ; RV32-NEXT: mv a0, a3 @@ -4027,11 +4027,11 @@ ; RV64-NEXT: .LBB53_2: # %entry ; RV64-NEXT: slti a3, a1, 1 ; RV64-NEXT: neg a3, a3 -; RV64-NEXT: and a0, a3, a0 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: seqz a1, a1 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: and a0, a0, a3 ; RV64-NEXT: beqz a2, .LBB53_4 ; RV64-NEXT: # %bb.3: # %entry ; RV64-NEXT: sgtz a1, a2 diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -5695,10 +5695,10 @@ ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: slti a0, s1, 1 ; CHECK-NOV-NEXT: neg a0, a0 -; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: addi s1, s1, -1 ; CHECK-NOV-NEXT: seqz a5, s1 ; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: and a5, a5, s0 ; CHECK-NOV-NEXT: and a0, a5, a0 ; CHECK-NOV-NEXT: beqz a4, .LBB47_6 ; CHECK-NOV-NEXT: # %bb.5: # %entry @@ -6185,10 +6185,10 @@ ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: slti a0, s1, 1 ; CHECK-NOV-NEXT: neg a0, a0 -; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: addi s1, s1, -1 ; CHECK-NOV-NEXT: seqz a5, s1 ; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: and a5, a5, s0 ; CHECK-NOV-NEXT: and a0, a5, a0 ; CHECK-NOV-NEXT: beqz a4, .LBB50_6 ; CHECK-NOV-NEXT: # %bb.5: # %entry @@ -6664,10 +6664,10 @@ ; CHECK-NOV-NEXT: addi a1, a1, -1 ; CHECK-NOV-NEXT: slti a0, s1, 1 ; CHECK-NOV-NEXT: neg a0, a0 -; CHECK-NOV-NEXT: and a0, a0, s0 ; CHECK-NOV-NEXT: addi s1, s1, -1 ; CHECK-NOV-NEXT: seqz a5, s1 ; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: and a5, a5, s0 ; CHECK-NOV-NEXT: and a0, a5, a0 ; CHECK-NOV-NEXT: beqz a4, .LBB53_6 ; CHECK-NOV-NEXT: # %bb.5: # %entry @@ -6727,10 +6727,10 @@ ; CHECK-V-NEXT: addi a1, a1, -1 ; CHECK-V-NEXT: slti a0, s1, 1 ; CHECK-V-NEXT: neg a0, a0 -; CHECK-V-NEXT: and 
a0, a0, s0 ; CHECK-V-NEXT: addi s1, s1, -1 ; CHECK-V-NEXT: seqz a5, s1 ; CHECK-V-NEXT: addi a5, a5, -1 +; CHECK-V-NEXT: and a5, a5, s0 ; CHECK-V-NEXT: and a0, a5, a0 ; CHECK-V-NEXT: beqz a4, .LBB53_6 ; CHECK-V-NEXT: # %bb.5: # %entry diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll --- a/llvm/test/CodeGen/RISCV/iabs.ll +++ b/llvm/test/CodeGen/RISCV/iabs.ll @@ -225,8 +225,8 @@ ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: snez a2, a0 ; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: add a1, a1, a2 -; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a1, a2, a1 ; RV32I-NEXT: .LBB6_2: ; RV32I-NEXT: ret ; @@ -236,8 +236,8 @@ ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: snez a2, a0 ; RV32ZBB-NEXT: neg a0, a0 -; RV32ZBB-NEXT: add a1, a1, a2 -; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a1, a2, a1 ; RV32ZBB-NEXT: .LBB6_2: ; RV32ZBB-NEXT: ret ; @@ -264,8 +264,8 @@ ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: snez a2, a0 ; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: add a1, a1, a2 -; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a1, a2, a1 ; RV32I-NEXT: .LBB7_2: ; RV32I-NEXT: ret ; @@ -275,8 +275,8 @@ ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: snez a2, a0 ; RV32ZBB-NEXT: neg a0, a0 -; RV32ZBB-NEXT: add a1, a1, a2 -; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a1, a2, a1 ; RV32ZBB-NEXT: .LBB7_2: ; RV32ZBB-NEXT: ret ; @@ -301,64 +301,64 @@ define i128 @abs128(i128 %x) { ; RV32I-LABEL: abs128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a2, 4(a1) -; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: snez a5, a3 +; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a2, 12(a1) +; RV32I-NEXT: snez a5, a4 ; RV32I-NEXT: mv a6, a5 -; RV32I-NEXT: beqz a2, .LBB8_2 +; RV32I-NEXT: beqz a3, .LBB8_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: snez a6, a2 +; RV32I-NEXT: snez a6, a3 ; RV32I-NEXT: .LBB8_2: ; RV32I-NEXT: lw a1, 8(a1) -; RV32I-NEXT: bgez a4, .LBB8_4 +; RV32I-NEXT: bgez a2, .LBB8_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: neg a7, a1 ; RV32I-NEXT: sltu t0, a7, a6 ; RV32I-NEXT: snez a1, a1 -; RV32I-NEXT: add a1, a4, a1 -; RV32I-NEXT: neg a4, t0 -; RV32I-NEXT: sub a4, a4, a1 +; RV32I-NEXT: add a1, a1, t0 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: sub a2, a1, a2 ; RV32I-NEXT: sub a1, a7, a6 -; RV32I-NEXT: add a2, a2, a5 -; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: add a3, a3, a5 ; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: neg a4, a4 ; RV32I-NEXT: .LBB8_4: -; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a4, 0(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a2, 4(a0) -; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: abs128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a2, 4(a1) -; RV32ZBB-NEXT: lw a4, 12(a1) -; RV32ZBB-NEXT: snez a5, a3 +; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 4(a1) +; RV32ZBB-NEXT: lw a2, 12(a1) +; RV32ZBB-NEXT: snez a5, a4 ; RV32ZBB-NEXT: mv a6, a5 -; RV32ZBB-NEXT: beqz a2, .LBB8_2 +; RV32ZBB-NEXT: beqz a3, .LBB8_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: snez a6, a2 +; RV32ZBB-NEXT: snez a6, a3 ; RV32ZBB-NEXT: .LBB8_2: ; RV32ZBB-NEXT: lw a1, 8(a1) -; RV32ZBB-NEXT: bgez a4, .LBB8_4 +; RV32ZBB-NEXT: bgez a2, .LBB8_4 ; RV32ZBB-NEXT: # %bb.3: ; RV32ZBB-NEXT: neg a7, a1 ; RV32ZBB-NEXT: sltu t0, a7, a6 ; RV32ZBB-NEXT: snez a1, a1 -; RV32ZBB-NEXT: add a1, a4, a1 -; RV32ZBB-NEXT: neg a4, t0 -; RV32ZBB-NEXT: sub a4, a4, a1 +; RV32ZBB-NEXT: add a1, a1, t0 +; RV32ZBB-NEXT: neg a1, a1 +; 
RV32ZBB-NEXT: sub a2, a1, a2 ; RV32ZBB-NEXT: sub a1, a7, a6 -; RV32ZBB-NEXT: add a2, a2, a5 -; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: add a3, a3, a5 ; RV32ZBB-NEXT: neg a3, a3 +; RV32ZBB-NEXT: neg a4, a4 ; RV32ZBB-NEXT: .LBB8_4: -; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a4, 0(a0) ; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a2, 4(a0) -; RV32ZBB-NEXT: sw a4, 12(a0) +; RV32ZBB-NEXT: sw a3, 4(a0) +; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: abs128: @@ -367,8 +367,8 @@ ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: snez a2, a0 ; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: neg a2, a2 +; RV64I-NEXT: sub a1, a2, a1 ; RV64I-NEXT: .LBB8_2: ; RV64I-NEXT: ret ; @@ -378,8 +378,8 @@ ; RV64ZBB-NEXT: # %bb.1: ; RV64ZBB-NEXT: snez a2, a0 ; RV64ZBB-NEXT: neg a0, a0 -; RV64ZBB-NEXT: add a1, a1, a2 -; RV64ZBB-NEXT: neg a1, a1 +; RV64ZBB-NEXT: neg a2, a2 +; RV64ZBB-NEXT: sub a1, a2, a1 ; RV64ZBB-NEXT: .LBB8_2: ; RV64ZBB-NEXT: ret %abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true) @@ -389,64 +389,64 @@ define i128 @select_abs128(i128 %x) { ; RV32I-LABEL: select_abs128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a2, 4(a1) -; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: snez a5, a3 +; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a2, 12(a1) +; RV32I-NEXT: snez a5, a4 ; RV32I-NEXT: mv a6, a5 -; RV32I-NEXT: beqz a2, .LBB9_2 +; RV32I-NEXT: beqz a3, .LBB9_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: snez a6, a2 +; RV32I-NEXT: snez a6, a3 ; RV32I-NEXT: .LBB9_2: ; RV32I-NEXT: lw a1, 8(a1) -; RV32I-NEXT: bgez a4, .LBB9_4 +; RV32I-NEXT: bgez a2, .LBB9_4 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: neg a7, a1 ; RV32I-NEXT: sltu t0, a7, a6 ; RV32I-NEXT: snez a1, a1 -; RV32I-NEXT: add a1, a4, a1 -; RV32I-NEXT: neg a4, t0 -; RV32I-NEXT: sub a4, a4, a1 +; RV32I-NEXT: add a1, a1, t0 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: sub a2, a1, a2 ; RV32I-NEXT: sub a1, a7, a6 -; RV32I-NEXT: add a2, a2, a5 -; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: add a3, a3, a5 ; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: neg a4, a4 ; RV32I-NEXT: .LBB9_4: -; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a4, 0(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a2, 4(a0) -; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: select_abs128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a2, 4(a1) -; RV32ZBB-NEXT: lw a4, 12(a1) -; RV32ZBB-NEXT: snez a5, a3 +; RV32ZBB-NEXT: lw a4, 0(a1) +; RV32ZBB-NEXT: lw a3, 4(a1) +; RV32ZBB-NEXT: lw a2, 12(a1) +; RV32ZBB-NEXT: snez a5, a4 ; RV32ZBB-NEXT: mv a6, a5 -; RV32ZBB-NEXT: beqz a2, .LBB9_2 +; RV32ZBB-NEXT: beqz a3, .LBB9_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: snez a6, a2 +; RV32ZBB-NEXT: snez a6, a3 ; RV32ZBB-NEXT: .LBB9_2: ; RV32ZBB-NEXT: lw a1, 8(a1) -; RV32ZBB-NEXT: bgez a4, .LBB9_4 +; RV32ZBB-NEXT: bgez a2, .LBB9_4 ; RV32ZBB-NEXT: # %bb.3: ; RV32ZBB-NEXT: neg a7, a1 ; RV32ZBB-NEXT: sltu t0, a7, a6 ; RV32ZBB-NEXT: snez a1, a1 -; RV32ZBB-NEXT: add a1, a4, a1 -; RV32ZBB-NEXT: neg a4, t0 -; RV32ZBB-NEXT: sub a4, a4, a1 +; RV32ZBB-NEXT: add a1, a1, t0 +; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: sub a2, a1, a2 ; RV32ZBB-NEXT: sub a1, a7, a6 -; RV32ZBB-NEXT: add a2, a2, a5 -; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: add a3, a3, a5 ; RV32ZBB-NEXT: neg a3, a3 +; RV32ZBB-NEXT: neg a4, a4 ; RV32ZBB-NEXT: .LBB9_4: -; RV32ZBB-NEXT: sw a3, 0(a0) +; RV32ZBB-NEXT: sw a4, 0(a0) ; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a2, 
4(a0) -; RV32ZBB-NEXT: sw a4, 12(a0) +; RV32ZBB-NEXT: sw a3, 4(a0) +; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: select_abs128: @@ -455,8 +455,8 @@ ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: snez a2, a0 ; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: neg a2, a2 +; RV64I-NEXT: sub a1, a2, a1 ; RV64I-NEXT: .LBB9_2: ; RV64I-NEXT: ret ; @@ -466,8 +466,8 @@ ; RV64ZBB-NEXT: # %bb.1: ; RV64ZBB-NEXT: snez a2, a0 ; RV64ZBB-NEXT: neg a0, a0 -; RV64ZBB-NEXT: add a1, a1, a2 -; RV64ZBB-NEXT: neg a1, a1 +; RV64ZBB-NEXT: neg a2, a2 +; RV64ZBB-NEXT: sub a1, a2, a1 ; RV64ZBB-NEXT: .LBB9_2: ; RV64ZBB-NEXT: ret %1 = icmp slt i128 %x, 0 diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -1274,9 +1274,9 @@ ; RV32I-NEXT: srli a7, a7, 26 ; RV32I-NEXT: slli t4, a5, 6 ; RV32I-NEXT: or a7, t4, a7 +; RV32I-NEXT: add a7, a7, t1 +; RV32I-NEXT: add a7, a7, t3 ; RV32I-NEXT: sub a5, a5, a7 -; RV32I-NEXT: add t1, t1, t3 -; RV32I-NEXT: sub a5, a5, t1 ; RV32I-NEXT: sub a7, t2, t0 ; RV32I-NEXT: sub a3, a3, a6 ; RV32I-NEXT: sub a3, a3, a4 diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll --- a/llvm/test/CodeGen/RISCV/neg-abs.ll +++ b/llvm/test/CodeGen/RISCV/neg-abs.ll @@ -204,14 +204,14 @@ ; RV32I-NEXT: bgez a1, .LBB5_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: snez a3, a0 -; RV32I-NEXT: add a1, a1, a3 -; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: sub a1, a3, a1 ; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: .LBB5_2: ; RV32I-NEXT: sw a0, 0(a2) ; RV32I-NEXT: snez a3, a0 -; RV32I-NEXT: add a3, a1, a3 ; RV32I-NEXT: neg a3, a3 +; RV32I-NEXT: sub a3, a3, a1 ; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: sw a1, 4(a2) ; RV32I-NEXT: mv a1, a3 @@ -222,14 +222,14 @@ ; RV32ZBB-NEXT: bgez a1, .LBB5_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: snez a3, a0 -; RV32ZBB-NEXT: add a1, a1, a3 -; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: neg a3, a3 +; RV32ZBB-NEXT: sub a1, a3, a1 ; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: .LBB5_2: ; RV32ZBB-NEXT: sw a0, 0(a2) ; RV32ZBB-NEXT: snez a3, a0 -; RV32ZBB-NEXT: add a3, a1, a3 ; RV32ZBB-NEXT: neg a3, a3 +; RV32ZBB-NEXT: sub a3, a3, a1 ; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: sw a1, 4(a2) ; RV32ZBB-NEXT: mv a1, a3 diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll --- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll +++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll @@ -1076,8 +1076,8 @@ ; RV32I-NEXT: and a0, a0, a2 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: add a3, a5, a3 ; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: add a1, a5, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotl_64_mask_shared: @@ -1131,8 +1131,8 @@ ; RV32ZBB-NEXT: and a0, a0, a2 ; RV32ZBB-NEXT: add a0, a1, a0 ; RV32ZBB-NEXT: sltu a1, a0, a1 -; RV32ZBB-NEXT: add a3, a5, a3 ; RV32ZBB-NEXT: add a1, a3, a1 +; RV32ZBB-NEXT: add a1, a5, a1 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotl_64_mask_shared: @@ -1232,7 +1232,7 @@ ; RV32I-NEXT: and a0, a0, a2 ; RV32I-NEXT: add a0, a6, a0 ; RV32I-NEXT: sltu a2, a0, a6 -; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: add a2, a3, a2 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: ret ; @@ -1286,7 +1286,7 @@ ; RV32ZBB-NEXT: and a0, a0, a2 ; RV32ZBB-NEXT: add a0, a6, a0 ; RV32ZBB-NEXT: sltu a2, a0, a6 -; RV32ZBB-NEXT: add a1, a1, a3 +; RV32ZBB-NEXT: add a2, a3, a2 ; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: ret ; @@ -1369,14 +1369,14 @@ ; RV32I-NEXT: srl t0, t0, a1 ; 
RV32I-NEXT: sll t1, a0, a4 ; RV32I-NEXT: srli a0, a6, 1 -; RV32I-NEXT: srl t2, a0, a1 +; RV32I-NEXT: srl a6, a0, a1 ; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: bnez a5, .LBB21_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: .LBB21_6: -; RV32I-NEXT: or a6, a7, t0 -; RV32I-NEXT: or a7, t1, t2 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: or a6, t1, a6 ; RV32I-NEXT: sll t0, a0, a4 ; RV32I-NEXT: bnez a5, .LBB21_8 ; RV32I-NEXT: # %bb.7: @@ -1388,11 +1388,11 @@ ; RV32I-NEXT: sll a2, a2, a4 ; RV32I-NEXT: srli a0, a0, 1 ; RV32I-NEXT: srl a0, a0, a1 -; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: add a1, a7, a0 -; RV32I-NEXT: add a0, a6, a3 -; RV32I-NEXT: sltu a2, a0, a6 -; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: or a2, a2, a0 +; RV32I-NEXT: add a0, a7, a3 +; RV32I-NEXT: sltu a1, a0, a7 +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: add a1, a6, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotl_64_mask_multiple: @@ -1426,14 +1426,14 @@ ; RV32ZBB-NEXT: srl t0, t0, a1 ; RV32ZBB-NEXT: sll t1, a0, a4 ; RV32ZBB-NEXT: srli a0, a6, 1 -; RV32ZBB-NEXT: srl t2, a0, a1 +; RV32ZBB-NEXT: srl a6, a0, a1 ; RV32ZBB-NEXT: mv a0, a3 ; RV32ZBB-NEXT: bnez a5, .LBB21_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: mv a0, a2 ; RV32ZBB-NEXT: .LBB21_6: -; RV32ZBB-NEXT: or a6, a7, t0 -; RV32ZBB-NEXT: or a7, t1, t2 +; RV32ZBB-NEXT: or a7, a7, t0 +; RV32ZBB-NEXT: or a6, t1, a6 ; RV32ZBB-NEXT: sll t0, a0, a4 ; RV32ZBB-NEXT: bnez a5, .LBB21_8 ; RV32ZBB-NEXT: # %bb.7: @@ -1445,11 +1445,11 @@ ; RV32ZBB-NEXT: sll a2, a2, a4 ; RV32ZBB-NEXT: srli a0, a0, 1 ; RV32ZBB-NEXT: srl a0, a0, a1 -; RV32ZBB-NEXT: or a0, a2, a0 -; RV32ZBB-NEXT: add a1, a7, a0 -; RV32ZBB-NEXT: add a0, a6, a3 -; RV32ZBB-NEXT: sltu a2, a0, a6 -; RV32ZBB-NEXT: add a1, a1, a2 +; RV32ZBB-NEXT: or a2, a2, a0 +; RV32ZBB-NEXT: add a0, a7, a3 +; RV32ZBB-NEXT: sltu a1, a0, a7 +; RV32ZBB-NEXT: add a1, a2, a1 +; RV32ZBB-NEXT: add a1, a6, a1 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotl_64_mask_multiple: @@ -1527,16 +1527,16 @@ ; RV32I-NEXT: slli t0, a1, 1 ; RV32I-NEXT: not a0, a4 ; RV32I-NEXT: sll t0, t0, a0 -; RV32I-NEXT: srl t1, a1, a4 +; RV32I-NEXT: srl a1, a1, a4 ; RV32I-NEXT: slli a6, a6, 1 -; RV32I-NEXT: sll t2, a6, a0 +; RV32I-NEXT: sll t1, a6, a0 ; RV32I-NEXT: mv a6, a2 ; RV32I-NEXT: beqz a5, .LBB23_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: mv a6, a3 ; RV32I-NEXT: .LBB23_6: -; RV32I-NEXT: or a1, t0, a7 -; RV32I-NEXT: or a7, t2, t1 +; RV32I-NEXT: or a7, t0, a7 +; RV32I-NEXT: or a1, t1, a1 ; RV32I-NEXT: srl t0, a6, a4 ; RV32I-NEXT: beqz a5, .LBB23_8 ; RV32I-NEXT: # %bb.7: @@ -1548,11 +1548,11 @@ ; RV32I-NEXT: srl a3, a3, a4 ; RV32I-NEXT: slli a6, a6, 1 ; RV32I-NEXT: sll a0, a6, a0 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: add a7, a7, a0 -; RV32I-NEXT: add a0, a1, a2 -; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: add a1, a7, a1 +; RV32I-NEXT: or a3, a0, a3 +; RV32I-NEXT: add a0, a7, a2 +; RV32I-NEXT: sltu a2, a0, a7 +; RV32I-NEXT: add a2, a3, a2 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotr_64_mask_multiple: @@ -1583,16 +1583,16 @@ ; RV32ZBB-NEXT: slli t0, a1, 1 ; RV32ZBB-NEXT: not a0, a4 ; RV32ZBB-NEXT: sll t0, t0, a0 -; RV32ZBB-NEXT: srl t1, a1, a4 +; RV32ZBB-NEXT: srl a1, a1, a4 ; RV32ZBB-NEXT: slli a6, a6, 1 -; RV32ZBB-NEXT: sll t2, a6, a0 +; RV32ZBB-NEXT: sll t1, a6, a0 ; RV32ZBB-NEXT: mv a6, a2 ; RV32ZBB-NEXT: beqz a5, .LBB23_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: mv a6, a3 ; RV32ZBB-NEXT: .LBB23_6: -; RV32ZBB-NEXT: or a1, t0, a7 -; RV32ZBB-NEXT: or a7, t2, t1 +; RV32ZBB-NEXT: or a7, t0, a7 +; RV32ZBB-NEXT: or a1, t1, a1 ; RV32ZBB-NEXT: srl t0, 
a6, a4 ; RV32ZBB-NEXT: beqz a5, .LBB23_8 ; RV32ZBB-NEXT: # %bb.7: @@ -1604,11 +1604,11 @@ ; RV32ZBB-NEXT: srl a3, a3, a4 ; RV32ZBB-NEXT: slli a6, a6, 1 ; RV32ZBB-NEXT: sll a0, a6, a0 -; RV32ZBB-NEXT: or a0, a0, a3 -; RV32ZBB-NEXT: add a7, a7, a0 -; RV32ZBB-NEXT: add a0, a1, a2 -; RV32ZBB-NEXT: sltu a1, a0, a1 -; RV32ZBB-NEXT: add a1, a7, a1 +; RV32ZBB-NEXT: or a3, a0, a3 +; RV32ZBB-NEXT: add a0, a7, a2 +; RV32ZBB-NEXT: sltu a2, a0, a7 +; RV32ZBB-NEXT: add a2, a3, a2 +; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotr_64_mask_multiple: diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -720,8 +720,8 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: snez a2, a0 ; CHECK-NEXT: neg a0, a0 -; CHECK-NEXT: add a1, a1, a2 -; CHECK-NEXT: neg a1, a1 +; CHECK-NEXT: neg a2, a2 +; CHECK-NEXT: sub a1, a2, a1 ; CHECK-NEXT: .LBB19_2: ; CHECK-NEXT: ret %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true) diff --git a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll --- a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll @@ -16,8 +16,8 @@ ; CHECK-NEXT: slli a2, a2, 32 ; CHECK-NEXT: mulhu a1, a2, a1 ; CHECK-NEXT: srli a1, a1, 1 -; CHECK-NEXT: add a0, a3, a0 -; CHECK-NEXT: addw a0, a0, a1 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: addw a0, a3, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: li a0, 0 @@ -61,8 +61,8 @@ ; CHECK-NEXT: slli a3, a3, 32 ; CHECK-NEXT: mulhu a1, a3, a1 ; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: subw a0, a2, a0 -; CHECK-NEXT: subw a0, a0, a1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: li a0, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll @@ -828,23 +828,23 @@ ; CHECK-NEXT: addi a4, a4, 1 ; CHECK-NEXT: andi a5, a4, -32 ; CHECK-NEXT: add a3, a5, a2 -; CHECK-NEXT: slli a6, a2, 2 -; CHECK-NEXT: add a6, a6, a2 -; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: add a6, a1, a6 +; CHECK-NEXT: slli a7, a2, 2 +; CHECK-NEXT: add a6, a0, a2 +; CHECK-NEXT: add a2, a1, a2 +; CHECK-NEXT: add a2, a2, a7 ; CHECK-NEXT: li a7, 32 ; CHECK-NEXT: li t0, 5 ; CHECK-NEXT: mv t1, a5 ; CHECK-NEXT: .LBB13_3: # %bb15 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetvli zero, a7, e8, m1, ta, ma -; CHECK-NEXT: vlse8.v v8, (a6), t0 -; CHECK-NEXT: vle8.v v9, (a2) +; CHECK-NEXT: vlse8.v v8, (a2), t0 +; CHECK-NEXT: vle8.v v9, (a6) ; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vse8.v v8, (a2) +; CHECK-NEXT: vse8.v v8, (a6) ; CHECK-NEXT: addi t1, t1, -32 -; CHECK-NEXT: addi a2, a2, 32 -; CHECK-NEXT: addi a6, a6, 160 +; CHECK-NEXT: addi a6, a6, 32 +; CHECK-NEXT: addi a2, a2, 160 ; CHECK-NEXT: bnez t1, .LBB13_3 ; CHECK-NEXT: # %bb.4: # %bb30 ; CHECK-NEXT: beq a4, a5, .LBB13_7 @@ -852,8 +852,8 @@ ; CHECK-NEXT: addiw a2, a3, -1024 ; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: slli a4, a3, 2 -; CHECK-NEXT: add a3, a4, a3 ; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: add a1, a1, a4 ; CHECK-NEXT: .LBB13_6: # %bb35 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lb a3, 0(a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll @@ -354,17 +354,17 @@ ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vcpop.m a2, v11, v0.t ; CHECK-NEXT: seqz a2, a2 -; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: addi a2, a1, -128 -; CHECK-NEXT: sltu a1, a1, a2 +; CHECK-NEXT: addi a3, a1, -128 +; CHECK-NEXT: sltu a1, a1, a3 ; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: and a1, a1, a3 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmnot.m v8, v8 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vcpop.m a1, v8, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: and a0, a0, a2 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret %r = call i1 @llvm.vp.reduce.and.v256i1(i1 %s, <256 x i1> %v, <256 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll @@ -581,8 +581,8 @@ ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vcpop.m a1, v11, v0.t ; RV32-NEXT: snez a1, a1 -; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: or a0, a3, a0 +; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: slli a0, a0, 31 ; RV32-NEXT: srai a0, a0, 31 ; RV32-NEXT: ret @@ -608,8 +608,8 @@ ; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vcpop.m a1, v11, v0.t ; RV64-NEXT: snez a1, a1 -; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: or a0, a3, a0 +; RV64-NEXT: or a0, a0, a1 ; RV64-NEXT: slli a0, a0, 63 ; RV64-NEXT: srai a0, a0, 63 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -1061,9 +1061,9 @@ ; CHECK-NEXT: .LBB18_10: # %if.end ; CHECK-NEXT: addiw a2, a2, -1 ; CHECK-NEXT: andi a2, a2, -2 -; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: not a0, a0 ; CHECK-NEXT: srli a0, a0, 31 +; CHECK-NEXT: add a0, a2, a0 ; CHECK-NEXT: addw a0, a1, a0 ; CHECK-NEXT: .LBB18_11: # %cleanup ; CHECK-NEXT: ret @@ -1108,9 +1108,9 @@ ; NOREMOVAL-NEXT: sext.w a0, a0 ; NOREMOVAL-NEXT: addiw a2, a2, -1 ; NOREMOVAL-NEXT: andi a2, a2, -2 -; NOREMOVAL-NEXT: add a1, a1, a2 ; NOREMOVAL-NEXT: not a0, a0 ; NOREMOVAL-NEXT: srli a0, a0, 31 +; NOREMOVAL-NEXT: add a0, a2, a0 ; NOREMOVAL-NEXT: addw a0, a1, a0 ; NOREMOVAL-NEXT: .LBB18_11: # %cleanup ; NOREMOVAL-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -2239,11 +2239,11 @@ ; RV32I-NEXT: # %bb.84: ; RV32I-NEXT: mv t5, t2 ; RV32I-NEXT: .LBB9_85: -; RV32I-NEXT: and a1, s9, t5 -; RV32I-NEXT: and a1, a7, a1 +; RV32I-NEXT: and a1, a7, t5 +; RV32I-NEXT: and a1, a1, s9 ; RV32I-NEXT: and a5, s3, t2 -; RV32I-NEXT: and a5, s9, a5 ; RV32I-NEXT: and a5, a7, a5 +; RV32I-NEXT: and a5, a5, s9 ; RV32I-NEXT: sb a1, 24(a2) ; RV32I-NEXT: sb a5, 28(a2) ; RV32I-NEXT: srli a7, a1, 24 @@ -2706,19 +2706,19 @@ ; RV32I-NEXT: or s2, s3, s11 ; RV32I-NEXT: or s0, ra, s0 ; RV32I-NEXT: or s3, s7, s6 -; RV32I-NEXT: neg s6, a5 -; RV32I-NEXT: srl ra, s8, s6 -; RV32I-NEXT: li s7, 160 +; RV32I-NEXT: neg s7, a5 +; RV32I-NEXT: srl ra, s8, s7 +; RV32I-NEXT: li 
s6, 160 ; RV32I-NEXT: addi t6, a5, -128 ; RV32I-NEXT: li t2, 64 -; RV32I-NEXT: sub s7, s7, a5 -; RV32I-NEXT: sw s7, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sub s6, s6, a5 +; RV32I-NEXT: sw s6, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: bgeu t6, t2, .LBB10_6 ; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: slti s7, s7, 0 -; RV32I-NEXT: neg s7, s7 -; RV32I-NEXT: and s7, s7, ra -; RV32I-NEXT: or s10, s5, s7 +; RV32I-NEXT: slti s6, s6, 0 +; RV32I-NEXT: neg s6, s6 +; RV32I-NEXT: and s6, s6, ra +; RV32I-NEXT: or s10, s5, s6 ; RV32I-NEXT: .LBB10_6: ; RV32I-NEXT: slli t1, t1, 8 ; RV32I-NEXT: slli t3, t3, 16 @@ -2760,15 +2760,15 @@ ; RV32I-NEXT: srl a0, s0, a0 ; RV32I-NEXT: or t3, t3, a0 ; RV32I-NEXT: .LBB10_12: -; RV32I-NEXT: srl s5, s3, s6 +; RV32I-NEXT: srl s5, s3, s7 ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: sub s0, a0, a5 ; RV32I-NEXT: sw s0, 68(sp) # 4-byte Folded Spill ; RV32I-NEXT: slti s1, s0, 0 -; RV32I-NEXT: neg s7, s1 +; RV32I-NEXT: neg s6, s1 ; RV32I-NEXT: bgeu a5, t2, .LBB10_14 ; RV32I-NEXT: # %bb.13: -; RV32I-NEXT: and a4, s7, s5 +; RV32I-NEXT: and a4, s6, s5 ; RV32I-NEXT: or a4, t3, a4 ; RV32I-NEXT: .LBB10_14: ; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill @@ -2777,7 +2777,7 @@ ; RV32I-NEXT: # %bb.15: ; RV32I-NEXT: mv t3, a4 ; RV32I-NEXT: .LBB10_16: -; RV32I-NEXT: srl s10, s2, s6 +; RV32I-NEXT: srl s10, s2, s7 ; RV32I-NEXT: li a4, 96 ; RV32I-NEXT: sub s5, a4, a5 ; RV32I-NEXT: slti a4, s5, 0 @@ -2819,7 +2819,7 @@ ; RV32I-NEXT: or a0, a0, t0 ; RV32I-NEXT: bltu s11, t2, .LBB10_22 ; RV32I-NEXT: .LBB10_24: -; RV32I-NEXT: and a4, s7, s10 +; RV32I-NEXT: and a4, s6, s10 ; RV32I-NEXT: mv a0, s8 ; RV32I-NEXT: beqz s11, .LBB10_26 ; RV32I-NEXT: .LBB10_25: @@ -2829,7 +2829,7 @@ ; RV32I-NEXT: # %bb.27: ; RV32I-NEXT: lw t1, 60(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB10_28: -; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sltiu t0, a5, 64 ; RV32I-NEXT: lw a4, 64(sp) # 4-byte Folded Reload ; RV32I-NEXT: mv s0, s9 @@ -2855,7 +2855,7 @@ ; RV32I-NEXT: .LBB10_33: ; RV32I-NEXT: mv t0, a4 ; RV32I-NEXT: .LBB10_34: -; RV32I-NEXT: srl t1, a3, s6 +; RV32I-NEXT: srl t1, a3, s7 ; RV32I-NEXT: slli a4, s2, 1 ; RV32I-NEXT: sub a0, t2, a5 ; RV32I-NEXT: not a0, a0 @@ -2871,20 +2871,20 @@ ; RV32I-NEXT: or s2, t1, a0 ; RV32I-NEXT: .LBB10_37: ; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl s10, a0, s6 +; RV32I-NEXT: srl s10, a0, s7 ; RV32I-NEXT: slli s8, s8, 1 ; RV32I-NEXT: not a0, s11 ; RV32I-NEXT: bltz s5, .LBB10_39 ; RV32I-NEXT: # %bb.38: ; RV32I-NEXT: mv t4, s8 -; RV32I-NEXT: mv s7, s10 +; RV32I-NEXT: mv s6, s10 ; RV32I-NEXT: mv s8, ra ; RV32I-NEXT: bltu s11, t2, .LBB10_40 ; RV32I-NEXT: j .LBB10_41 ; RV32I-NEXT: .LBB10_39: ; RV32I-NEXT: mv t4, s8 ; RV32I-NEXT: sll s8, s8, a0 -; RV32I-NEXT: mv s7, s10 +; RV32I-NEXT: mv s6, s10 ; RV32I-NEXT: or s8, s10, s8 ; RV32I-NEXT: bgeu s11, t2, .LBB10_41 ; RV32I-NEXT: .LBB10_40: @@ -2926,7 +2926,7 @@ ; RV32I-NEXT: lw t3, 68(sp) # 4-byte Folded Reload ; RV32I-NEXT: bgez t3, .LBB10_49 ; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: srl t3, t5, s6 +; RV32I-NEXT: srl t3, t5, s7 ; RV32I-NEXT: slli s3, s3, 1 ; RV32I-NEXT: lw t5, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: sll t5, s3, t5 @@ -2977,7 +2977,7 @@ ; RV32I-NEXT: sub a0, a0, a5 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: sll a0, t4, a0 -; RV32I-NEXT: or a0, s7, a0 +; RV32I-NEXT: or a0, s6, a0 ; RV32I-NEXT: bltu t6, t2, .LBB10_57 ; RV32I-NEXT: .LBB10_59: ; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload @@ -3043,7 +3043,7 @@ ; RV32I-NEXT: .LBB10_76: ; 
RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: sll a0, t4, a0 -; RV32I-NEXT: or ra, s7, a0 +; RV32I-NEXT: or ra, s6, a0 ; RV32I-NEXT: sltiu a0, a5, 128 ; RV32I-NEXT: bgeu a5, t2, .LBB10_69 ; RV32I-NEXT: .LBB10_77: @@ -3061,11 +3061,11 @@ ; RV32I-NEXT: # %bb.80: ; RV32I-NEXT: mv t3, s4 ; RV32I-NEXT: .LBB10_81: -; RV32I-NEXT: and a3, s9, t3 -; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: and a3, a4, t3 +; RV32I-NEXT: and a3, a3, s9 ; RV32I-NEXT: and a5, s8, s4 -; RV32I-NEXT: and a5, s9, a5 ; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: and a4, a4, s9 ; RV32I-NEXT: sb a4, 0(a2) ; RV32I-NEXT: sb a3, 4(a2) ; RV32I-NEXT: srli a5, a4, 24 diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll @@ -2210,11 +2210,11 @@ ; RV32I-NEXT: # %bb.84: ; RV32I-NEXT: mv t5, t2 ; RV32I-NEXT: .LBB9_85: -; RV32I-NEXT: and a1, s9, t5 -; RV32I-NEXT: and a1, a7, a1 +; RV32I-NEXT: and a1, a7, t5 +; RV32I-NEXT: and a1, a1, s9 ; RV32I-NEXT: and a5, s3, t2 -; RV32I-NEXT: and a5, s9, a5 ; RV32I-NEXT: and a5, a7, a5 +; RV32I-NEXT: and a5, a5, s9 ; RV32I-NEXT: sb a1, 24(a2) ; RV32I-NEXT: sb a5, 28(a2) ; RV32I-NEXT: srli a7, a1, 24 @@ -2674,19 +2674,19 @@ ; RV32I-NEXT: or s2, s2, s11 ; RV32I-NEXT: or s0, ra, s0 ; RV32I-NEXT: or s3, s7, s6 -; RV32I-NEXT: neg s6, a5 -; RV32I-NEXT: srl ra, s8, s6 -; RV32I-NEXT: li s7, 160 +; RV32I-NEXT: neg s7, a5 +; RV32I-NEXT: srl ra, s8, s7 +; RV32I-NEXT: li s6, 160 ; RV32I-NEXT: addi t6, a5, -128 ; RV32I-NEXT: li t2, 64 -; RV32I-NEXT: sub s7, s7, a5 -; RV32I-NEXT: sw s7, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sub s6, s6, a5 +; RV32I-NEXT: sw s6, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: bgeu t6, t2, .LBB10_6 ; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: slti s7, s7, 0 -; RV32I-NEXT: neg s7, s7 -; RV32I-NEXT: and s7, s7, ra -; RV32I-NEXT: or s10, s5, s7 +; RV32I-NEXT: slti s6, s6, 0 +; RV32I-NEXT: neg s6, s6 +; RV32I-NEXT: and s6, s6, ra +; RV32I-NEXT: or s10, s5, s6 ; RV32I-NEXT: .LBB10_6: ; RV32I-NEXT: slli t1, t1, 8 ; RV32I-NEXT: slli t3, t3, 16 @@ -2728,15 +2728,15 @@ ; RV32I-NEXT: srl a0, s0, a0 ; RV32I-NEXT: or t3, t3, a0 ; RV32I-NEXT: .LBB10_12: -; RV32I-NEXT: srl s5, s2, s6 +; RV32I-NEXT: srl s5, s2, s7 ; RV32I-NEXT: li a0, 32 ; RV32I-NEXT: sub s0, a0, a5 ; RV32I-NEXT: sw s0, 68(sp) # 4-byte Folded Spill ; RV32I-NEXT: slti s1, s0, 0 -; RV32I-NEXT: neg s7, s1 +; RV32I-NEXT: neg s6, s1 ; RV32I-NEXT: bgeu a5, t2, .LBB10_14 ; RV32I-NEXT: # %bb.13: -; RV32I-NEXT: and a4, s7, s5 +; RV32I-NEXT: and a4, s6, s5 ; RV32I-NEXT: or a4, t3, a4 ; RV32I-NEXT: .LBB10_14: ; RV32I-NEXT: sw s5, 28(sp) # 4-byte Folded Spill @@ -2745,7 +2745,7 @@ ; RV32I-NEXT: # %bb.15: ; RV32I-NEXT: mv t3, a4 ; RV32I-NEXT: .LBB10_16: -; RV32I-NEXT: srl s10, s3, s6 +; RV32I-NEXT: srl s10, s3, s7 ; RV32I-NEXT: li a4, 96 ; RV32I-NEXT: sub s5, a4, a5 ; RV32I-NEXT: slti a4, s5, 0 @@ -2787,7 +2787,7 @@ ; RV32I-NEXT: or a0, a0, t0 ; RV32I-NEXT: bltu s11, t2, .LBB10_22 ; RV32I-NEXT: .LBB10_24: -; RV32I-NEXT: and a4, s7, s10 +; RV32I-NEXT: and a4, s6, s10 ; RV32I-NEXT: mv a0, s8 ; RV32I-NEXT: beqz s11, .LBB10_26 ; RV32I-NEXT: .LBB10_25: @@ -2797,7 +2797,7 @@ ; RV32I-NEXT: # %bb.27: ; RV32I-NEXT: lw t1, 60(sp) # 4-byte Folded Reload ; RV32I-NEXT: .LBB10_28: -; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sltiu t0, a5, 64 ; RV32I-NEXT: lw a4, 64(sp) # 
4-byte Folded Reload ; RV32I-NEXT: mv s0, s9 @@ -2823,7 +2823,7 @@ ; RV32I-NEXT: .LBB10_33: ; RV32I-NEXT: mv t0, a4 ; RV32I-NEXT: .LBB10_34: -; RV32I-NEXT: srl t1, a3, s6 +; RV32I-NEXT: srl t1, a3, s7 ; RV32I-NEXT: slli a4, s3, 1 ; RV32I-NEXT: sub a0, t2, a5 ; RV32I-NEXT: not a0, a0 @@ -2839,20 +2839,20 @@ ; RV32I-NEXT: or s3, t1, a0 ; RV32I-NEXT: .LBB10_37: ; RV32I-NEXT: lw a0, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: srl s10, a0, s6 +; RV32I-NEXT: srl s10, a0, s7 ; RV32I-NEXT: slli s8, s8, 1 ; RV32I-NEXT: not a0, s11 ; RV32I-NEXT: bltz s5, .LBB10_39 ; RV32I-NEXT: # %bb.38: ; RV32I-NEXT: mv t4, s8 -; RV32I-NEXT: mv s7, s10 +; RV32I-NEXT: mv s6, s10 ; RV32I-NEXT: mv s8, ra ; RV32I-NEXT: bltu s11, t2, .LBB10_40 ; RV32I-NEXT: j .LBB10_41 ; RV32I-NEXT: .LBB10_39: ; RV32I-NEXT: mv t4, s8 ; RV32I-NEXT: sll s8, s8, a0 -; RV32I-NEXT: mv s7, s10 +; RV32I-NEXT: mv s6, s10 ; RV32I-NEXT: or s8, s10, s8 ; RV32I-NEXT: bgeu s11, t2, .LBB10_41 ; RV32I-NEXT: .LBB10_40: @@ -2894,7 +2894,7 @@ ; RV32I-NEXT: lw t3, 68(sp) # 4-byte Folded Reload ; RV32I-NEXT: bgez t3, .LBB10_49 ; RV32I-NEXT: # %bb.48: -; RV32I-NEXT: srl t3, t5, s6 +; RV32I-NEXT: srl t3, t5, s7 ; RV32I-NEXT: slli s2, s2, 1 ; RV32I-NEXT: lw t5, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: sll t5, s2, t5 @@ -2945,7 +2945,7 @@ ; RV32I-NEXT: sub a0, a0, a5 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: sll a0, t4, a0 -; RV32I-NEXT: or a0, s7, a0 +; RV32I-NEXT: or a0, s6, a0 ; RV32I-NEXT: bltu t6, t2, .LBB10_57 ; RV32I-NEXT: .LBB10_59: ; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload @@ -3011,7 +3011,7 @@ ; RV32I-NEXT: .LBB10_76: ; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: sll a0, t4, a0 -; RV32I-NEXT: or ra, s7, a0 +; RV32I-NEXT: or ra, s6, a0 ; RV32I-NEXT: sltiu a0, a5, 128 ; RV32I-NEXT: bgeu a5, t2, .LBB10_69 ; RV32I-NEXT: .LBB10_77: @@ -3029,11 +3029,11 @@ ; RV32I-NEXT: # %bb.80: ; RV32I-NEXT: mv t3, s4 ; RV32I-NEXT: .LBB10_81: -; RV32I-NEXT: and a3, s9, t3 -; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: and a3, a4, t3 +; RV32I-NEXT: and a3, a3, s9 ; RV32I-NEXT: and a5, s8, s4 -; RV32I-NEXT: and a5, s9, a5 ; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: and a4, a4, s9 ; RV32I-NEXT: sb a4, 0(a2) ; RV32I-NEXT: sb a3, 4(a2) ; RV32I-NEXT: srli a5, a4, 24 diff --git a/llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll b/llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll --- a/llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll @@ -11,30 +11,30 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $r9 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $r8 ; CHECK-NEXT: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rcx :: (store (s64) into %stack.1) - ; CHECK-NEXT: undef %82.sub_32bit:gr64_with_sub_8bit = COPY $edx - ; CHECK-NEXT: undef %84.sub_32bit:gr64_nosp = COPY $esi + ; CHECK-NEXT: undef %83.sub_32bit:gr64_with_sub_8bit = COPY $edx + ; CHECK-NEXT: undef %85.sub_32bit:gr64_nosp = COPY $esi ; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0 ; CHECK-NEXT: VMOVUPSZmr %stack.0, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.0, align 4) ; CHECK-NEXT: MOV8mi %stack.0, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.0, align 4) ; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.4, align 8) ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.5, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.5, align 16) - ; CHECK-NEXT: [[LEA64_32r:%[0-9]+]]:gr32 = LEA64_32r %82, 
1, $noreg, 63, $noreg - ; CHECK-NEXT: TEST32rr %82.sub_32bit, %82.sub_32bit, implicit-def $eflags - ; CHECK-NEXT: [[CMOV32rr:%[0-9]+]]:gr32 = CMOV32rr [[CMOV32rr]], %82.sub_32bit, 9, implicit killed $eflags + ; CHECK-NEXT: [[LEA64_32r:%[0-9]+]]:gr32 = LEA64_32r %83, 1, $noreg, 63, $noreg + ; CHECK-NEXT: TEST32rr %83.sub_32bit, %83.sub_32bit, implicit-def $eflags + ; CHECK-NEXT: [[CMOV32rr:%[0-9]+]]:gr32 = CMOV32rr [[CMOV32rr]], %83.sub_32bit, 9, implicit $eflags ; CHECK-NEXT: CMP32rr [[MOV32rm1]], [[MOV32rm]], implicit-def $eflags - ; CHECK-NEXT: JCC_1 %bb.4, 13, implicit killed $eflags + ; CHECK-NEXT: JCC_1 %bb.4, 13, implicit $eflags ; CHECK-NEXT: JMP_1 %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.for.cond14.preheader.lr.ph: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef %88.sub_32bit:gr64_nosp = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 8) - ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 16, $noreg, %88.sub_16bit :: (store (s512) into %stack.0 + 16, align 4) + ; CHECK-NEXT: undef %89.sub_32bit:gr64_nosp = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 8) + ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 16, $noreg, %89.sub_16bit :: (store (s512) into %stack.0 + 16, align 4) ; CHECK-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.3, align 16) ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 49, $noreg, [[MOV32rm2]].sub_8bit :: (store (s512) into %stack.0 + 49, align 1, basealign 4) ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 48, $noreg, [[MOV32rm2]].sub_8bit :: (store (s512) into %stack.0 + 48, align 4) ; CHECK-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[AND32ri8_]], -64, implicit-def dead $eflags - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY %82.sub_32bit + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY %83.sub_32bit ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 18, $noreg, [[COPY2]].sub_16bit :: (store (s512) into %stack.0 + 18, align 2, basealign 4) ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[SUB32rr]], [[AND32ri8_]], implicit-def dead $eflags ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 18, $noreg, [[SUB32rr]].sub_16bit :: (store (s512) into %stack.0 + 18, align 2, basealign 4) @@ -43,32 +43,33 @@ ; CHECK-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[SHR32ri]], 2, implicit-def dead $eflags ; CHECK-NEXT: MOV32mr %stack.2, 1, $noreg, 0, $noreg, [[SHR32ri]] :: (store (s32) into %stack.2) ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 50, $noreg, [[SHR32ri]].sub_8bit :: (store (s512) into %stack.0 + 50, align 2, basealign 4) - ; CHECK-NEXT: [[LEA64_32r:%[0-9]+]]:gr32 = LEA64_32r $noreg, 4, %88, 0, $noreg + ; CHECK-NEXT: [[LEA64_32r:%[0-9]+]]:gr32 = LEA64_32r $noreg, 4, %89, 0, $noreg ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 20, $noreg, [[LEA64_32r]].sub_16bit :: (store (s512) into %stack.0 + 20, align 4) ; CHECK-NEXT: PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.0, align 4) - ; CHECK-NEXT: [[MOVSX64rr32_:%[0-9]+]]:gr64_nosp = MOVSX64rr32 %82.sub_32bit - ; CHECK-NEXT: %82.sub_32bit:gr64_with_sub_8bit = nsw SUB32rr %82.sub_32bit, [[SUB32rr]], implicit-def dead $eflags - ; CHECK-NEXT: undef %102.sub_32bit:gr64_with_sub_8bit = MOVZX32rr16 %82.sub_16bit - ; CHECK-NEXT: MOV64mr %stack.3, 1, $noreg, 0, 
$noreg, %102 :: (store (s64) into %stack.3) - ; CHECK-NEXT: undef %61.sub_32bit:gr64_with_sub_8bit = COPY %102.sub_32bit - ; CHECK-NEXT: %61.sub_32bit:gr64_with_sub_8bit = IMUL32rr %61.sub_32bit, %84.sub_32bit, implicit-def dead $eflags - ; CHECK-NEXT: [[LEA64_32r1:%[0-9]+]]:gr32 = LEA64_32r $noreg, 4, %84, 0, $noreg + ; CHECK-NEXT: [[MOVSX64rr32_:%[0-9]+]]:gr64_nosp = MOVSX64rr32 %83.sub_32bit + ; CHECK-NEXT: %83.sub_32bit:gr64_with_sub_8bit = nsw SUB32rr %83.sub_32bit, [[SUB32rr]], implicit-def dead $eflags + ; CHECK-NEXT: undef %14.sub_32bit:gr64_with_sub_8bit = MOVZX32rr16 %83.sub_16bit + ; CHECK-NEXT: ADD64mr %stack.1, 1, $noreg, 0, $noreg, %14, implicit-def dead $eflags :: (store (s64) into %stack.1) + ; CHECK-NEXT: undef %61.sub_32bit:gr64_with_sub_8bit = COPY %14.sub_32bit + ; CHECK-NEXT: %61.sub_32bit:gr64_with_sub_8bit = IMUL32rr %61.sub_32bit, %85.sub_32bit, implicit-def dead $eflags + ; CHECK-NEXT: [[LEA64_32r1:%[0-9]+]]:gr32 = LEA64_32r $noreg, 4, %85, 0, $noreg ; CHECK-NEXT: [[MOVSX64rr32_1:%[0-9]+]]:gr64 = MOVSX64rr32 [[LEA64_32r1]] - ; CHECK-NEXT: MOV64mr %stack.4, 1, $noreg, 0, $noreg, [[MOVSX64rr32_1]] :: (store (s64) into %stack.4) - ; CHECK-NEXT: [[MOVSX64rr32_2:%[0-9]+]]:gr64_nosp = MOVSX64rr32 %84.sub_32bit + ; CHECK-NEXT: MOV64mr %stack.3, 1, $noreg, 0, $noreg, [[MOVSX64rr32_1]] :: (store (s64) into %stack.3) + ; CHECK-NEXT: [[MOVSX64rr32_2:%[0-9]+]]:gr64_nosp = MOVSX64rr32 %85.sub_32bit ; CHECK-NEXT: [[MOVSX64rm32_:%[0-9]+]]:gr64_nosp = MOVSX64rm32 %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.2, align 8) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY [[COPY]] - ; CHECK-NEXT: [[MOVSX64rr32_3:%[0-9]+]]:gr64_nosp = MOVSX64rr32 %88.sub_32bit + ; CHECK-NEXT: [[MOVSX64rr32_3:%[0-9]+]]:gr64_nosp = MOVSX64rr32 %89.sub_32bit ; CHECK-NEXT: [[MOVSX64rm32_1:%[0-9]+]]:gr64 = MOVSX64rm32 %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1, align 16) + ; CHECK-NEXT: MOV64mr %stack.5, 1, $noreg, 0, $noreg, [[MOVSX64rm32_1]] :: (store (s64) into %stack.5) ; CHECK-NEXT: [[MOVSX64rr32_4:%[0-9]+]]:gr64 = MOVSX64rr32 [[MOV32rm1]] - ; CHECK-NEXT: [[MOVSX64rr32_5:%[0-9]+]]:gr64 = MOVSX64rr32 [[MOV32rm2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY [[MOV32rm2]] + ; CHECK-NEXT: [[MOVSX64rr32_5:%[0-9]+]]:gr64 = MOVSX64rr32 [[COPY2]] ; CHECK-NEXT: [[MOVSX64rr32_6:%[0-9]+]]:gr64 = MOVSX64rr32 [[MOV32rm]] ; CHECK-NEXT: MOV64mr %stack.8, 1, $noreg, 0, $noreg, [[MOVSX64rr32_6]] :: (store (s64) into %stack.8) ; CHECK-NEXT: MOV64mr %stack.6, 1, $noreg, 0, $noreg, [[MOVSX64rr32_4]] :: (store (s64) into %stack.6) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64_nosp = COPY [[MOVSX64rr32_4]] ; CHECK-NEXT: [[IMUL64rr:%[0-9]+]]:gr64_nosp = IMUL64rr [[IMUL64rr]], [[MOVSX64rr32_2]], implicit-def dead $eflags ; CHECK-NEXT: [[ADD64rr:%[0-9]+]]:gr64_nosp = ADD64rr [[ADD64rr]], [[MOVSX64rm32_]], implicit-def dead $eflags - ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[COPY2]], 4, [[ADD64rr]], 0, $noreg + ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r [[COPY]], 4, [[ADD64rr]], 0, $noreg ; CHECK-NEXT: MOV64mr %stack.9, 1, $noreg, 0, $noreg, [[LEA64r]] :: (store (s64) into %stack.9) ; CHECK-NEXT: MOV64mr %stack.7, 1, $noreg, 0, $noreg, [[MOVSX64rr32_5]] :: (store (s64) into %stack.7) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY [[MOVSX64rr32_5]] @@ -76,32 +77,29 @@ ; CHECK-NEXT: [[SHL64ri:%[0-9]+]]:gr64 = SHL64ri [[SHL64ri]], 2, implicit-def dead $eflags ; CHECK-NEXT: MOV64mr %stack.10, 1, $noreg, 0, $noreg, [[SHL64ri]] :: (store (s64) into %stack.10) ; CHECK-NEXT: 
[[LEA64r1:%[0-9]+]]:gr64 = LEA64r $noreg, 4, [[MOVSX64rr32_3]], 0, $noreg - ; CHECK-NEXT: MOV64mr %stack.5, 1, $noreg, 0, $noreg, [[MOVSX64rm32_]] :: (store (s64) into %stack.5) + ; CHECK-NEXT: MOV64mr %stack.4, 1, $noreg, 0, $noreg, [[MOVSX64rm32_]] :: (store (s64) into %stack.4) ; CHECK-NEXT: [[LEA64_32r2:%[0-9]+]]:gr32 = LEA64_32r %61, 4, [[MOVSX64rm32_]], 0, $noreg ; CHECK-NEXT: MOV32mr %stack.11, 1, $noreg, 0, $noreg, [[LEA64_32r2]] :: (store (s32) into %stack.11) - ; CHECK-NEXT: MOV64mr %stack.12, 1, $noreg, 0, $noreg, [[LEA64r1]] :: (store (s64) into %stack.12) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.for.cond14.preheader: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.2, align 8) ; CHECK-NEXT: CMP32rm [[MOV32rm3]], %fixed-stack.1, 1, $noreg, 0, $noreg, implicit-def $eflags :: (load (s32) from %fixed-stack.1, align 16) - ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %stack.4) - ; CHECK-NEXT: JCC_1 %bb.5, 13, implicit killed $eflags + ; CHECK-NEXT: JCC_1 %bb.5, 13, implicit $eflags ; CHECK-NEXT: JMP_1 %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.for.body17.lr.ph: ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm %stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %stack.6) + ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %stack.6) ; CHECK-NEXT: [[IMUL64rr:%[0-9]+]]:gr64 = nsw IMUL64rr [[IMUL64rr]], [[MOVSX64rr32_]], implicit-def dead $eflags - ; CHECK-NEXT: [[ADD64rm:%[0-9]+]]:gr64 = ADD64rm [[ADD64rm]], %stack.3, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load (s64) from %stack.3) - ; CHECK-NEXT: [[ADD64rm1:%[0-9]+]]:gr64 = ADD64rm [[ADD64rm1]], %stack.1, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load (s64) from %stack.1) - ; CHECK-NEXT: MOV64mr %stack.13, 1, $noreg, 0, $noreg, [[ADD64rm1]] :: (store (s64) into %stack.13) + ; CHECK-NEXT: [[ADD64rm:%[0-9]+]]:gr64 = ADD64rm [[ADD64rm]], %stack.1, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load (s64) from %stack.1) + ; CHECK-NEXT: MOV64mr %stack.12, 1, $noreg, 0, $noreg, [[ADD64rm]] :: (store (s64) into %stack.12) ; CHECK-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm %stack.11, 1, $noreg, 0, $noreg :: (load (s32) from %stack.11) ; CHECK-NEXT: undef %68.sub_32bit:gr64_nosp = COPY [[MOV32rm4]] - ; CHECK-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm %stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %stack.9) - ; CHECK-NEXT: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm %stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %stack.5) + ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %stack.9) + ; CHECK-NEXT: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm %stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %stack.4) ; CHECK-NEXT: JMP_1 %bb.6 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4.for.cond.cleanup: @@ -110,53 +108,52 @@ ; CHECK-NEXT: bb.5.for.cond.cleanup16: ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[MOV64rm3:%[0-9]+]]:gr64 = MOV64rm %stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %stack.6) + ; CHECK-NEXT: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm %stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %stack.6) ; CHECK-NEXT: [[ADD64rm1:%[0-9]+]]:gr64 = ADD64rm [[ADD64rm1]], %stack.7, 1, $noreg, 0, $noreg, implicit-def 
dead $eflags :: (load (s64) from %stack.7) - ; CHECK-NEXT: [[MOV64rm3:%[0-9]+]]:gr64 = MOV64rm %stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %stack.10) - ; CHECK-NEXT: ADD64mr %stack.9, 1, $noreg, 0, $noreg, [[MOV64rm3]], implicit-def dead $eflags :: (store (s64) into %stack.9) + ; CHECK-NEXT: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm %stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %stack.10) + ; CHECK-NEXT: ADD64mr %stack.9, 1, $noreg, 0, $noreg, [[MOV64rm2]], implicit-def dead $eflags :: (store (s64) into %stack.9) ; CHECK-NEXT: MOV64mr %stack.6, 1, $noreg, 0, $noreg, [[ADD64rm1]] :: (store (s64) into %stack.6) ; CHECK-NEXT: CMP64rm [[ADD64rm1]], %stack.8, 1, $noreg, 0, $noreg, implicit-def $eflags :: (load (s64) from %stack.8) - ; CHECK-NEXT: JCC_1 %bb.2, 12, implicit killed $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 12, implicit $eflags ; CHECK-NEXT: JMP_1 %bb.4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6.for.body17: ; CHECK-NEXT: successors: %bb.6(0x7c000000), %bb.5(0x04000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV32rm2]].sub_16bit, %88.sub_16bit - ; CHECK-NEXT: [[MOV64rm4:%[0-9]+]]:gr64 = MOV64rm %stack.13, 1, $noreg, 0, $noreg :: (load (s64) from %stack.13) - ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV32rm2]].sub_16bit, [[SUB32rr]].sub_16bit, [[MOV64rm4]], 1, [[MOVSX64rr32_]], 0, $noreg + ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[COPY2]].sub_16bit, %89.sub_16bit + ; CHECK-NEXT: [[MOV64rm3:%[0-9]+]]:gr64 = MOV64rm %stack.12, 1, $noreg, 0, $noreg :: (load (s64) from %stack.12) + ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[COPY2]].sub_16bit, [[SUB32rr]].sub_16bit, [[MOV64rm3]], 1, [[MOVSX64rr32_]], 0, $noreg ; CHECK-NEXT: [[MOVSX64rr32_7:%[0-9]+]]:gr64_nosp = MOVSX64rr32 [[MOVSX64rr32_7]].sub_32bit - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr32 = COPY [[SUB32rr]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[MOV32rm2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY [[MOVSX64rm32_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY [[MOVSX64rr32_3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr64 = COPY [[MOVSX64rr32_2]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr64 = COPY [[MOVSX64rr32_]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY %88 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:gr64 = COPY [[COPY1]] - ; CHECK-NEXT: [[LEA64r2:%[0-9]+]]:gr64 = LEA64r [[COPY10]], 1, [[MOVSX64rr32_7]], 0, $noreg - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:gr32 = COPY [[LEA64_32r]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:gr64_nosp = COPY [[MOV64rm]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr32 = COPY [[LEA64_32r]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY [[LEA64r1]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr32 = COPY [[SUB32rr]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gr64 = COPY [[MOVSX64rr32_3]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gr64 = COPY [[MOVSX64rr32_2]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gr64 = COPY [[MOVSX64rr32_]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:gr64 = COPY %89 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:gr64 = COPY [[COPY1]] + ; CHECK-NEXT: [[LEA64r2:%[0-9]+]]:gr64 = LEA64r [[COPY11]], 1, [[MOVSX64rr32_7]], 0, $noreg ; CHECK-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm %stack.2, 1, $noreg, 0, $noreg :: (load (s32) from %stack.2) - ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV32rm5]].sub_16bit, [[COPY11]].sub_16bit, [[LEA64r2]], 1, [[COPY12]], 0, $noreg - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:gr64 = COPY [[COPY12]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:gr32 = COPY [[COPY11]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:gr64 
= COPY [[COPY10]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:gr64_nosp = COPY [[COPY9]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:gr64_nosp = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:gr64_nosp = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:gr64_nosp = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:gr64 = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:gr32 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:gr32 = COPY [[COPY3]] - ; CHECK-NEXT: [[MOV64rm4:%[0-9]+]]:gr64 = MOV64rm %stack.12, 1, $noreg, 0, $noreg :: (load (s64) from %stack.12) - ; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV [[COPY20]].sub_16bit, [[COPY14]].sub_16bit, [[COPY21]].sub_16bit, [[PTDPBSSDV]], [[PTILELOADDV]], [[PTILELOADDV1]] - ; CHECK-NEXT: PTILESTOREDV [[COPY20]].sub_16bit, [[COPY15]].sub_16bit, [[MOV64rm1]], 1, [[COPY17]], 0, $noreg, [[PTDPBSSDV]] - ; CHECK-NEXT: [[ADD64rr1:%[0-9]+]]:gr64 = ADD64rr [[ADD64rr1]], [[COPY18]], implicit-def dead $eflags - ; CHECK-NEXT: [[ADD64rr2:%[0-9]+]]:gr64 = ADD64rr [[ADD64rr2]], [[MOV64rm4]], implicit-def dead $eflags - ; CHECK-NEXT: [[MOVSX64rr32_7]].sub_32bit:gr64_nosp = ADD32rr [[MOVSX64rr32_7]].sub_32bit, [[COPY14]], implicit-def dead $eflags - ; CHECK-NEXT: CMP64rr [[ADD64rr1]], [[COPY19]], implicit-def $eflags - ; CHECK-NEXT: JCC_1 %bb.6, 12, implicit killed $eflags + ; CHECK-NEXT: [[MOV64rm4:%[0-9]+]]:gr64_nosp = MOV64rm %stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %stack.3) + ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV32rm5]].sub_16bit, [[COPY3]].sub_16bit, [[LEA64r2]], 1, [[MOV64rm4]], 0, $noreg + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:gr64 = COPY [[COPY11]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:gr64_nosp = COPY [[COPY10]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:gr64_nosp = COPY [[COPY9]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:gr64_nosp = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:gr64_nosp = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:gr32 = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:gr32 = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:gr64 = COPY [[COPY4]] + ; CHECK-NEXT: [[MOV64rm5:%[0-9]+]]:gr64 = MOV64rm %stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %stack.5) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:gr32 = COPY [[COPY3]] + ; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV [[COPY16]].sub_16bit, [[COPY18]].sub_16bit, [[COPY19]].sub_16bit, [[PTDPBSSDV]], [[PTILELOADDV]], [[PTILELOADDV1]] + ; CHECK-NEXT: PTILESTOREDV [[COPY16]].sub_16bit, [[COPY12]].sub_16bit, [[MOV64rm]], 1, [[COPY14]], 0, $noreg, [[PTDPBSSDV]] + ; CHECK-NEXT: [[ADD64rr1:%[0-9]+]]:gr64 = ADD64rr [[ADD64rr1]], [[COPY15]], implicit-def dead $eflags + ; CHECK-NEXT: [[ADD64rr2:%[0-9]+]]:gr64 = ADD64rr [[ADD64rr2]], [[COPY17]], implicit-def dead $eflags + ; CHECK-NEXT: [[MOVSX64rr32_7]].sub_32bit:gr64_nosp = ADD32rr [[MOVSX64rr32_7]].sub_32bit, [[COPY18]], implicit-def dead $eflags + ; CHECK-NEXT: CMP64rr [[ADD64rr1]], [[MOV64rm5]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.6, 12, implicit $eflags ; CHECK-NEXT: JMP_1 %bb.5 entry: %rem = srem i32 %K, 64 diff --git a/llvm/test/CodeGen/X86/mul-constant-result.ll b/llvm/test/CodeGen/X86/mul-constant-result.ll --- a/llvm/test/CodeGen/X86/mul-constant-result.ll +++ b/llvm/test/CodeGen/X86/mul-constant-result.ll @@ -156,7 +156,7 @@ ; X86-NEXT: .LBB0_39: ; X86-NEXT: leal (%eax,%eax,8), %ecx ; X86-NEXT: leal (%ecx,%ecx,2), %ecx -; X86-NEXT: addl %eax, %ecx +; X86-NEXT: addl %eax, %eax ; X86-NEXT: .LBB0_27: ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: popl %esi @@ -267,7 +267,7 @@ ; X64-HSW-NEXT: .LBB0_22: ; 
X64-HSW-NEXT: movl %eax, %ecx
 ; X64-HSW-NEXT: shll $4, %ecx
-; X64-HSW-NEXT: jmp .LBB0_35
+; X64-HSW-NEXT: jmp .LBB0_34
 ; X64-HSW-NEXT: .LBB0_23:
 ; X64-HSW-NEXT: addl %eax, %eax
 ; X64-HSW-NEXT: .LBB0_11:
@@ -292,7 +292,7 @@
 ; X64-HSW-NEXT: .LBB0_27:
 ; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
 ; X64-HSW-NEXT: leal (%rax,%rcx,4), %ecx
-; X64-HSW-NEXT: jmp .LBB0_35
+; X64-HSW-NEXT: jmp .LBB0_34
 ; X64-HSW-NEXT: .LBB0_28:
 ; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx
 ; X64-HSW-NEXT: shll $3, %ecx
@@ -310,7 +310,7 @@
 ; X64-HSW-NEXT: .LBB0_31:
 ; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
 ; X64-HSW-NEXT: leal (%rcx,%rcx,4), %ecx
-; X64-HSW-NEXT: jmp .LBB0_35
+; X64-HSW-NEXT: jmp .LBB0_34
 ; X64-HSW-NEXT: .LBB0_32:
 ; X64-HSW-NEXT: leal (%rax,%rax,8), %eax
 ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
@@ -319,16 +319,18 @@
 ; X64-HSW-NEXT: .LBB0_33:
 ; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
 ; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT: jmp .LBB0_35
 ; X64-HSW-NEXT: .LBB0_34:
-; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
-; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT: addl %eax, %ecx
-; X64-HSW-NEXT: .LBB0_35:
 ; X64-HSW-NEXT: addl %eax, %ecx
 ; X64-HSW-NEXT: movl %ecx, %eax
 ; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_35:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: addl %ecx, %eax
+; X64-HSW-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-HSW-NEXT: retq
 ; X64-HSW-NEXT: .LBB0_36:
 ; X64-HSW-NEXT: leal (%rax,%rax), %ecx
 ; X64-HSW-NEXT: shll $5, %eax
diff --git a/llvm/test/CodeGen/X86/optimize-max-0.ll b/llvm/test/CodeGen/X86/optimize-max-0.ll
--- a/llvm/test/CodeGen/X86/optimize-max-0.ll
+++ b/llvm/test/CodeGen/X86/optimize-max-0.ll
@@ -16,65 +16,65 @@
 ; CHECK-NEXT: pushl %esi
 ; CHECK-NEXT: subl $28, %esp
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl %edi, %ebx
-; CHECK-NEXT: imull %ebp, %ebx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: imull %esi, %eax
 ; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movl %ebx, (%esp) ## 4-byte Spill
+; CHECK-NEXT: movl %eax, (%esp) ## 4-byte Spill
 ; CHECK-NEXT: je LBB0_19
 ; CHECK-NEXT: ## %bb.1: ## %bb10.preheader
-; CHECK-NEXT: movl %ebx, %ecx
-; CHECK-NEXT: sarl $31, %ecx
-; CHECK-NEXT: shrl $30, %ecx
-; CHECK-NEXT: addl %ebx, %ecx
-; CHECK-NEXT: sarl $2, %ecx
-; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: movl %eax, %ebp
+; CHECK-NEXT: sarl $31, %ebp
+; CHECK-NEXT: shrl $30, %ebp
+; CHECK-NEXT: addl %eax, %ebp
+; CHECK-NEXT: sarl $2, %ebp
+; CHECK-NEXT: testl %edx, %edx
 ; CHECK-NEXT: jle LBB0_12
 ; CHECK-NEXT: ## %bb.2: ## %bb.nph9
-; CHECK-NEXT: testl %ebp, %ebp
+; CHECK-NEXT: testl %esi, %esi
 ; CHECK-NEXT: jle LBB0_12
 ; CHECK-NEXT: ## %bb.3: ## %bb.nph9.split
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: incl %eax
 ; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: movl %edi, %edx
+; CHECK-NEXT: xorl %edi, %edi
 ; CHECK-NEXT: .p2align 4, 0x90
 ; CHECK-NEXT: LBB0_4: ## %bb6
 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movzbl (%eax,%esi,2), %ebx
-; CHECK-NEXT: movb %bl, (%edx,%esi)
-; CHECK-NEXT: incl %esi
-; CHECK-NEXT: cmpl %ebp, %esi
+; CHECK-NEXT: movzbl (%eax,%edi,2), %ebx
+; CHECK-NEXT: movb %bl, (%edx,%edi)
+; CHECK-NEXT: incl %edi
+; CHECK-NEXT: cmpl %esi, %edi
 ; CHECK-NEXT: jl LBB0_4
 ; CHECK-NEXT: ## %bb.5: ## %bb9
 ; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1
 ; CHECK-NEXT: incl %ecx
 ; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: addl %ebp, %edx
-; CHECK-NEXT: cmpl %edi, %ecx
+; CHECK-NEXT: addl %esi, %edx
+; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT: je LBB0_12
 ; CHECK-NEXT: ## %bb.6: ## %bb7.preheader
 ; CHECK-NEXT: ## in Loop: Header=BB0_4 Depth=1
-; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: xorl %edi, %edi
 ; CHECK-NEXT: jmp LBB0_4
 ; CHECK-NEXT: LBB0_12: ## %bb18.loopexit
+; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
 ; CHECK-NEXT: movl (%esp), %eax ## 4-byte Reload
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: addl %ebp, %eax
 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT: cmpl $1, %edi
+; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
 ; CHECK-NEXT: jle LBB0_13
 ; CHECK-NEXT: ## %bb.7: ## %bb.nph5
-; CHECK-NEXT: cmpl $2, %ebp
+; CHECK-NEXT: cmpl $2, %esi
 ; CHECK-NEXT: jl LBB0_13
 ; CHECK-NEXT: ## %bb.8: ## %bb.nph5.split
-; CHECK-NEXT: movl %ebp, %edx
-; CHECK-NEXT: shrl $31, %edx
-; CHECK-NEXT: addl %ebp, %edx
-; CHECK-NEXT: sarl %edx
+; CHECK-NEXT: movl %esi, %ebp
+; CHECK-NEXT: shrl $31, %ebp
+; CHECK-NEXT: addl %esi, %ebp
+; CHECK-NEXT: sarl %ebp
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: movl %eax, %ecx
 ; CHECK-NEXT: shrl $31, %ecx
@@ -84,12 +84,12 @@
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
 ; CHECK-NEXT: addl %ecx, %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
-; CHECK-NEXT: addl $2, %esi
-; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT: movl (%esp), %esi ## 4-byte Reload
-; CHECK-NEXT: addl %esi, %ecx
-; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: addl $2, %edx
+; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: movl (%esp), %edx ## 4-byte Reload
+; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: xorl %edx, %edx
 ; CHECK-NEXT: xorl %edi, %edi
 ; CHECK-NEXT: .p2align 4, 0x90
 ; CHECK-NEXT: LBB0_9: ## %bb13
@@ -97,85 +97,89 @@
 ; CHECK-NEXT: ## Child Loop BB0_10 Depth 2
 ; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
 ; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT: addl %esi, %edi
+; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT: addl %edx, %edi
 ; CHECK-NEXT: imull {{[0-9]+}}(%esp), %edi
 ; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
-; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: xorl %ebx, %ebx
 ; CHECK-NEXT: .p2align 4, 0x90
 ; CHECK-NEXT: LBB0_10: ## %bb14
 ; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1
 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
-; CHECK-NEXT: movzbl -2(%edi,%esi,4), %ebx
-; CHECK-NEXT: movb %bl, (%ecx,%esi)
-; CHECK-NEXT: movzbl (%edi,%esi,4), %ebx
-; CHECK-NEXT: movb %bl, (%eax,%esi)
-; CHECK-NEXT: incl %esi
-; CHECK-NEXT: cmpl %edx, %esi
+; CHECK-NEXT: movzbl -2(%edi,%ebx,4), %edx
+; CHECK-NEXT: movb %dl, (%ecx,%ebx)
+; CHECK-NEXT: movzbl (%edi,%ebx,4), %edx
+; CHECK-NEXT: movb %dl, (%eax,%ebx)
+; CHECK-NEXT: incl %ebx
+; CHECK-NEXT: cmpl %ebp, %ebx
 ; CHECK-NEXT: jl LBB0_10
 ; CHECK-NEXT: ## %bb.11: ## %bb17
 ; CHECK-NEXT: ## in Loop: Header=BB0_9 Depth=1
 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
 ; CHECK-NEXT: incl %edi
-; CHECK-NEXT: addl %edx, %eax
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
-; CHECK-NEXT: addl $2, %esi
-; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: addl %ebp, %eax
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; CHECK-NEXT: addl $2, %edx
+; CHECK-NEXT: addl %ebp, %ecx
 ; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
 ; CHECK-NEXT: jl LBB0_9
 ; CHECK-NEXT: LBB0_13: ## %bb20
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: cmpl $1, %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: cmpl $1, %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
 ; CHECK-NEXT: je LBB0_19
 ; CHECK-NEXT: ## %bb.14: ## %bb20
-; CHECK-NEXT: cmpl $3, %eax
+; CHECK-NEXT: cmpl $3, %ecx
 ; CHECK-NEXT: jne LBB0_24
 ; CHECK-NEXT: ## %bb.15: ## %bb22
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
-; CHECK-NEXT: addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
-; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp ## 4-byte Reload
+; CHECK-NEXT: addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-NEXT: testl %edx, %edx
 ; CHECK-NEXT: jle LBB0_18
 ; CHECK-NEXT: ## %bb.16: ## %bb.nph
-; CHECK-NEXT: leal 15(%edi), %eax
+; CHECK-NEXT: leal 15(%edx), %eax
 ; CHECK-NEXT: andl $-16, %eax
 ; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: addl %eax, %ecx
-; CHECK-NEXT: leal 15(%ebp), %eax
+; CHECK-NEXT: addl %ebp, %ebp
+; CHECK-NEXT: movl (%esp), %ecx ## 4-byte Reload
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT: addl %edi, %ecx
+; CHECK-NEXT: addl %ecx, %ebp
+; CHECK-NEXT: addl %eax, %ebx
+; CHECK-NEXT: leal 15(%esi), %eax
 ; CHECK-NEXT: andl $-16, %eax
 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT: addl %esi, %esi
-; CHECK-NEXT: addl (%esp), %esi ## 4-byte Folded Reload
-; CHECK-NEXT: addl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT: .p2align 4, 0x90
 ; CHECK-NEXT: LBB0_17: ## %bb23
 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: subl $4, %esp
-; CHECK-NEXT: pushl %ebp
-; CHECK-NEXT: pushl %ecx
 ; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: movl %ecx, %ebx
+; CHECK-NEXT: pushl %ebx
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: movl %ebp, %edi
+; CHECK-NEXT: movl %ebx, %ebp
+; CHECK-NEXT: movl %edx, %ebx
 ; CHECK-NEXT: calll _memcpy
-; CHECK-NEXT: movl %ebx, %ecx
+; CHECK-NEXT: movl %ebx, %edx
+; CHECK-NEXT: movl %ebp, %ebx
+; CHECK-NEXT: movl %edi, %ebp
 ; CHECK-NEXT: addl $16, %esp
-; CHECK-NEXT: addl %ebp, %esi
-; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
-; CHECK-NEXT: decl %edi
+; CHECK-NEXT: addl %esi, %ebp
+; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; CHECK-NEXT: decl %edx
 ; CHECK-NEXT: jne LBB0_17
 ; CHECK-NEXT: LBB0_18: ## %bb26
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
-; CHECK-NEXT: movl (%esp), %edx ## 4-byte Reload
-; CHECK-NEXT: addl %edx, %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: movl (%esp), %ecx ## 4-byte Reload
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: addl %ecx, %edx
+; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
 ; CHECK-NEXT: jmp LBB0_23
 ; CHECK-NEXT: LBB0_19: ## %bb29
-; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: testl %edx, %edx
 ; CHECK-NEXT: jle LBB0_22
 ; CHECK-NEXT: ## %bb.20: ## %bb.nph11
-; CHECK-NEXT: movl %edi, %esi
-; CHECK-NEXT: leal 15(%ebp), %eax
+; CHECK-NEXT: leal 15(%esi), %eax
 ; CHECK-NEXT: andl $-16, %eax
 ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -183,30 +187,32 @@
 ; CHECK-NEXT: LBB0_21: ## %bb30
 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: subl $4, %esp
-; CHECK-NEXT: pushl %ebp
-; CHECK-NEXT: pushl %ecx
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: pushl %ebx
 ; CHECK-NEXT: pushl %edi
-; CHECK-NEXT: movl %ecx, %ebx
+; CHECK-NEXT: movl %ebx, %ebp
+; CHECK-NEXT: movl %edx, %ebx
 ; CHECK-NEXT: calll _memcpy
-; CHECK-NEXT: movl %ebx, %ecx
+; CHECK-NEXT: movl %ebx, %edx
+; CHECK-NEXT: movl %ebp, %ebx
 ; CHECK-NEXT: addl $16, %esp
-; CHECK-NEXT: addl %ebp, %edi
-; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
-; CHECK-NEXT: decl %esi
+; CHECK-NEXT: addl %esi, %edi
+; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
+; CHECK-NEXT: decl %edx
 ; CHECK-NEXT: jne LBB0_21
 ; CHECK-NEXT: LBB0_22: ## %bb33
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl (%esp), %edx ## 4-byte Reload
-; CHECK-NEXT: addl %edx, %ecx
+; CHECK-NEXT: movl (%esp), %ecx ## 4-byte Reload
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: addl %ecx, %edx
 ; CHECK-NEXT: LBB0_23: ## %bb33
-; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: movl %ecx, %eax
 ; CHECK-NEXT: shrl $31, %eax
-; CHECK-NEXT: addl %edx, %eax
+; CHECK-NEXT: addl %ecx, %eax
 ; CHECK-NEXT: sarl %eax
 ; CHECK-NEXT: subl $4, %esp
 ; CHECK-NEXT: pushl %eax
 ; CHECK-NEXT: pushl $128
-; CHECK-NEXT: pushl %ecx
+; CHECK-NEXT: pushl %edx
 ; CHECK-NEXT: calll _memset
 ; CHECK-NEXT: addl $44, %esp
 ; CHECK-NEXT: LBB0_25: ## %return
@@ -599,15 +605,9 @@
 ; CHECK-NEXT: jne LBB1_17
 ; CHECK-NEXT: LBB1_18: ## %bb26
 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
-; CHECK-NEXT: addl %ecx, %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT: addl %eax, %edx
-; CHECK-NEXT: shrl %ecx
-; CHECK-NEXT: subl $4, %esp
-; CHECK-NEXT: pushl %ecx
-; CHECK-NEXT: pushl $128
-; CHECK-NEXT: pushl %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Folded Reload
 ; CHECK-NEXT: jmp LBB1_23
 ; CHECK-NEXT: LBB1_19: ## %bb29
 ; CHECK-NEXT: testl %ebp, %ebp
@@ -638,12 +638,12 @@
 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: LBB1_23: ## %bb33
 ; CHECK-NEXT: shrl %eax
 ; CHECK-NEXT: subl $4, %esp
 ; CHECK-NEXT: pushl %eax
 ; CHECK-NEXT: pushl $128
 ; CHECK-NEXT: pushl %ecx
-; CHECK-NEXT: LBB1_23: ## %bb33
 ; CHECK-NEXT: calll _memset
 ; CHECK-NEXT: addl $44, %esp
 ; CHECK-NEXT: LBB1_25: ## %return
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -19,17 +19,17 @@
 ; X64-NEXT: movslq %edx, %rcx
 ; X64-NEXT: shlq $2, %rcx
 ; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: leaq (%rcx,%rcx), %rdx
 ; X64-NEXT: .p2align 4, 0x90
 ; X64-NEXT: .LBB0_1: # %loop
 ; X64-NEXT: # =>This Inner Loop Header: Depth=1
 ; X64-NEXT: addl (%rdi), %eax
-; X64-NEXT: leaq (%rdi,%rcx), %rdx
 ; X64-NEXT: addl (%rdi,%rcx), %eax
-; X64-NEXT: leaq (%rdx,%rcx), %r8
-; X64-NEXT: addl (%rcx,%rdx), %eax
+; X64-NEXT: leaq (%rdi,%rcx), %r8
 ; X64-NEXT: addl (%rcx,%r8), %eax
 ; X64-NEXT: addq %rcx, %r8
-; X64-NEXT: addq %rcx, %r8
+; X64-NEXT: addl (%rcx,%r8), %eax
+; X64-NEXT: addq %rdx, %r8
 ; X64-NEXT: movq %r8, %rdi
 ; X64-NEXT: cmpq %rsi, %r8
 ; X64-NEXT: jne .LBB0_1
@@ -42,22 +42,22 @@
 ; X32-NEXT: pushl %edi
 ; X32-NEXT: pushl %esi
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT: shll $2, %edx
 ; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: leal (%edx,%edx), %esi
 ; X32-NEXT: .p2align 4, 0x90
 ; X32-NEXT: .LBB0_1: # %loop
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: addl (%esi), %eax
-; X32-NEXT: leal (%esi,%edx), %edi
-; X32-NEXT: addl (%esi,%edx), %eax
+; X32-NEXT: addl (%edi), %eax
+; X32-NEXT: addl (%edi,%edx), %eax
 ; X32-NEXT: leal (%edi,%edx), %ebx
-; X32-NEXT: addl (%edx,%edi), %eax
 ; X32-NEXT: addl (%edx,%ebx), %eax
 ; X32-NEXT: addl %edx, %ebx
-; X32-NEXT: addl %edx, %ebx
-; X32-NEXT: movl %ebx, %esi
+; X32-NEXT: addl (%edx,%ebx), %eax
+; X32-NEXT: addl %esi, %ebx
+; X32-NEXT: movl %ebx, %edi
 ; X32-NEXT: cmpl %ecx, %ebx
 ; X32-NEXT: jne .LBB0_1
 ; X32-NEXT: # %bb.2: # %exit