Index: llvm/lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrThumb2.td +++ llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -5446,6 +5446,7 @@ let Inst{3-0} = Rm{3-0}; let Uses = [CPSR]; + let hasSideEffects = 0; } def t2CSEL : CS<"csel", 0b1000>; Index: llvm/lib/Target/ARM/Thumb2InstrInfo.h =================================================================== --- llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -60,6 +60,10 @@ /// const ThumbRegisterInfo &getRegisterInfo() const override { return RI; } + MachineInstr *optimizeSelect(MachineInstr &MI, + SmallPtrSetImpl<MachineInstr *> &SeenMIs, + bool) const override; + private: void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override; }; Index: llvm/lib/Target/ARM/Thumb2InstrInfo.cpp =================================================================== --- llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -12,6 +12,7 @@ #include "Thumb2InstrInfo.h" #include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -118,6 +119,31 @@ return getITInstrPredicate(*MBBI, PredReg) == ARMCC::AL; } +MachineInstr * +Thumb2InstrInfo::optimizeSelect(MachineInstr &MI, + SmallPtrSetImpl<MachineInstr *> &SeenMIs, + bool PreferFalse) const { + // Try to use the base optimizeSelect, which uses canFoldIntoMOVCC to fold the + // MOVCC into another instruction. If that fails on 8.1-M fall back to using a + // CSEL. 
+ MachineInstr *RV = ARMBaseInstrInfo::optimizeSelect(MI, SeenMIs, PreferFalse); + if (!RV && getSubtarget().hasV8_1MMainlineOps()) { + Register DestReg = MI.getOperand(0).getReg(); + + if (!DestReg.isVirtual()) + return nullptr; + + MachineInstrBuilder NewMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), + get(ARM::t2CSEL), DestReg) + .add(MI.getOperand(2)) + .add(MI.getOperand(1)) + .add(MI.getOperand(3)); + SeenMIs.insert(NewMI); + return NewMI; + } + return RV; +} + void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, Index: llvm/test/CodeGen/Thumb2/csel.ll =================================================================== --- llvm/test/CodeGen/Thumb2/csel.ll +++ llvm/test/CodeGen/Thumb2/csel.ll @@ -107,9 +107,7 @@ ; CHECK-LABEL: csel_var: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r0, #45 -; CHECK-NEXT: it le -; CHECK-NEXT: movle r1, r2 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: csel r0, r1, r2, gt ; CHECK-NEXT: bx lr entry: %cmp = icmp sgt i32 %a, 45 Index: llvm/test/CodeGen/Thumb2/float-ops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/float-ops.ll +++ llvm/test/CodeGen/Thumb2/float-ops.ll @@ -278,8 +278,10 @@ ; CHECK-LABEL: select_d: ; NONE: ldr{{(.w)?}} [[REG:r[0-9]+]], [sp] ; NONE: ands [[REG]], [[REG]], #1 -; NONE-DAG: moveq r0, r2 -; NONE-DAG: moveq r1, r3 +; NOREGS-DAG: moveq r0, r2 +; NOREGS-DAG: moveq r1, r3 +; ONLYREGS-DAG: csel r0, r0, r2 +; ONLYREGS-DAG: csel r1, r1, r3 ; SP: ands r0, r0, #1 ; SP-DAG: vmov [[ALO:r[0-9]+]], [[AHI:r[0-9]+]], d0 ; SP-DAG: vmov [[BLO:r[0-9]+]], [[BHI:r[0-9]+]], d1 Index: llvm/test/CodeGen/Thumb2/mve-abs.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-abs.ll +++ llvm/test/CodeGen/Thumb2/mve-abs.ll @@ -42,33 +42,30 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vmov q1, q0 +; 
CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: vmov lr, s4 -; CHECK-NEXT: vmov r0, s5 -; CHECK-NEXT: rsbs.w r3, lr, #0 +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: rsbs.w lr, r1, #0 ; CHECK-NEXT: sbc.w r2, r12, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r1, mi -; CHECK-NEXT: ands r1, r1, #1 -; CHECK-NEXT: itt eq -; CHECK-NEXT: moveq r2, r0 -; CHECK-NEXT: moveq r3, lr -; CHECK-NEXT: vmov lr, s6 -; CHECK-NEXT: vmov.32 q0[0], r3 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: vmov.32 q0[1], r2 -; CHECK-NEXT: rsbs.w r2, lr, #0 -; CHECK-NEXT: sbc.w r3, r12, r0 +; CHECK-NEXT: cset r3, mi +; CHECK-NEXT: ands r3, r3, #1 +; CHECK-NEXT: csel r1, lr, r1, ne +; CHECK-NEXT: csel r0, r2, r0, ne +; CHECK-NEXT: vmov.32 q1[0], r1 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: rsbs r2, r1, #0 +; CHECK-NEXT: sbc.w r12, r12, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r1, mi -; CHECK-NEXT: ands r1, r1, #1 -; CHECK-NEXT: it eq -; CHECK-NEXT: moveq r2, lr -; CHECK-NEXT: vmov.32 q0[2], r2 -; CHECK-NEXT: it eq -; CHECK-NEXT: moveq r3, r0 -; CHECK-NEXT: vmov.32 q0[3], r3 +; CHECK-NEXT: cset r3, mi +; CHECK-NEXT: ands r3, r3, #1 +; CHECK-NEXT: csel r1, r2, r1, ne +; CHECK-NEXT: csel r0, r12, r0, ne +; CHECK-NEXT: vmov.32 q1[2], r1 +; CHECK-NEXT: vmov.32 q1[3], r0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: pop {r7, pc} entry: %0 = icmp slt <2 x i64> %s1, zeroinitializer Index: llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -125,14 +125,12 @@ ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r5, #1 ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: itt eq -; CHECK-NEXT: moveq r3, r0 -; CHECK-NEXT: moveq r4, r1 +; CHECK-NEXT: csel r4, r4, r1, ne +; CHECK-NEXT: csel r3, r3, r0, ne ; CHECK-NEXT: subs r5, r4, r2 ; CHECK-NEXT: sbcs r3, r3, #0 -; CHECK-NEXT: it ge -; CHECK-NEXT: 
movge r4, r2 -; CHECK-NEXT: str r4, [r11], #4 +; CHECK-NEXT: csel r3, r4, r2, lt +; CHECK-NEXT: str r3, [r11], #4 ; CHECK-NEXT: le lr, .LBB0_7 ; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #8 @@ -406,22 +404,20 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r2, [r12], #4 ; CHECK-NEXT: ldr r4, [r10], #4 -; CHECK-NEXT: smull r4, r5, r4, r2 -; CHECK-NEXT: asrl r4, r5, #31 -; CHECK-NEXT: subs r2, r1, r4 -; CHECK-NEXT: sbcs.w r2, r0, r5 -; CHECK-NEXT: mov.w r2, #0 +; CHECK-NEXT: smull r2, r5, r4, r2 +; CHECK-NEXT: asrl r2, r5, #31 +; CHECK-NEXT: subs r4, r1, r2 +; CHECK-NEXT: sbcs.w r4, r0, r5 +; CHECK-NEXT: mov.w r4, #0 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: itt eq -; CHECK-NEXT: moveq r5, r0 -; CHECK-NEXT: moveq r4, r1 -; CHECK-NEXT: subs r2, r4, r3 -; CHECK-NEXT: sbcs r2, r5, #0 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r4, r3 -; CHECK-NEXT: str r4, [r11], #4 +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r2, r2, r1, ne +; CHECK-NEXT: csel r4, r5, r0, ne +; CHECK-NEXT: subs r5, r2, r3 +; CHECK-NEXT: sbcs r4, r4, #0 +; CHECK-NEXT: csel r2, r2, r3, lt +; CHECK-NEXT: str r2, [r11], #4 ; CHECK-NEXT: le lr, .LBB1_7 ; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #8 @@ -1158,9 +1154,8 @@ ; CHECK-NEXT: it lt ; CHECK-NEXT: asrlt r3, r2, #15 ; CHECK-NEXT: cmp r3, r1 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r3, r1 -; CHECK-NEXT: strh r3, [r4], #2 +; CHECK-NEXT: csel r2, r3, r1, lt +; CHECK-NEXT: strh r2, [r4], #2 ; CHECK-NEXT: le lr, .LBB5_7 ; CHECK-NEXT: .LBB5_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc} @@ -1300,9 +1295,8 @@ ; CHECK-NEXT: it lt ; CHECK-NEXT: asrlt r3, r2, #15 ; CHECK-NEXT: cmp r3, r1 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r3, r1 -; CHECK-NEXT: strh r3, [r4], #2 +; CHECK-NEXT: csel r2, r3, r1, lt +; CHECK-NEXT: strh r2, [r4], #2 ; CHECK-NEXT: le lr, .LBB6_7 ; CHECK-NEXT: .LBB6_8: @ %for.cond.cleanup ; 
CHECK-NEXT: pop {r4, r5, r6, pc} @@ -1439,9 +1433,8 @@ ; CHECK-NEXT: it lt ; CHECK-NEXT: asrlt r3, r2, #15 ; CHECK-NEXT: cmp r3, r1 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r3, r1 -; CHECK-NEXT: strh r3, [r4], #2 +; CHECK-NEXT: csel r2, r3, r1, lt +; CHECK-NEXT: strh r2, [r4], #2 ; CHECK-NEXT: le lr, .LBB7_7 ; CHECK-NEXT: .LBB7_8: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, r5, r6, pc} Index: llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll +++ llvm/test/CodeGen/Thumb2/mve-vecreduce-loops.ll @@ -732,8 +732,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 ; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r2, r1 +; CHECK-NEXT: csel r2, r2, r1, lt ; CHECK-NEXT: le lr, .LBB7_8 ; CHECK-NEXT: .LBB7_9: @ %for.cond.cleanup ; CHECK-NEXT: mov r0, r2 @@ -819,8 +818,7 @@ ; CHECK-NEXT: mvn r4, #-2147483648 ; CHECK-NEXT: vminv.s32 r4, q0 ; CHECK-NEXT: cmp r0, r4 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r0, r4 +; CHECK-NEXT: csel r0, r0, r4, lt ; CHECK-NEXT: le lr, .LBB8_5 ; CHECK-NEXT: @ %bb.6: @ %middle.block ; CHECK-NEXT: cmp r3, r1 @@ -834,8 +832,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r2, [r1], #4 ; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r0, r2 +; CHECK-NEXT: csel r0, r0, r2, lt ; CHECK-NEXT: le lr, .LBB8_8 ; CHECK-NEXT: .LBB8_9: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -933,8 +930,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 ; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: it le -; CHECK-NEXT: movle r2, r1 +; CHECK-NEXT: csel r2, r2, r1, gt ; CHECK-NEXT: le lr, .LBB9_8 ; CHECK-NEXT: .LBB9_9: @ %for.cond.cleanup ; CHECK-NEXT: mov r0, r2 @@ -1020,8 +1016,7 @@ ; CHECK-NEXT: mov.w r4, #-2147483648 ; CHECK-NEXT: vmaxv.s32 r4, q0 ; CHECK-NEXT: cmp r0, r4 -; CHECK-NEXT: it le -; CHECK-NEXT: movle r0, r4 +; 
CHECK-NEXT: csel r0, r0, r4, gt ; CHECK-NEXT: le lr, .LBB10_5 ; CHECK-NEXT: @ %bb.6: @ %middle.block ; CHECK-NEXT: cmp r3, r1 @@ -1035,8 +1030,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r2, [r1], #4 ; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: it le -; CHECK-NEXT: movle r0, r2 +; CHECK-NEXT: csel r0, r0, r2, gt ; CHECK-NEXT: le lr, .LBB10_8 ; CHECK-NEXT: .LBB10_9: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -1134,8 +1128,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 ; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: it hs -; CHECK-NEXT: movhs r2, r1 +; CHECK-NEXT: csel r2, r2, r1, lo ; CHECK-NEXT: le lr, .LBB11_8 ; CHECK-NEXT: .LBB11_9: @ %for.cond.cleanup ; CHECK-NEXT: mov r0, r2 @@ -1221,8 +1214,7 @@ ; CHECK-NEXT: mov.w r4, #-1 ; CHECK-NEXT: vminv.u32 r4, q0 ; CHECK-NEXT: cmp r0, r4 -; CHECK-NEXT: it hs -; CHECK-NEXT: movhs r0, r4 +; CHECK-NEXT: csel r0, r0, r4, lo ; CHECK-NEXT: le lr, .LBB12_5 ; CHECK-NEXT: @ %bb.6: @ %middle.block ; CHECK-NEXT: cmp r3, r1 @@ -1236,8 +1228,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r2, [r1], #4 ; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: it ls -; CHECK-NEXT: movls r0, r2 +; CHECK-NEXT: csel r0, r0, r2, hi ; CHECK-NEXT: le lr, .LBB12_8 ; CHECK-NEXT: .LBB12_9: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -1335,8 +1326,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr r1, [r0], #4 ; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: it ls -; CHECK-NEXT: movls r2, r1 +; CHECK-NEXT: csel r2, r2, r1, hi ; CHECK-NEXT: le lr, .LBB13_8 ; CHECK-NEXT: .LBB13_9: @ %for.cond.cleanup ; CHECK-NEXT: mov r0, r2 @@ -1418,8 +1408,7 @@ ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: vmaxv.u32 r4, q0 ; CHECK-NEXT: cmp r0, r4 -; CHECK-NEXT: it ls -; CHECK-NEXT: movls r0, r4 +; CHECK-NEXT: csel r0, r0, r4, hi ; CHECK-NEXT: le lr, .LBB14_3 ; CHECK-NEXT: @ %bb.4: @ %middle.block ; CHECK-NEXT: cmp r3, r1 @@ -1433,8 +1422,7 @@ ; CHECK-NEXT: @ =>This Inner Loop 
Header: Depth=1 ; CHECK-NEXT: ldr r2, [r1], #4 ; CHECK-NEXT: cmp r0, r2 -; CHECK-NEXT: it ls -; CHECK-NEXT: movls r0, r2 +; CHECK-NEXT: csel r0, r0, r2, hi ; CHECK-NEXT: le lr, .LBB14_6 ; CHECK-NEXT: @ %bb.7: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} Index: llvm/test/CodeGen/Thumb2/mve-vmaxv.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vmaxv.ll +++ llvm/test/CodeGen/Thumb2/mve-vmaxv.ll @@ -145,8 +145,7 @@ ; CHECK-NEXT: vmaxv.s8 r1, q0 ; CHECK-NEXT: sxtb r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, gt ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %s1) %c = icmp sgt i8 %r, %s2 @@ -161,8 +160,7 @@ ; CHECK-NEXT: vmaxv.s8 r1, q0 ; CHECK-NEXT: sxtb r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, gt ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> %s1) %rs = sext i8 %r to i32 @@ -180,8 +178,7 @@ ; CHECK-NEXT: vmaxv.s16 r1, q0 ; CHECK-NEXT: sxth r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, gt ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1) %c = icmp sgt i16 %r, %s2 @@ -197,8 +194,7 @@ ; CHECK-NEXT: vmaxv.s16 r1, q0 ; CHECK-NEXT: sxth r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, gt ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> %s1) %rs = sext i16 %r to i32 @@ -213,8 +209,7 @@ ; CHECK-NEXT: mov.w r1, #-2147483648 ; CHECK-NEXT: vmaxv.s32 r1, q0 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, gt ; CHECK-NEXT: bx lr %r = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> %s1) %c = icmp sgt i32 %r, %s2 @@ -230,8 +225,7 
@@ ; CHECK-NEXT: vmaxv.u8 r1, q0 ; CHECK-NEXT: uxtb r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it hi -; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: csel r0, r1, r0, hi ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1) %c = icmp ugt i8 %r, %s2 @@ -246,8 +240,7 @@ ; CHECK-NEXT: vmaxv.u8 r1, q0 ; CHECK-NEXT: uxtb r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it hi -; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: csel r0, r1, r0, hi ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.umax.v16i8(<16 x i8> %s1) %rs = zext i8 %r to i32 @@ -264,8 +257,7 @@ ; CHECK-NEXT: vmaxv.u16 r1, q0 ; CHECK-NEXT: uxth r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it hi -; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: csel r0, r1, r0, hi ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1) %c = icmp ugt i16 %r, %s2 @@ -280,8 +272,7 @@ ; CHECK-NEXT: vmaxv.u16 r1, q0 ; CHECK-NEXT: uxth r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it hi -; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: csel r0, r1, r0, hi ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.umax.v8i16(<8 x i16> %s1) %rs = zext i16 %r to i32 @@ -296,8 +287,7 @@ ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: vmaxv.u32 r1, q0 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it hi -; CHECK-NEXT: movhi r0, r1 +; CHECK-NEXT: csel r0, r1, r0, hi ; CHECK-NEXT: bx lr %r = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> %s1) %c = icmp ugt i32 %r, %s2 @@ -313,8 +303,7 @@ ; CHECK-NEXT: vminv.s8 r1, q0 ; CHECK-NEXT: sxtb r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lt ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1) %c = icmp slt i8 %r, %s2 @@ -329,8 +318,7 @@ ; CHECK-NEXT: vminv.s8 r1, q0 ; CHECK-NEXT: sxtb r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lt ; 
CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.smin.v16i8(<16 x i8> %s1) %rs = sext i8 %r to i32 @@ -347,8 +335,7 @@ ; CHECK-NEXT: vminv.s16 r1, q0 ; CHECK-NEXT: sxth r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lt ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1) %c = icmp slt i16 %r, %s2 @@ -363,8 +350,7 @@ ; CHECK-NEXT: vminv.s16 r1, q0 ; CHECK-NEXT: sxth r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lt ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.smin.v8i16(<8 x i16> %s1) %rs = sext i16 %r to i32 @@ -379,8 +365,7 @@ ; CHECK-NEXT: mvn r1, #-2147483648 ; CHECK-NEXT: vminv.s32 r1, q0 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lt ; CHECK-NEXT: bx lr %r = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> %s1) %c = icmp slt i32 %r, %s2 @@ -396,8 +381,7 @@ ; CHECK-NEXT: vminv.u8 r1, q0 ; CHECK-NEXT: uxtb r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lo ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1) %c = icmp ult i8 %r, %s2 @@ -412,8 +396,7 @@ ; CHECK-NEXT: vminv.u8 r1, q0 ; CHECK-NEXT: uxtb r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lo ; CHECK-NEXT: bx lr %r = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %s1) %rs = zext i8 %r to i32 @@ -430,8 +413,7 @@ ; CHECK-NEXT: vminv.u16 r1, q0 ; CHECK-NEXT: uxth r2, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lo ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1) %c = icmp ult i16 %r, %s2 @@ -446,8 +428,7 @@ ; CHECK-NEXT: vminv.u16 r1, q0 ; 
CHECK-NEXT: uxth r1, r1 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lo ; CHECK-NEXT: bx lr %r = call i16 @llvm.experimental.vector.reduce.umin.v8i16(<8 x i16> %s1) %rs = zext i16 %r to i32 @@ -462,8 +443,7 @@ ; CHECK-NEXT: mov.w r1, #-1 ; CHECK-NEXT: vminv.u32 r1, q0 ; CHECK-NEXT: cmp r1, r0 -; CHECK-NEXT: it lo -; CHECK-NEXT: movlo r0, r1 +; CHECK-NEXT: csel r0, r1, r0, lo ; CHECK-NEXT: bx lr %r = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> %s1) %c = icmp ult i32 %r, %s2