Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1560,6 +1560,12 @@ setTargetDAGCombine(ISD::SRL); if (Subtarget->isThumb1Only()) setTargetDAGCombine(ISD::SHL); + // Attempt to lower smin/smax to ssat/usat + if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || + Subtarget->isThumb2()) { + setTargetDAGCombine(ISD::SMIN); + setTargetDAGCombine(ISD::SMAX); + } setStackPointerRegisterToSaveRestore(ARM::SP); @@ -17552,12 +17558,57 @@ return SDValue(); } +// Lower smin(smax(x, C1), C2) or smax(smin(x, C2), C1) to ssat/usat if C2 + 1 +// is a power of 2 and C1 == ~C2 (ssat) or C1 == 0 (usat). +static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) && + !Subtarget->isThumb2()) + return SDValue(); + + EVT VT = Op.getValueType(); + SDValue Op0 = Op.getOperand(0); + + if (VT != MVT::i32 || + (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) || + !isa<ConstantSDNode>(Op.getOperand(1)) || + !isa<ConstantSDNode>(Op0.getOperand(1))) + return SDValue(); + + SDValue Min = Op; + SDValue Max = Op0; + SDValue Input = Op0.getOperand(0); + if (Min.getOpcode() == ISD::SMAX) + std::swap(Min, Max); + + APInt MinC = Min.getConstantOperandAPInt(1); + APInt MaxC = Max.getConstantOperandAPInt(1); + + if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX || + !(MinC + 1).isPowerOf2()) + return SDValue(); + + SDLoc DL(Op); + if (MinC == ~MaxC) + return DAG.getNode(ARMISD::SSAT, DL, VT, Input, + DAG.getConstant(MinC.countTrailingOnes(), DL, VT)); + if (MaxC == 0) + return DAG.getNode(ARMISD::USAT, DL, VT, Input, + DAG.getConstant(MinC.countTrailingOnes(), DL, VT)); + + return SDValue(); +} + /// PerformMinMaxCombine - Target-specific DAG combining for creating truncating /// saturates. 
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); + + if (VT == MVT::i32) + return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST); + if (!ST->hasMVEIntegerOps()) return SDValue(); Index: llvm/test/CodeGen/ARM/fpclamptosat.ll =================================================================== --- llvm/test/CodeGen/ARM/fpclamptosat.ll +++ llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -2718,28 +2718,14 @@ ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, r1, d0 ; VFP2-NEXT: bl __aeabi_d2iz -; VFP2-NEXT: movw r1, #32767 -; VFP2-NEXT: cmp r0, r1 -; VFP2-NEXT: it ge -; VFP2-NEXT: movge r0, r1 -; VFP2-NEXT: movw r1, #32768 -; VFP2-NEXT: movt r1, #65535 -; VFP2-NEXT: cmn.w r0, #32768 -; VFP2-NEXT: it le -; VFP2-NEXT: movle r0, r1 +; VFP2-NEXT: ssat r0, #16, r0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: stest_f64i16_mm: ; FULL: @ %bb.0: @ %entry ; FULL-NEXT: vcvt.s32.f64 s0, d0 -; FULL-NEXT: movw r1, #32767 ; FULL-NEXT: vmov r0, s0 -; FULL-NEXT: cmp r0, r1 -; FULL-NEXT: csel r0, r0, r1, lt -; FULL-NEXT: movw r1, #32768 -; FULL-NEXT: movt r1, #65535 -; FULL-NEXT: cmn.w r0, #32768 -; FULL-NEXT: csel r0, r0, r1, gt +; FULL-NEXT: ssat r0, #16, r0 ; FULL-NEXT: bx lr entry: %conv = fptosi double %x to i32 @@ -2820,21 +2806,14 @@ ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, r1, d0 ; VFP2-NEXT: bl __aeabi_d2iz -; VFP2-NEXT: movw r1, #65535 -; VFP2-NEXT: cmp r0, r1 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r1, r0 -; VFP2-NEXT: bic.w r0, r1, r1, asr #31 +; VFP2-NEXT: usat r0, #16, r0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f64i16_mm: ; FULL: @ %bb.0: @ %entry ; FULL-NEXT: vcvt.s32.f64 s0, d0 -; FULL-NEXT: movw r1, #65535 ; FULL-NEXT: vmov r0, s0 -; FULL-NEXT: cmp r0, r1 -; FULL-NEXT: csel r0, r0, r1, lt -; FULL-NEXT: bic.w r0, r0, r0, asr #31 +; FULL-NEXT: usat r0, #16, r0 ; FULL-NEXT: bx lr entry: %conv = fptosi double %x to i32 @@ -2870,33 +2849,12 @@ ; SOFT-NEXT: .LCPI39_1: 
; SOFT-NEXT: .long 4294934528 @ 0xffff8000 ; -; VFP2-LABEL: stest_f32i16_mm: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: vcvt.s32.f32 s0, s0 -; VFP2-NEXT: movw r1, #32767 -; VFP2-NEXT: vmov r0, s0 -; VFP2-NEXT: cmp r0, r1 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r1, r0 -; VFP2-NEXT: movw r0, #32768 -; VFP2-NEXT: cmn.w r1, #32768 -; VFP2-NEXT: movt r0, #65535 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r0, r1 -; VFP2-NEXT: bx lr -; -; FULL-LABEL: stest_f32i16_mm: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: vcvt.s32.f32 s0, s0 -; FULL-NEXT: movw r1, #32767 -; FULL-NEXT: vmov r0, s0 -; FULL-NEXT: cmp r0, r1 -; FULL-NEXT: csel r0, r0, r1, lt -; FULL-NEXT: movw r1, #32768 -; FULL-NEXT: movt r1, #65535 -; FULL-NEXT: cmn.w r0, #32768 -; FULL-NEXT: csel r0, r0, r1, gt -; FULL-NEXT: bx lr +; VFP-LABEL: stest_f32i16_mm: +; VFP: @ %bb.0: @ %entry +; VFP-NEXT: vcvt.s32.f32 s0, s0 +; VFP-NEXT: vmov r0, s0 +; VFP-NEXT: ssat r0, #16, r0 +; VFP-NEXT: bx lr entry: %conv = fptosi float %x to i32 %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767) @@ -2968,26 +2926,12 @@ ; SOFT-NEXT: .LCPI41_0: ; SOFT-NEXT: .long 65535 @ 0xffff ; -; VFP2-LABEL: ustest_f32i16_mm: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: vcvt.s32.f32 s0, s0 -; VFP2-NEXT: movw r1, #65535 -; VFP2-NEXT: vmov r0, s0 -; VFP2-NEXT: cmp r0, r1 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r1, r0 -; VFP2-NEXT: bic.w r0, r1, r1, asr #31 -; VFP2-NEXT: bx lr -; -; FULL-LABEL: ustest_f32i16_mm: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: vcvt.s32.f32 s0, s0 -; FULL-NEXT: movw r1, #65535 -; FULL-NEXT: vmov r0, s0 -; FULL-NEXT: cmp r0, r1 -; FULL-NEXT: csel r0, r0, r1, lt -; FULL-NEXT: bic.w r0, r0, r0, asr #31 -; FULL-NEXT: bx lr +; VFP-LABEL: ustest_f32i16_mm: +; VFP: @ %bb.0: @ %entry +; VFP-NEXT: vcvt.s32.f32 s0, s0 +; VFP-NEXT: vmov r0, s0 +; VFP-NEXT: usat r0, #16, r0 +; VFP-NEXT: bx lr entry: %conv = fptosi float %x to i32 %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535) @@ -3031,30 +2975,16 @@ ; VFP2-NEXT: vmov 
r0, s0 ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: movw r1, #32767 ; VFP2-NEXT: vcvt.s32.f32 s0, s0 ; VFP2-NEXT: vmov r0, s0 -; VFP2-NEXT: cmp r0, r1 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r1, r0 -; VFP2-NEXT: movw r0, #32768 -; VFP2-NEXT: cmn.w r1, #32768 -; VFP2-NEXT: movt r0, #65535 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r0, r1 +; VFP2-NEXT: ssat r0, #16, r0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: stest_f16i16_mm: ; FULL: @ %bb.0: @ %entry ; FULL-NEXT: vcvt.s32.f16 s0, s0 -; FULL-NEXT: movw r1, #32767 ; FULL-NEXT: vmov r0, s0 -; FULL-NEXT: cmp r0, r1 -; FULL-NEXT: csel r0, r0, r1, lt -; FULL-NEXT: movw r1, #32768 -; FULL-NEXT: movt r1, #65535 -; FULL-NEXT: cmn.w r0, #32768 -; FULL-NEXT: csel r0, r0, r1, gt +; FULL-NEXT: ssat r0, #16, r0 ; FULL-NEXT: bx lr entry: %conv = fptosi half %x to i32 @@ -3143,23 +3073,16 @@ ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f ; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: movw r1, #65535 ; VFP2-NEXT: vcvt.s32.f32 s0, s0 ; VFP2-NEXT: vmov r0, s0 -; VFP2-NEXT: cmp r0, r1 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r1, r0 -; VFP2-NEXT: bic.w r0, r1, r1, asr #31 +; VFP2-NEXT: usat r0, #16, r0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: ustest_f16i16_mm: ; FULL: @ %bb.0: @ %entry ; FULL-NEXT: vcvt.s32.f16 s0, s0 -; FULL-NEXT: movw r1, #65535 ; FULL-NEXT: vmov r0, s0 -; FULL-NEXT: cmp r0, r1 -; FULL-NEXT: csel r0, r0, r1, lt -; FULL-NEXT: bic.w r0, r0, r0, asr #31 +; FULL-NEXT: usat r0, #16, r0 ; FULL-NEXT: bx lr entry: %conv = fptosi half %x to i32 Index: llvm/test/CodeGen/ARM/sadd_sat.ll =================================================================== --- llvm/test/CodeGen/ARM/sadd_sat.ll +++ llvm/test/CodeGen/ARM/sadd_sat.ll @@ -148,15 +148,7 @@ ; CHECK-T2NODSP-LABEL: func16: ; CHECK-T2NODSP: @ %bb.0: ; CHECK-T2NODSP-NEXT: add r0, r1 -; CHECK-T2NODSP-NEXT: movw r1, #32767 -; CHECK-T2NODSP-NEXT: cmp r0, r1 -; CHECK-T2NODSP-NEXT: it lt -; CHECK-T2NODSP-NEXT: movlt r1, r0 -; CHECK-T2NODSP-NEXT: movw r0, #32768 
-; CHECK-T2NODSP-NEXT: cmn.w r1, #32768 -; CHECK-T2NODSP-NEXT: movt r0, #65535 -; CHECK-T2NODSP-NEXT: it gt -; CHECK-T2NODSP-NEXT: movgt r0, r1 +; CHECK-T2NODSP-NEXT: ssat r0, #16, r0 ; CHECK-T2NODSP-NEXT: bx lr ; ; CHECK-T2DSP-LABEL: func16: @@ -219,12 +211,7 @@ ; CHECK-T2NODSP-LABEL: func8: ; CHECK-T2NODSP: @ %bb.0: ; CHECK-T2NODSP-NEXT: add r0, r1 -; CHECK-T2NODSP-NEXT: cmp r0, #127 -; CHECK-T2NODSP-NEXT: it ge -; CHECK-T2NODSP-NEXT: movge r0, #127 -; CHECK-T2NODSP-NEXT: cmn.w r0, #128 -; CHECK-T2NODSP-NEXT: it le -; CHECK-T2NODSP-NEXT: mvnle r0, #127 +; CHECK-T2NODSP-NEXT: ssat r0, #8, r0 ; CHECK-T2NODSP-NEXT: bx lr ; ; CHECK-T2DSP-LABEL: func8: @@ -280,12 +267,7 @@ ; CHECK-T2NODSP-LABEL: func3: ; CHECK-T2NODSP: @ %bb.0: ; CHECK-T2NODSP-NEXT: add r0, r1 -; CHECK-T2NODSP-NEXT: cmp r0, #7 -; CHECK-T2NODSP-NEXT: it ge -; CHECK-T2NODSP-NEXT: movge r0, #7 -; CHECK-T2NODSP-NEXT: cmn.w r0, #8 -; CHECK-T2NODSP-NEXT: it le -; CHECK-T2NODSP-NEXT: mvnle r0, #7 +; CHECK-T2NODSP-NEXT: ssat r0, #4, r0 ; CHECK-T2NODSP-NEXT: bx lr ; ; CHECK-T2DSP-LABEL: func3: Index: llvm/test/CodeGen/ARM/sadd_sat_plus.ll =================================================================== --- llvm/test/CodeGen/ARM/sadd_sat_plus.ll +++ llvm/test/CodeGen/ARM/sadd_sat_plus.ll @@ -151,15 +151,7 @@ ; CHECK-T2NODSP-NEXT: muls r1, r2, r1 ; CHECK-T2NODSP-NEXT: sxth r1, r1 ; CHECK-T2NODSP-NEXT: add r0, r1 -; CHECK-T2NODSP-NEXT: movw r1, #32767 -; CHECK-T2NODSP-NEXT: cmp r0, r1 -; CHECK-T2NODSP-NEXT: it lt -; CHECK-T2NODSP-NEXT: movlt r1, r0 -; CHECK-T2NODSP-NEXT: movw r0, #32768 -; CHECK-T2NODSP-NEXT: movt r0, #65535 -; CHECK-T2NODSP-NEXT: cmn.w r1, #32768 -; CHECK-T2NODSP-NEXT: it gt -; CHECK-T2NODSP-NEXT: movgt r0, r1 +; CHECK-T2NODSP-NEXT: ssat r0, #16, r0 ; CHECK-T2NODSP-NEXT: bx lr ; ; CHECK-T2DSP-LABEL: func16: @@ -205,12 +197,7 @@ ; CHECK-T2NODSP-NEXT: muls r1, r2, r1 ; CHECK-T2NODSP-NEXT: sxtb r1, r1 ; CHECK-T2NODSP-NEXT: add r0, r1 -; CHECK-T2NODSP-NEXT: cmp r0, #127 -; CHECK-T2NODSP-NEXT: it 
ge -; CHECK-T2NODSP-NEXT: movge r0, #127 -; CHECK-T2NODSP-NEXT: cmn.w r0, #128 -; CHECK-T2NODSP-NEXT: it le -; CHECK-T2NODSP-NEXT: mvnle r0, #127 +; CHECK-T2NODSP-NEXT: ssat r0, #8, r0 ; CHECK-T2NODSP-NEXT: bx lr ; ; CHECK-T2DSP-LABEL: func8: @@ -257,12 +244,7 @@ ; CHECK-T2NODSP-NEXT: muls r1, r2, r1 ; CHECK-T2NODSP-NEXT: lsls r1, r1, #28 ; CHECK-T2NODSP-NEXT: add.w r0, r0, r1, asr #28 -; CHECK-T2NODSP-NEXT: cmp r0, #7 -; CHECK-T2NODSP-NEXT: it ge -; CHECK-T2NODSP-NEXT: movge r0, #7 -; CHECK-T2NODSP-NEXT: cmn.w r0, #8 -; CHECK-T2NODSP-NEXT: it le -; CHECK-T2NODSP-NEXT: mvnle r0, #7 +; CHECK-T2NODSP-NEXT: ssat r0, #4, r0 ; CHECK-T2NODSP-NEXT: bx lr ; ; CHECK-T2DSP-LABEL: func4: Index: llvm/test/CodeGen/ARM/ssat-unroll-loops.ll =================================================================== --- llvm/test/CodeGen/ARM/ssat-unroll-loops.ll +++ llvm/test/CodeGen/ARM/ssat-unroll-loops.ll @@ -125,63 +125,43 @@ define void @ssat_unroll_minmax(i16* nocapture readonly %pSrcA, i16* nocapture readonly %pSrcB, i16* nocapture writeonly %pDst, i32 %blockSize) { ; CHECK-LABEL: ssat_unroll_minmax: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r11, lr} -; CHECK-NEXT: push {r4, r5, r11, lr} +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: beq .LBB1_6 +; CHECK-NEXT: beq .LBB1_5 ; CHECK-NEXT: @ %bb.1: @ %while.body.preheader -; CHECK-NEXT: movw r12, #32768 -; CHECK-NEXT: sub lr, r3, #1 +; CHECK-NEXT: sub r12, r3, #1 ; CHECK-NEXT: tst r3, #1 -; CHECK-NEXT: movt r12, #65535 ; CHECK-NEXT: beq .LBB1_3 ; CHECK-NEXT: @ %bb.2: @ %while.body.prol.preheader -; CHECK-NEXT: ldrsh r3, [r0], #2 -; CHECK-NEXT: ldrsh r4, [r1], #2 -; CHECK-NEXT: smulbb r3, r4, r3 -; CHECK-NEXT: asr r4, r3, #14 -; CHECK-NEXT: cmn r4, #32768 -; CHECK-NEXT: mov r4, r12 -; CHECK-NEXT: asrgt r4, r3, #14 -; CHECK-NEXT: movw r3, #32767 -; CHECK-NEXT: cmp r4, r3 -; CHECK-NEXT: movge r4, r3 -; CHECK-NEXT: mov r3, lr -; CHECK-NEXT: strh r4, [r2], #2 +; 
CHECK-NEXT: ldrsh lr, [r0], #2 +; CHECK-NEXT: ldrsh r3, [r1], #2 +; CHECK-NEXT: smulbb r3, r3, lr +; CHECK-NEXT: ssat r3, #16, r3, asr #14 +; CHECK-NEXT: strh r3, [r2], #2 +; CHECK-NEXT: mov r3, r12 ; CHECK-NEXT: .LBB1_3: @ %while.body.prol.loopexit -; CHECK-NEXT: cmp lr, #0 -; CHECK-NEXT: beq .LBB1_6 -; CHECK-NEXT: @ %bb.4: @ %while.body.preheader1 -; CHECK-NEXT: movw lr, #32767 -; CHECK-NEXT: .LBB1_5: @ %while.body +; CHECK-NEXT: cmp r12, #0 +; CHECK-NEXT: popeq {r11, pc} +; CHECK-NEXT: .LBB1_4: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrsh r4, [r0] -; CHECK-NEXT: ldrsh r5, [r1] -; CHECK-NEXT: smulbb r4, r5, r4 -; CHECK-NEXT: asr r5, r4, #14 -; CHECK-NEXT: cmn r5, #32768 -; CHECK-NEXT: mov r5, r12 -; CHECK-NEXT: asrgt r5, r4, #14 -; CHECK-NEXT: cmp r5, lr -; CHECK-NEXT: movge r5, lr -; CHECK-NEXT: strh r5, [r2] -; CHECK-NEXT: ldrsh r4, [r0, #2] +; CHECK-NEXT: ldrsh r12, [r0] +; CHECK-NEXT: subs r3, r3, #2 +; CHECK-NEXT: ldrsh lr, [r1] +; CHECK-NEXT: smulbb r12, lr, r12 +; CHECK-NEXT: ssat r12, #16, r12, asr #14 +; CHECK-NEXT: strh r12, [r2] +; CHECK-NEXT: ldrsh r12, [r0, #2] ; CHECK-NEXT: add r0, r0, #4 -; CHECK-NEXT: ldrsh r5, [r1, #2] +; CHECK-NEXT: ldrsh lr, [r1, #2] ; CHECK-NEXT: add r1, r1, #4 -; CHECK-NEXT: smulbb r4, r5, r4 -; CHECK-NEXT: asr r5, r4, #14 -; CHECK-NEXT: cmn r5, #32768 -; CHECK-NEXT: mov r5, r12 -; CHECK-NEXT: asrgt r5, r4, #14 -; CHECK-NEXT: cmp r5, lr -; CHECK-NEXT: movge r5, lr -; CHECK-NEXT: subs r3, r3, #2 -; CHECK-NEXT: strh r5, [r2, #2] +; CHECK-NEXT: smulbb r12, lr, r12 +; CHECK-NEXT: ssat r12, #16, r12, asr #14 +; CHECK-NEXT: strh r12, [r2, #2] ; CHECK-NEXT: add r2, r2, #4 -; CHECK-NEXT: bne .LBB1_5 -; CHECK-NEXT: .LBB1_6: @ %while.end -; CHECK-NEXT: pop {r4, r5, r11, pc} +; CHECK-NEXT: bne .LBB1_4 +; CHECK-NEXT: .LBB1_5: @ %while.end +; CHECK-NEXT: pop {r11, pc} entry: %cmp.not7 = icmp eq i32 %blockSize, 0 br i1 %cmp.not7, label %while.end, label %while.body.preheader Index: 
llvm/test/CodeGen/ARM/ssat.ll =================================================================== --- llvm/test/CodeGen/ARM/ssat.ll +++ llvm/test/CodeGen/ARM/ssat.ll @@ -669,14 +669,7 @@ ; ; V6T2-LABEL: mm_sat_base_32bit: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: movw r1, #65535 -; V6T2-NEXT: movt r1, #127 -; V6T2-NEXT: cmp r0, r1 -; V6T2-NEXT: movge r0, r1 -; V6T2-NEXT: movw r1, #0 -; V6T2-NEXT: movt r1, #65408 -; V6T2-NEXT: cmn r0, #8388608 -; V6T2-NEXT: movle r0, r1 +; V6T2-NEXT: ssat r0, #24, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607) @@ -705,13 +698,7 @@ ; V6T2-LABEL: mm_sat_base_16bit: ; V6T2: @ %bb.0: @ %entry ; V6T2-NEXT: sxth r0, r0 -; V6T2-NEXT: movw r1, #2047 -; V6T2-NEXT: cmp r0, r1 -; V6T2-NEXT: movlt r1, r0 -; V6T2-NEXT: movw r0, #63488 -; V6T2-NEXT: movt r0, #65535 -; V6T2-NEXT: cmn r1, #2048 -; V6T2-NEXT: movgt r0, r1 +; V6T2-NEXT: ssat r0, #12, r0 ; V6T2-NEXT: bx lr entry: %0 = call i16 @llvm.smin.i16(i16 %x, i16 2047) @@ -734,10 +721,7 @@ ; V6T2-LABEL: mm_sat_base_8bit: ; V6T2: @ %bb.0: @ %entry ; V6T2-NEXT: sxtb r0, r0 -; V6T2-NEXT: cmp r0, #31 -; V6T2-NEXT: movge r0, #31 -; V6T2-NEXT: cmn r0, #32 -; V6T2-NEXT: mvnle r0, #31 +; V6T2-NEXT: ssat r0, #6, r0 ; V6T2-NEXT: bx lr entry: %0 = call i8 @llvm.smin.i8(i8 %x, i8 31) @@ -763,14 +747,7 @@ ; ; V6T2-LABEL: mm_sat_lower_upper_1: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: movw r1, #65535 -; V6T2-NEXT: movt r1, #127 -; V6T2-NEXT: cmp r0, r1 -; V6T2-NEXT: movge r0, r1 -; V6T2-NEXT: movw r1, #0 -; V6T2-NEXT: movt r1, #65408 -; V6T2-NEXT: cmn r0, #8388608 -; V6T2-NEXT: movle r0, r1 +; V6T2-NEXT: ssat r0, #24, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607) @@ -796,14 +773,7 @@ ; ; V6T2-LABEL: mm_sat_lower_upper_2: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: movw r1, #65535 -; V6T2-NEXT: movt r1, #127 -; V6T2-NEXT: cmp r0, r1 -; V6T2-NEXT: movge r0, r1 -; V6T2-NEXT: movw r1, #0 -; V6T2-NEXT: movt r1, #65408 -; V6T2-NEXT: cmn r0, #8388608 -; 
V6T2-NEXT: movle r0, r1 +; V6T2-NEXT: ssat r0, #24, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607) @@ -829,14 +799,7 @@ ; ; V6T2-LABEL: mm_sat_upper_lower_1: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: movw r1, #0 -; V6T2-NEXT: cmn r0, #8388608 -; V6T2-NEXT: movt r1, #65408 -; V6T2-NEXT: movle r0, r1 -; V6T2-NEXT: movw r1, #65535 -; V6T2-NEXT: movt r1, #127 -; V6T2-NEXT: cmp r0, r1 -; V6T2-NEXT: movge r0, r1 +; V6T2-NEXT: ssat r0, #24, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608) @@ -862,14 +825,7 @@ ; ; V6T2-LABEL: mm_sat_upper_lower_2: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: movw r1, #0 -; V6T2-NEXT: cmn r0, #8388608 -; V6T2-NEXT: movt r1, #65408 -; V6T2-NEXT: movle r0, r1 -; V6T2-NEXT: movw r1, #65535 -; V6T2-NEXT: movt r1, #127 -; V6T2-NEXT: cmp r0, r1 -; V6T2-NEXT: movge r0, r1 +; V6T2-NEXT: ssat r0, #24, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608) @@ -895,14 +851,7 @@ ; ; V6T2-LABEL: mm_sat_upper_lower_3: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: movw r1, #0 -; V6T2-NEXT: cmn r0, #8388608 -; V6T2-NEXT: movt r1, #65408 -; V6T2-NEXT: movle r0, r1 -; V6T2-NEXT: movw r1, #65535 -; V6T2-NEXT: movt r1, #127 -; V6T2-NEXT: cmp r0, r1 -; V6T2-NEXT: movge r0, r1 +; V6T2-NEXT: ssat r0, #24, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608) @@ -928,14 +877,7 @@ ; ; V6T2-LABEL: mm_sat_le_ge: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: movw r1, #0 -; V6T2-NEXT: cmn r0, #8388608 -; V6T2-NEXT: movt r1, #65408 -; V6T2-NEXT: movle r0, r1 -; V6T2-NEXT: movw r1, #65535 -; V6T2-NEXT: movt r1, #127 -; V6T2-NEXT: cmp r0, r1 -; V6T2-NEXT: movge r0, r1 +; V6T2-NEXT: ssat r0, #24, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608) Index: llvm/test/CodeGen/ARM/ssub_sat.ll =================================================================== --- llvm/test/CodeGen/ARM/ssub_sat.ll +++ llvm/test/CodeGen/ARM/ssub_sat.ll @@ -147,15 +147,7 @@ ; 
CHECK-T2NODSP-LABEL: func16: ; CHECK-T2NODSP: @ %bb.0: ; CHECK-T2NODSP-NEXT: subs r0, r0, r1 -; CHECK-T2NODSP-NEXT: movw r1, #32767 -; CHECK-T2NODSP-NEXT: cmp r0, r1 -; CHECK-T2NODSP-NEXT: it lt -; CHECK-T2NODSP-NEXT: movlt r1, r0 -; CHECK-T2NODSP-NEXT: movw r0, #32768 -; CHECK-T2NODSP-NEXT: cmn.w r1, #32768 -; CHECK-T2NODSP-NEXT: movt r0, #65535 -; CHECK-T2NODSP-NEXT: it gt -; CHECK-T2NODSP-NEXT: movgt r0, r1 +; CHECK-T2NODSP-NEXT: ssat r0, #16, r0 ; CHECK-T2NODSP-NEXT: bx lr ; ; CHECK-T2DSP-LABEL: func16: @@ -218,12 +210,7 @@ ; CHECK-T2NODSP-LABEL: func8: ; CHECK-T2NODSP: @ %bb.0: ; CHECK-T2NODSP-NEXT: subs r0, r0, r1 -; CHECK-T2NODSP-NEXT: cmp r0, #127 -; CHECK-T2NODSP-NEXT: it ge -; CHECK-T2NODSP-NEXT: movge r0, #127 -; CHECK-T2NODSP-NEXT: cmn.w r0, #128 -; CHECK-T2NODSP-NEXT: it le -; CHECK-T2NODSP-NEXT: mvnle r0, #127 +; CHECK-T2NODSP-NEXT: ssat r0, #8, r0 ; CHECK-T2NODSP-NEXT: bx lr ; ; CHECK-T2DSP-LABEL: func8: @@ -279,12 +266,7 @@ ; CHECK-T2NODSP-LABEL: func3: ; CHECK-T2NODSP: @ %bb.0: ; CHECK-T2NODSP-NEXT: subs r0, r0, r1 -; CHECK-T2NODSP-NEXT: cmp r0, #7 -; CHECK-T2NODSP-NEXT: it ge -; CHECK-T2NODSP-NEXT: movge r0, #7 -; CHECK-T2NODSP-NEXT: cmn.w r0, #8 -; CHECK-T2NODSP-NEXT: it le -; CHECK-T2NODSP-NEXT: mvnle r0, #7 +; CHECK-T2NODSP-NEXT: ssat r0, #4, r0 ; CHECK-T2NODSP-NEXT: bx lr ; ; CHECK-T2DSP-LABEL: func3: Index: llvm/test/CodeGen/ARM/ssub_sat_plus.ll =================================================================== --- llvm/test/CodeGen/ARM/ssub_sat_plus.ll +++ llvm/test/CodeGen/ARM/ssub_sat_plus.ll @@ -151,15 +151,7 @@ ; CHECK-T2NODSP-NEXT: muls r1, r2, r1 ; CHECK-T2NODSP-NEXT: sxth r1, r1 ; CHECK-T2NODSP-NEXT: subs r0, r0, r1 -; CHECK-T2NODSP-NEXT: movw r1, #32767 -; CHECK-T2NODSP-NEXT: cmp r0, r1 -; CHECK-T2NODSP-NEXT: it lt -; CHECK-T2NODSP-NEXT: movlt r1, r0 -; CHECK-T2NODSP-NEXT: movw r0, #32768 -; CHECK-T2NODSP-NEXT: movt r0, #65535 -; CHECK-T2NODSP-NEXT: cmn.w r1, #32768 -; CHECK-T2NODSP-NEXT: it gt -; CHECK-T2NODSP-NEXT: movgt r0, r1 +; 
CHECK-T2NODSP-NEXT: ssat r0, #16, r0 ; CHECK-T2NODSP-NEXT: bx lr ; ; CHECK-T2DSP-LABEL: func16: @@ -205,12 +197,7 @@ ; CHECK-T2NODSP-NEXT: muls r1, r2, r1 ; CHECK-T2NODSP-NEXT: sxtb r1, r1 ; CHECK-T2NODSP-NEXT: subs r0, r0, r1 -; CHECK-T2NODSP-NEXT: cmp r0, #127 -; CHECK-T2NODSP-NEXT: it ge -; CHECK-T2NODSP-NEXT: movge r0, #127 -; CHECK-T2NODSP-NEXT: cmn.w r0, #128 -; CHECK-T2NODSP-NEXT: it le -; CHECK-T2NODSP-NEXT: mvnle r0, #127 +; CHECK-T2NODSP-NEXT: ssat r0, #8, r0 ; CHECK-T2NODSP-NEXT: bx lr ; ; CHECK-T2DSP-LABEL: func8: @@ -257,12 +244,7 @@ ; CHECK-T2NODSP-NEXT: muls r1, r2, r1 ; CHECK-T2NODSP-NEXT: lsls r1, r1, #28 ; CHECK-T2NODSP-NEXT: sub.w r0, r0, r1, asr #28 -; CHECK-T2NODSP-NEXT: cmp r0, #7 -; CHECK-T2NODSP-NEXT: it ge -; CHECK-T2NODSP-NEXT: movge r0, #7 -; CHECK-T2NODSP-NEXT: cmn.w r0, #8 -; CHECK-T2NODSP-NEXT: it le -; CHECK-T2NODSP-NEXT: mvnle r0, #7 +; CHECK-T2NODSP-NEXT: ssat r0, #4, r0 ; CHECK-T2NODSP-NEXT: bx lr ; ; CHECK-T2DSP-LABEL: func4: Index: llvm/test/CodeGen/ARM/usat.ll =================================================================== --- llvm/test/CodeGen/ARM/usat.ll +++ llvm/test/CodeGen/ARM/usat.ll @@ -624,23 +624,12 @@ ; ; V6-LABEL: mm_unsigned_sat_base_32bit: ; V6: @ %bb.0: @ %entry -; V6-NEXT: ldr r1, .LCPI15_0 -; V6-NEXT: cmp r0, r1 -; V6-NEXT: movlt r1, r0 -; V6-NEXT: bic r0, r1, r1, asr #31 +; V6-NEXT: usat r0, #23, r0 ; V6-NEXT: bx lr -; V6-NEXT: .p2align 2 -; V6-NEXT: @ %bb.1: -; V6-NEXT: .LCPI15_0: -; V6-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: mm_unsigned_sat_base_32bit: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: movw r1, #65535 -; V6T2-NEXT: movt r1, #127 -; V6T2-NEXT: cmp r0, r1 -; V6T2-NEXT: movlt r1, r0 -; V6T2-NEXT: bic r0, r1, r1, asr #31 +; V6T2-NEXT: usat r0, #23, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607) @@ -662,21 +651,14 @@ ; ; V6-LABEL: mm_unsigned_sat_base_16bit: ; V6: @ %bb.0: @ %entry -; V6-NEXT: mov r1, #255 ; V6-NEXT: sxth r0, r0 -; V6-NEXT: orr r1, r1, #1792 -; 
V6-NEXT: cmp r0, r1 -; V6-NEXT: movlt r1, r0 -; V6-NEXT: bic r0, r1, r1, asr #31 +; V6-NEXT: usat r0, #11, r0 ; V6-NEXT: bx lr ; ; V6T2-LABEL: mm_unsigned_sat_base_16bit: ; V6T2: @ %bb.0: @ %entry ; V6T2-NEXT: sxth r0, r0 -; V6T2-NEXT: movw r1, #2047 -; V6T2-NEXT: cmp r0, r1 -; V6T2-NEXT: movlt r1, r0 -; V6T2-NEXT: bic r0, r1, r1, asr #31 +; V6T2-NEXT: usat r0, #11, r0 ; V6T2-NEXT: bx lr entry: %0 = call i16 @llvm.smin.i16(i16 %x, i16 2047) @@ -698,17 +680,13 @@ ; V6-LABEL: mm_unsigned_sat_base_8bit: ; V6: @ %bb.0: @ %entry ; V6-NEXT: sxtb r0, r0 -; V6-NEXT: cmp r0, #31 -; V6-NEXT: movge r0, #31 -; V6-NEXT: bic r0, r0, r0, asr #31 +; V6-NEXT: usat r0, #5, r0 ; V6-NEXT: bx lr ; ; V6T2-LABEL: mm_unsigned_sat_base_8bit: ; V6T2: @ %bb.0: @ %entry ; V6T2-NEXT: sxtb r0, r0 -; V6T2-NEXT: cmp r0, #31 -; V6T2-NEXT: movge r0, #31 -; V6T2-NEXT: bic r0, r0, r0, asr #31 +; V6T2-NEXT: usat r0, #5, r0 ; V6T2-NEXT: bx lr entry: %0 = call i8 @llvm.smin.i8(i8 %x, i8 31) @@ -731,23 +709,12 @@ ; ; V6-LABEL: mm_unsigned_sat_lower_upper_1: ; V6: @ %bb.0: @ %entry -; V6-NEXT: ldr r1, .LCPI18_0 -; V6-NEXT: cmp r0, r1 -; V6-NEXT: movlt r1, r0 -; V6-NEXT: bic r0, r1, r1, asr #31 +; V6-NEXT: usat r0, #23, r0 ; V6-NEXT: bx lr -; V6-NEXT: .p2align 2 -; V6-NEXT: @ %bb.1: -; V6-NEXT: .LCPI18_0: -; V6-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: mm_unsigned_sat_lower_upper_1: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: movw r1, #65535 -; V6T2-NEXT: movt r1, #127 -; V6T2-NEXT: cmp r0, r1 -; V6T2-NEXT: movlt r1, r0 -; V6T2-NEXT: bic r0, r1, r1, asr #31 +; V6T2-NEXT: usat r0, #23, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607) @@ -770,23 +737,12 @@ ; ; V6-LABEL: mm_unsigned_sat_lower_upper_2: ; V6: @ %bb.0: @ %entry -; V6-NEXT: ldr r1, .LCPI19_0 -; V6-NEXT: cmp r0, r1 -; V6-NEXT: movlt r1, r0 -; V6-NEXT: bic r0, r1, r1, asr #31 +; V6-NEXT: usat r0, #23, r0 ; V6-NEXT: bx lr -; V6-NEXT: .p2align 2 -; V6-NEXT: @ %bb.1: -; V6-NEXT: .LCPI19_0: -; V6-NEXT: .long 8388607 @ 
0x7fffff ; ; V6T2-LABEL: mm_unsigned_sat_lower_upper_2: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: movw r1, #65535 -; V6T2-NEXT: movt r1, #127 -; V6T2-NEXT: cmp r0, r1 -; V6T2-NEXT: movlt r1, r0 -; V6T2-NEXT: bic r0, r1, r1, asr #31 +; V6T2-NEXT: usat r0, #23, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)