Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -4998,16 +4998,6 @@
           ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
 }
 
-// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
-static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
-                            const SDValue TrueVal, const SDValue FalseVal,
-                            const ISD::CondCode CC, const SDValue K) {
-  return (isGTorGE(CC) &&
-          ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
-         (isLTorLE(CC) &&
-          ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
-}
-
 // Check if two chained conditionals could be converted into SSAT or USAT.
 //
 // SSAT can replace a set of two conditional selectors that bound a number to an
@@ -5019,6 +5009,9 @@
 // x < k ? (x < -k ? -k : x) : k
 // etc.
 //
+// LLVM canonicalizes these to either a min(max()) or a max(min()) pattern.
+// This function tries to match one of these and will return true if successful.
+//
 // USAT works similarily to SSAT but bounds on the interval [0, k] where k + 1 is
 // a power of 2.
 //
@@ -5026,9 +5019,9 @@
 // Additionally, the variable is returned in parameter V, the constant in K and
 // usat is set to true if the conditional represents an unsigned saturation
 static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
-                                    uint64_t &K, bool &usat) {
-  SDValue LHS1 = Op.getOperand(0);
-  SDValue RHS1 = Op.getOperand(1);
+                                    uint64_t &K, bool &Usat) {
+  SDValue V1 = Op.getOperand(0);
+  SDValue K1 = Op.getOperand(1);
   SDValue TrueVal1 = Op.getOperand(2);
   SDValue FalseVal1 = Op.getOperand(3);
   ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
@@ -5037,82 +5030,59 @@
   if (Op2.getOpcode() != ISD::SELECT_CC)
     return false;
 
-  SDValue LHS2 = Op2.getOperand(0);
-  SDValue RHS2 = Op2.getOperand(1);
+  SDValue V2 = Op2.getOperand(0);
+  SDValue K2 = Op2.getOperand(1);
   SDValue TrueVal2 = Op2.getOperand(2);
   SDValue FalseVal2 = Op2.getOperand(3);
   ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
 
-  // Find out which are the constants and which are the variables
-  // in each conditional
-  SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
-                                                        ? &RHS1
-                                                        : nullptr;
-  SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
-                                                        ? &RHS2
-                                                        : nullptr;
-  SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
-  SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
-  SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
-  SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
-
-  // We must detect cases where the original operations worked with 16- or
-  // 8-bit values. In such case, V2Tmp != V2 because the comparison operations
-  // must work with sign-extended values but the select operations return
-  // the original non-extended value.
-  SDValue V2TmpReg = V2Tmp;
-  if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
-    V2TmpReg = V2Tmp->getOperand(0);
-
-  // Check that the registers and the constants have the correct values
-  // in both conditionals
-  if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
-      V2TmpReg != V2)
-    return false;
+  SDValue V1Tmp = V1;
+  SDValue V2Tmp = V2;
 
-  // Figure out which conditional is saturating the lower/upper bound.
-  const SDValue *LowerCheckOp =
-      isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
-          ? &Op
-          : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
-                ? &Op2
-                : nullptr;
-  const SDValue *UpperCheckOp =
-      isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
-          ? &Op
-          : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
-                ? &Op2
-                : nullptr;
-
-  if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
-    return false;
+  if (V1.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+      V2.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+    V1Tmp = V1.getOperand(0);
+    V2Tmp = V2.getOperand(0);
+  }
+
+  // Check that the registers and the constants match a max(min()) or min(max())
+  // pattern
+  if (V1Tmp == TrueVal1 && V2Tmp == TrueVal2 && K1 == FalseVal1 &&
+      K2 == FalseVal2 &&
+      ((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2)))) {
 
-  // Check that the constant in the lower-bound check is
-  // the opposite of the constant in the upper-bound check
-  // in 1's complement.
-  int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
-  int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
-  int64_t PosVal = std::max(Val1, Val2);
-  int64_t NegVal = std::min(Val1, Val2);
+    // Check that the constant in the lower-bound check is
+    // the opposite of the constant in the upper-bound check
+    // in 1's complement.
+    if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
+      return false;
+
+    int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
+    int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
+    int64_t PosVal = std::max(Val1, Val2);
+    int64_t NegVal = std::min(Val1, Val2);
 
-  if (((Val1 > Val2 && UpperCheckOp == &Op) ||
-       (Val1 < Val2 && UpperCheckOp == &Op2)) &&
-      isPowerOf2_64(PosVal + 1)) {
+    // Bail out unless the larger constant belongs to the upper-bound (LT/LE)
+    // select and is of the form 2^n - 1; both conditions are required.
+    if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
+        !isPowerOf2_64(PosVal + 1))
+      return false;
 
-    // Handle the difference between USAT (unsigned) and SSAT (signed) saturation
+    // Handle the difference between USAT (unsigned) and SSAT (signed)
+    // saturation
     if (Val1 == ~Val2)
-      usat = false;
+      Usat = false;
     else if (NegVal == 0)
-      usat = true;
+      Usat = true;
     else
       return false;
 
-    V = V2;
-    K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
+    V = V2Tmp;
+    // At this point, PosVal is guaranteed to be positive
+    K = (uint64_t) PosVal;
 
     return true;
   }
-
   return false;
 }
 
Index: llvm/test/CodeGen/ARM/ssat.ll
===================================================================
--- llvm/test/CodeGen/ARM/ssat.ll
+++ llvm/test/CodeGen/ARM/ssat.ll
@@ -20,10 +20,10 @@
 ; V6T2: ssat r0, #24, r0
 ; V4T-NOT: ssat
 entry:
-  %cmpLow = icmp slt i32 %x, -8388608
-  %cmpUp = icmp sgt i32 %x, 8388607
-  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x
-  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %saturateUp
+  %0 = icmp slt i32 %x, 8388607
+  %saturateUp = select i1 %0, i32 %x, i32 8388607
+  %1 = icmp sgt i32 %saturateUp, -8388608
+  %saturateLow = select i1 %1, i32 %saturateUp, i32 -8388608
   ret i32 %saturateLow
 }
 
@@ -34,10 +34,10 @@
 ; V6T2: ssat r0, #12, r0
 ; V4T-NOT: ssat
 entry:
-  %cmpLow = icmp slt i16 %x, -2048
-  %cmpUp = icmp sgt i16 %x, 2047
-  %saturateUp = select i1 %cmpUp, i16 2047, i16 %x
-  %saturateLow = select i1 %cmpLow, i16 -2048, i16 %saturateUp
+  %0 = icmp slt i16 %x, 2047
+  %saturateUp = select i1 %0, i16 %x, i16 2047
+  %1 = icmp sgt i16 %saturateUp, -2048
+  %saturateLow = select i1 %1, i16 %saturateUp, i16 -2048
   ret i16 %saturateLow
 }
 
@@ -48,10 +48,10 @@
 ; V6T2: ssat r0, #6, r0
 ; V4T-NOT: ssat
 entry:
-  %cmpLow = icmp slt i8 %x, -32
-  %cmpUp = icmp sgt i8 %x, 31
-  %saturateUp = select i1 %cmpUp, i8 31, i8 %x
-  %saturateLow = select i1 %cmpLow, i8 -32, i8 %saturateUp
+  %0 = icmp slt i8 %x, 31
+  %saturateUp = select i1 %0, i8 %x, i8 31
+  %1 = icmp sgt i8 %saturateUp, -32
+  %saturateLow = select i1 %1, i8 %saturateUp, i8 -32
   ret i8 %saturateLow
 }
 
@@ -67,10 +67,10 @@
 ; V6T2: ssat r0, #24, r0
 ; V4T-NOT: ssat
 entry:
-  %cmpLow = icmp slt i32 %x, -8388608
   %cmpUp = icmp slt i32 %x, 8388607
   %saturateUp = select i1 %cmpUp, i32 %x, i32 8388607
-  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %saturateUp
+  %0 = icmp sgt i32 %saturateUp, -8388608
+  %saturateLow = select i1 %0, i32 %saturateUp, i32 -8388608
   ret i32 %saturateLow
 }
 
@@ -80,10 +80,10 @@
 ; V6T2: ssat r0, #24, r0
 ; V4T-NOT: ssat
 entry:
-  %cmpLow = icmp sgt i32 %x, -8388608
-  %cmpUp = icmp sgt i32 %x, 8388607
-  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x
-  %saturateLow = select i1 %cmpLow, i32 %saturateUp, i32 -8388608
+  %0 = icmp slt i32 %x, 8388607
+  %saturateUp = select i1 %0, i32 %x, i32 8388607
+  %1 = icmp sgt i32 %saturateUp, -8388608
+  %saturateLow = select i1 %1, i32 %saturateUp, i32 -8388608
   ret i32 %saturateLow
 }
 
@@ -93,10 +93,10 @@
 ; V6T2: ssat r0, #24, r0
 ; V4T-NOT: ssat
 entry:
-  %cmpUp = icmp slt i32 %x, 8388607
-  %cmpLow = icmp slt i32 %x, -8388608
-  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %x
-  %saturateUp = select i1 %cmpUp, i32 %saturateLow, i32 8388607
+  %0 = icmp sgt i32 %x, -8388608
+  %saturateLow = select i1 %0, i32 %x, i32 -8388608
+  %1 = icmp slt i32 %saturateLow, 8388607
+  %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607
   ret i32 %saturateUp
 }
 
@@ -106,10 +106,10 @@
 ; V6T2: ssat r0, #24, r0
 ; V4T-NOT: ssat
 entry:
-  %cmpUp = icmp sgt i32 %x, 8388607
-  %cmpLow = icmp slt i32 %x, -8388608
-  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %x
-  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  %0 = icmp sgt i32 %x, -8388608
+  %saturateLow = select i1 %0, i32 %x, i32 -8388608
+  %1 = icmp slt i32 %saturateLow, 8388607
+  %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607
   ret i32 %saturateUp
 }
 
@@ -119,10 +119,10 @@
 ; V6T2: ssat r0, #24, r0
 ; V4T-NOT: ssat
 entry:
-  %cmpUp = icmp slt i32 8388607, %x
   %cmpLow = icmp sgt i32 %x, -8388608
   %saturateLow = select i1 %cmpLow, i32 %x, i32 -8388608
-  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  %0 = icmp slt i32 %saturateLow, 8388607
+  %saturateUp = select i1 %0, i32 %saturateLow, i32 8388607
   ret i32 %saturateUp
 }
 
@@ -137,10 +137,10 @@
 ; V6T2: ssat r0, #24, r0
 ; V4T-NOT: ssat
 entry:
-  %cmpUp = icmp sle i32 8388607, %x
-  %cmpLow = icmp sge i32 %x, -8388608
-  %saturateLow = select i1 %cmpLow, i32 %x, i32 -8388608
-  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  %0 = icmp sgt i32 %x, -8388608
+  %saturateLow = select i1 %0, i32 %x, i32 -8388608
+  %1 = icmp slt i32 %saturateLow, 8388607
+  %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607
   ret i32 %saturateUp
 }
 
@@ -156,8 +156,8 @@
 ; CHECK-NOT: ssat
 entry:
   %cmpUp = icmp sgt i32 %x, 8388607
-  %cmpLow = icmp sgt i32 %x, -8388608
-  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %x
+  %0 = icmp slt i32 %x, -8388608
+  %saturateLow = select i1 %0, i32 %x, i32 -8388608
   %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
   ret i32 %saturateUp
 }
@@ -169,8 +169,8 @@
 ; CHECK-NOT: ssat
 entry:
   %cmpUp = icmp slt i32 %x, 8388607
-  %cmpLow = icmp slt i32 %x, -8388608
-  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %x
+  %0 = icmp sgt i32 %x, -8388608
+  %saturateLow = select i1 %0, i32 %x, i32 -8388608
   %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
   ret i32 %saturateUp
 }
@@ -192,10 +192,10 @@
 ; CHECK-LABEL: no_sat_incorrect_interval:
 ; CHECK-NOT: ssat
 entry:
-  %cmpUp = icmp sgt i32 %x, 8388607
-  %cmpLow = icmp slt i32 %x, -19088744
-  %saturateLow = select i1 %cmpLow, i32 -19088744, i32 %x
-  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  %0 = icmp sgt i32 %x, -19088744
+  %saturateLow = select i1 %0, i32 %x, i32 -19088744
+  %1 = icmp slt i32 %saturateLow, 8388607
+  %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607
   ret i32 %saturateUp
 }
 
Index: llvm/test/CodeGen/ARM/usat.ll
===================================================================
--- llvm/test/CodeGen/ARM/usat.ll
+++ llvm/test/CodeGen/ARM/usat.ll
@@ -22,10 +22,10 @@
 ; V6T2: usat r0, #23, r0
 ; V4T-NOT: usat
 entry:
-  %cmpLow = icmp slt i32 %x, 0
-  %cmpUp = icmp sgt i32 %x, 8388607
-  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x
-  %saturateLow = select i1 %cmpLow, i32 0, i32 %saturateUp
+  %0 = icmp slt i32 %x, 8388607
+  %saturateUp = select i1 %0, i32 %x, i32 8388607
+  %1 = icmp sgt i32 %saturateUp, 0
+  %saturateLow = select i1 %1, i32 %saturateUp, i32 0
   ret i32 %saturateLow
 }
 
@@ -37,10 +37,10 @@
 ; V6T2: usat r0, #11, r0
 ; V4T-NOT: usat
 entry:
-  %cmpLow = icmp slt i16 %x, 0
-  %cmpUp = icmp sgt i16 %x, 2047
-  %saturateUp = select i1 %cmpUp, i16 2047, i16 %x
-  %saturateLow = select i1 %cmpLow, i16 0, i16 %saturateUp
+  %0 = icmp slt i16 %x, 2047
+  %saturateUp = select i1 %0, i16 %x, i16 2047
+  %1 = icmp sgt i16 %saturateUp, 0
+  %saturateLow = select i1 %1, i16 %saturateUp, i16 0
   ret i16 %saturateLow
 }
 
@@ -52,10 +52,10 @@
 ; V6T2: usat r0, #5, r0
 ; V4T-NOT: usat
 entry:
-  %cmpLow = icmp slt i8 %x, 0
-  %cmpUp = icmp sgt i8 %x, 31
-  %saturateUp = select i1 %cmpUp, i8 31, i8 %x
-  %saturateLow = select i1 %cmpLow, i8 0, i8 %saturateUp
+  %0 = icmp slt i8 %x, 31
+  %saturateUp = select i1 %0, i8 %x, i8 31
+  %1 = icmp sgt i8 %saturateUp, 0
+  %saturateLow = select i1 %1, i8 %saturateUp, i8 0
   ret i8 %saturateLow
 }
 
@@ -71,10 +71,10 @@
 ; V6T2: usat r0, #23, r0
 ; V4T-NOT: usat
 entry:
-  %cmpLow = icmp slt i32 %x, 0
   %cmpUp = icmp slt i32 %x, 8388607
   %saturateUp = select i1 %cmpUp, i32 %x, i32 8388607
-  %saturateLow = select i1 %cmpLow, i32 0, i32 %saturateUp
+  %0 = icmp sgt i32 %saturateUp, 0
+  %saturateLow = select i1 %0, i32 %saturateUp, i32 0
   ret i32 %saturateLow
 }
 
@@ -85,10 +85,10 @@
 ; V6T2: usat r0, #23, r0
 ; V4T-NOT: usat
 entry:
-  %cmpLow = icmp sgt i32 %x, 0
-  %cmpUp = icmp sgt i32 %x, 8388607
-  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x
-  %saturateLow = select i1 %cmpLow, i32 %saturateUp, i32 0
+  %0 = icmp slt i32 %x, 8388607
+  %saturateUp = select i1 %0, i32 %x, i32 8388607
+  %1 = icmp sgt i32 %saturateUp, 0
+  %saturateLow = select i1 %1, i32 %saturateUp, i32 0
   ret i32 %saturateLow
 }
 
@@ -99,10 +99,10 @@
 ; V6T2: usat r0, #23, r0
 ; V4T-NOT: usat
 entry:
-  %cmpUp = icmp slt i32 %x, 8388607
-  %cmpLow = icmp slt i32 %x, 0
-  %saturateLow = select i1 %cmpLow, i32 0, i32 %x
-  %saturateUp = select i1 %cmpUp, i32 %saturateLow, i32 8388607
+  %0 = icmp sgt i32 %x, 0
+  %saturateLow = select i1 %0, i32 %x, i32 0
+  %1 = icmp slt i32 %saturateLow, 8388607
+  %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607
   ret i32 %saturateUp
 }
 
@@ -113,10 +113,10 @@
 ; V6T2: usat r0, #23, r0
 ; V4T-NOT: usat
 entry:
-  %cmpUp = icmp sgt i32 %x, 8388607
-  %cmpLow = icmp slt i32 %x, 0
-  %saturateLow = select i1 %cmpLow, i32 0, i32 %x
-  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  %0 = icmp sgt i32 %x, 0
+  %saturateLow = select i1 %0, i32 %x, i32 0
+  %1 = icmp slt i32 %saturateLow, 8388607
+  %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607
   ret i32 %saturateUp
 }
 
@@ -127,10 +127,10 @@
 ; V6T2: usat r0, #23, r0
 ; V4T-NOT: usat
 entry:
-  %cmpUp = icmp slt i32 8388607, %x
   %cmpLow = icmp sgt i32 %x, 0
   %saturateLow = select i1 %cmpLow, i32 %x, i32 0
-  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  %0 = icmp slt i32 %saturateLow, 8388607
+  %saturateUp = select i1 %0, i32 %saturateLow, i32 8388607
   ret i32 %saturateUp
 }
 
@@ -145,8 +145,8 @@
 ; CHECK-NOT: usat
 entry:
   %cmpUp = icmp sgt i32 %x, 8388607
-  %cmpLow = icmp sgt i32 %x, 0
-  %saturateLow = select i1 %cmpLow, i32 0, i32 %x
+  %0 = icmp slt i32 %x, 0
+  %saturateLow = select i1 %0, i32 %x, i32 0
   %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
   ret i32 %saturateUp
 }
@@ -158,8 +158,8 @@
 ; CHECK-NOT: usat
 entry:
   %cmpUp = icmp slt i32 %x, 8388607
-  %cmpLow = icmp slt i32 %x, 0
-  %saturateLow = select i1 %cmpLow, i32 0, i32 %x
+  %0 = icmp sgt i32 %x, 0
+  %saturateLow = select i1 %0, i32 %x, i32 0
   %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
   ret i32 %saturateUp
 }
@@ -169,10 +169,10 @@
 ; CHECK-LABEL: no_unsigned_sat_incorrect_constant:
 ; CHECK-NOT: usat
 entry:
-  %cmpUp = icmp sgt i32 %x, 8388607
-  %cmpLow = icmp slt i32 %x, 0
-  %saturateLow = select i1 %cmpLow, i32 -1, i32 %x
-  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  %cmpLow.inv = icmp sgt i32 %x, -1
+  %saturateLow = select i1 %cmpLow.inv, i32 %x, i32 -1
+  %0 = icmp slt i32 %saturateLow, 8388607
+  %saturateUp = select i1 %0, i32 %saturateLow, i32 8388607
   ret i32 %saturateUp
 }
 
@@ -181,10 +181,10 @@
 ; CHECK-LABEL: no_unsigned_sat_incorrect_interval:
 ; CHECK-NOT: usat
 entry:
-  %cmpUp = icmp sgt i32 %x, 8388607
-  %cmpLow = icmp slt i32 %x, -4
-  %saturateLow = select i1 %cmpLow, i32 -4, i32 %x
-  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  %0 = icmp sgt i32 %x, -4
+  %saturateLow = select i1 %0, i32 %x, i32 -4
+  %1 = icmp slt i32 %saturateLow, 8388607
+  %saturateUp = select i1 %1, i32 %saturateLow, i32 8388607
   ret i32 %saturateUp
 }
 
Index: llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
+++ llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
@@ -2240,15 +2240,9 @@
 ; CHECK-NEXT: ldrsb r0, [r12], #1
 ; CHECK-NEXT: ldrsb r1, [r6], #1
 ; CHECK-NEXT: muls r0, r1, r0
-; CHECK-NEXT: asrs r1, r0, #7
-; CHECK-NEXT: cmn.w r1, #128
-; CHECK-NEXT: mvn r1, #127
-; CHECK-NEXT: it gt
-; CHECK-NEXT: asrgt r1, r0, #7
-; CHECK-NEXT: cmp r1, #127
-; CHECK-NEXT: it ge
-; CHECK-NEXT: movge r1, #127
-; CHECK-NEXT: strb r1, [r4], #1
+; CHECK-NEXT: asrs r0, r0, #7
+; CHECK-NEXT: ssat r0, #8, r0
+; CHECK-NEXT: strb r0, [r4], #1
 ; CHECK-NEXT: le lr, .LBB13_7
 ; CHECK-NEXT: .LBB13_8: @ %for.cond.cleanup
 ; CHECK-NEXT: pop {r4, r5, r6, pc}