diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -869,6 +869,15 @@
   /// integer type VT, by either zero-extending or truncating it.
   SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
 
+  /// Check if \p Val is a zero extension in-reg. The zero-extend-in-reg
+  /// operation is rendered as an AND instruction with a suitable constant
+  /// that sets the high bits of the register to zero. For example, a zero
+  /// extend in-reg of an i8 value loaded as anyext into an i32 value is
+  /// rendered as:
+  ///
+  ///   i32 = AND i32 (load anyext from i8), i32 Constant<255>
+  bool isZeroExtendInReg(SDValue Val) const;
+
   /// Return the expression required to zero extend the Op
   /// value assuming it was the smaller SrcTy value.
   SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11279,16 +11279,31 @@
     }
   }
 
+  // Checks if the uses of a load are extensions that differ in signedness.
+  auto UsesDifferInSignExtension = [this](LoadSDNode *Load) -> bool {
+    if (Load->use_size() != 2)
+      return false;
+
+    SDNode::use_iterator UseIt = Load->use_begin();
+    SDNode *UseOne = *UseIt;
+    SDNode *UseTwo = *++UseIt;
+    if (UseOne->getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        this->DAG.isZeroExtendInReg(SDValue(UseTwo, 0)))
+      return true;
+
+    return false;
+  };
+
   // fold (sext_inreg (extload x)) -> (sextload x)
   // If sextload is not supported by target, we can only do the combine when
   // load has one use. Doing otherwise can block folding the extload with other
   // extends that the target does support.
-  if (ISD::isEXTLoad(N0.getNode()) &&
-      ISD::isUNINDEXEDLoad(N0.getNode()) &&
+  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() && N0.hasOneUse()) ||
-       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
+       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) &&
+      !UsesDifferInSignExtension(cast<LoadSDNode>(N0))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1168,6 +1168,54 @@
   return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
 }
 
+bool SelectionDAG::isZeroExtendInReg(SDValue Val) const {
+  if (Val->getOpcode() != ISD::AND)
+    return false;
+
+  // The LHS must be an extended load (any of sext/zext/anyext) from
+  // the input type NarrowVT to the WideVT used by the AND.
+  SDValue LHS = Val.getOperand(0);
+  if (!ISD::isEXTLoad(LHS.getNode()))
+    return false;
+
+  EVT NarrowVT = cast<LoadSDNode>(LHS)->getMemoryVT();
+  EVT WideVT = Val.getValueType();
+
+  // NarrowVT and WideVT must either both be vectors or both be scalars.
+  if (NarrowVT.isVector() && !WideVT.isVector())
+    return false;
+  if (!NarrowVT.isVector() && WideVT.isVector())
+    return false;
+
+  // If vectors, they must have the same element count.
+  if (NarrowVT.isVector() && WideVT.isVector() &&
+      (NarrowVT.getVectorElementCount() != WideVT.getVectorElementCount()))
+    return false;
+
+  // The type we are extending from must be smaller than the one used
+  // in the AND.
+  if (!NarrowVT.bitsLE(WideVT))
+    return false;
+
+  // If the types are the same there is no zero extension going on.
+  if (NarrowVT == WideVT)
+    return false;
+
+  // RHS must be the constant that clears all the bits above the narrow
+  // type: 255 (0xff) for i8, 65535 (0xffff) for i16, and so on.
+  auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(1));
+  if (!C)
+    return false;
+
+  APInt Imm = APInt::getLowBitsSet(WideVT.getScalarSizeInBits(),
+                                   NarrowVT.getScalarSizeInBits());
+  if (C->getSExtValue() != Imm)
+    return false;
+
+  return true;
+}
+
 SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
   EVT OpVT = Op.getValueType();
   assert(VT.isInteger() && OpVT.isInteger() &&
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5887,6 +5887,16 @@
     // (a.k.a. TST) and the test in the test bit and branch instruction
     // becomes redundant. This would also increase register pressure.
     uint64_t Mask = LHS.getValueSizeInBits() - 1;
+    // If LHS is a sext_inreg, we can check the sign bit of the
+    // original unextended data.
+    if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+      Mask = cast<VTSDNode>(LHS.getOperand(1))
+                 ->getVT()
+                 .getSizeInBits()
+                 .getFixedSize() -
+             1;
+      LHS = LHS.getOperand(0);
+    }
     return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
                        DAG.getConstant(Mask, dl, MVT::i64), Dest);
   }
@@ -5897,6 +5907,16 @@
     // (a.k.a. TST) and the test in the test bit and branch instruction
     // becomes redundant. This would also increase register pressure.
     uint64_t Mask = LHS.getValueSizeInBits() - 1;
+    // If LHS is a sext_inreg, we can check the sign bit of the
+    // original unextended data.
+    if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+      Mask = cast<VTSDNode>(LHS.getOperand(1))
+                 ->getVT()
+                 .getSizeInBits()
+                 .getFixedSize() -
+             1;
+      LHS = LHS.getOperand(0);
+    }
     return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
                        DAG.getConstant(Mask, dl, MVT::i64), Dest);
   }
diff --git a/llvm/test/CodeGen/AArch64/zext-and-signed-compare.ll b/llvm/test/CodeGen/AArch64/zext-and-signed-compare.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zext-and-signed-compare.ll
@@ -0,0 +1,94 @@
+; RUN: llc -mtriple aarch64-linux-gnu -o - -asm-verbose=0 < %s | FileCheck %s
+
+define i32 @f_i32_i8(i8* %p) nounwind {
+; CHECK-LABEL: f_i32_i8:
+; CHECK-NEXT: ldrb w[[N:[0-9]+]], [x0]
+; CHECK-NEXT: tbnz w[[N]], #7, .LBB[[BB:.*]]
+; CHECK-NEXT: add w0, w[[N]], w[[N]]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB[[BB]]
+; CHECK-NEXT: mul w0, w[[N]], w[[N]]
+; CHECK-NEXT: ret
+entry:
+  %0 = load i8, i8* %p
+  %conv = zext i8 %0 to i32
+  %cmp = icmp sgt i8 %0, -1
+  br i1 %cmp, label %A, label %B
+
+A:
+  %retval2 = add i32 %conv, %conv
+  ret i32 %retval2
+
+B:
+  %retval1 = mul i32 %conv, %conv
+  ret i32 %retval1
+}
+
+define i32 @f_i32_i16(i16* %p) nounwind {
+; CHECK-LABEL: f_i32_i16:
+; CHECK-NEXT: ldrh w[[N:[0-9]+]], [x0]
+; CHECK-NEXT: tbnz w[[N]], #15, .LBB[[BB:.*]]
+; CHECK-NEXT: add w0, w[[N]], w[[N]]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB[[BB]]
+; CHECK-NEXT: mul w0, w[[N]], w[[N]]
+; CHECK-NEXT: ret
+entry:
+  %0 = load i16, i16* %p
+  %conv = zext i16 %0 to i32
+  %cmp = icmp sgt i16 %0, -1
+  br i1 %cmp, label %A, label %B
+
+A:
+  %retval2 = add i32 %conv, %conv
+  ret i32 %retval2
+
+B:
+  %retval1 = mul i32 %conv, %conv
+  ret i32 %retval1
+}
+
+define i32 @g_i32_i8(i8* %p) nounwind {
+; CHECK-LABEL: g_i32_i8:
+; CHECK-NEXT: ldrb w0, [x0]
+; CHECK-NEXT: tbnz w0, #7, .LBB[[BB:.*]]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB[[BB]]
+; CHECK-NEXT: lsl w0, w0, #1
+; CHECK-NEXT: ret
+entry:
+  %0 = load i8, i8* %p, align 1
+  %conv = zext i8 %0 to i32
+  %cmp1 = icmp sgt i8 %0, -1
+  br i1 %cmp1, label %return, label %B
+
+B: ; preds = %entry
+  %add = shl nuw nsw i32 %conv, 1
+  ret i32 %add
+
+return: ; preds = %entry
+  ret i32 %conv
+}
+
+define i32 @g_i32_i16(i16* %p) nounwind {
+; CHECK-LABEL: g_i32_i16:
+; CHECK-NEXT: ldrh w0, [x0]
+; CHECK-NEXT: tbnz w0, #15, .LBB[[BB:.*]]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB[[BB]]
+; CHECK-NEXT: lsl w0, w0, #1
+; CHECK-NEXT: ret
+entry:
+  %0 = load i16, i16* %p, align 1
+  %conv = zext i16 %0 to i32
+  %cmp1 = icmp sgt i16 %0, -1
+  br i1 %cmp1, label %return, label %B
+
+B: ; preds = %entry
+  %add = shl nuw nsw i32 %conv, 1
+  ret i32 %add
+
+return: ; preds = %entry
+  ret i32 %conv
+}
+
diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
--- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
+++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
@@ -82,19 +82,19 @@
 ; ENABLE-NEXT: bhi .LBB0_7
 ; ENABLE-NEXT: @ %bb.14: @ %while.body24.preheader
 ; ENABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1
-; ENABLE-NEXT: sub r3, r3, #2
+; ENABLE-NEXT: sub lr, r3, #2
 ; ENABLE-NEXT: .LBB0_15: @ %while.body24
 ; ENABLE-NEXT: @ Parent Loop BB0_7 Depth=1
 ; ENABLE-NEXT: @ => This Inner Loop Header: Depth=2
-; ENABLE-NEXT: mov r0, r3
-; ENABLE-NEXT: cmp r3, r2
+; ENABLE-NEXT: mov r0, lr
+; ENABLE-NEXT: cmp lr, r2
 ; ENABLE-NEXT: bls .LBB0_7
 ; ENABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge
 ; ENABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2
-; ENABLE-NEXT: mov r3, r0
-; ENABLE-NEXT: ldrsb lr, [r3], #-1
-; ENABLE-NEXT: cmn lr, #1
-; ENABLE-NEXT: uxtb r12, lr
+; ENABLE-NEXT: mov lr, r0
+; ENABLE-NEXT: ldrb r12, [lr], #-1
+; ENABLE-NEXT: sxtb r3, r12
+; ENABLE-NEXT: cmn r3, #1
 ; ENABLE-NEXT: bgt .LBB0_7
 ; ENABLE-NEXT: @ %bb.17: @ %while.body24.land.rhs14_crit_edge
 ; ENABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2
@@ -172,19 +172,19 @@
 ; DISABLE-NEXT: bhi .LBB0_7
 ; DISABLE-NEXT: @ %bb.14: @ %while.body24.preheader
 ; DISABLE-NEXT: @ in Loop: Header=BB0_7 Depth=1
-; DISABLE-NEXT: sub r3, r3, #2
+; DISABLE-NEXT: sub lr, r3, #2
 ; DISABLE-NEXT: .LBB0_15: @ %while.body24
 ; DISABLE-NEXT: @ Parent Loop BB0_7 Depth=1
 ; DISABLE-NEXT: @ => This Inner Loop Header: Depth=2
-; DISABLE-NEXT: mov r0, r3
-; DISABLE-NEXT: cmp r3, r2
+; DISABLE-NEXT: mov r0, lr
+; DISABLE-NEXT: cmp lr, r2
 ; DISABLE-NEXT: bls .LBB0_7
 ; DISABLE-NEXT: @ %bb.16: @ %while.body24.land.rhs14_crit_edge
 ; DISABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2
-; DISABLE-NEXT: mov r3, r0
-; DISABLE-NEXT: ldrsb lr, [r3], #-1
-; DISABLE-NEXT: cmn lr, #1
-; DISABLE-NEXT: uxtb r12, lr
+; DISABLE-NEXT: mov lr, r0
+; DISABLE-NEXT: ldrb r12, [lr], #-1
+; DISABLE-NEXT: sxtb r3, r12
+; DISABLE-NEXT: cmn r3, #1
 ; DISABLE-NEXT: bgt .LBB0_7
 ; DISABLE-NEXT: @ %bb.17: @ %while.body24.land.rhs14_crit_edge
 ; DISABLE-NEXT: @ in Loop: Header=BB0_15 Depth=2
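For reference, the two-use pattern that UsesDifferInSignExtension and the new SelectionDAG::isZeroExtendInReg hook look for can be reduced to a minimal IR sketch, condensed from f_i32_i8 in the new AArch64 test above (the function and value names here are illustrative only). A single narrow load has one zero-extending use, which the DAG renders as an AND of the anyext load with 255, and one signed use that only tests the sign bit, which becomes a sign_extend_inreg feeding TBNZ on AArch64:

define i32 @zext_and_signed_compare(i8* %p) {
entry:
  ; One narrow load ...
  %v = load i8, i8* %p
  ; ... with a zero-extending use (an AND-based zero extend in-reg in the DAG) ...
  %zext = zext i8 %v to i32
  ; ... and a signed use that only needs the sign bit of the i8 value.
  %isnonneg = icmp sgt i8 %v, -1
  br i1 %isnonneg, label %pos, label %neg

pos:
  ret i32 %zext

neg:
  %sq = mul i32 %zext, %zext
  ret i32 %sq
}

Without the extra !UsesDifferInSignExtension condition, visitSIGN_EXTEND_INREG would fold the signed use into a sextload and the zero-extending use would then need a separate masking instruction, as the old uxtb in the ARM test shows; with it, the anyext load is kept and the TBNZ/TBZ change tests the sign bit of the unextended value directly.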