diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15355,6 +15355,91 @@
                      CCmp);
 }
+/// isIntImmediate - This method tests to see if the node is a constant
+/// operand. If so, Imm will receive the 64-bit value.
+static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
+  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+    Imm = C->getZExtValue();
+    return true;
+  }
+  return false;
+}
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has an immediate integer right operand.
+// If so, Imm will receive the 64-bit value.
+static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
+                                  uint64_t &Imm) {
+  return N->getOpcode() == Opc &&
+         isIntImmediate(N->getOperand(1).getNode(), Imm);
+}
+
+static bool isShiftedMask(uint64_t Mask, EVT VT) {
+  assert(VT == MVT::i32 || VT == MVT::i64);
+  if (VT == MVT::i32)
+    return isShiftedMask_32(Mask);
+  return isShiftedMask_64(Mask);
+}
+
+// Try to rewrite (or (and X, ShiftedMask), Y) so that the mask is applied to
+// the unshifted value and the shift is folded into the OR as a
+// shifted-register operand.
+static SDValue tryCombineToORWithShift(SDNode *N,
+                                       TargetLowering::DAGCombinerInfo &DCI) {
+  assert(N->getOpcode() == ISD::OR &&
+         "N must be an OR operation to call this function");
+
+  EVT VT = N->getValueType(0);
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+
+  // Bail out when the value type is not i32 or i64, since the AArch64 ORR
+  // with a shifted register is only available for i32 and i64.
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return SDValue();
+
+  auto isAndOperandWithShiftedMask = [](SDValue V, EVT VT,
+                                        uint64_t &AndImm) -> bool {
+    if (!isOpcWithIntImmediate(V.getNode(), ISD::AND, AndImm))
+      return false;
+    return isShiftedMask(AndImm, VT);
+  };
+
+  auto getAndOperandWithShiftedMaskIndex =
+      [isAndOperandWithShiftedMask](SDNode *N, EVT VT,
+                                    uint64_t &AndImm) -> int {
+    SDValue LHS = N->getOperand(0);
+    SDValue RHS = N->getOperand(1);
+
+    if (isAndOperandWithShiftedMask(LHS, VT, AndImm))
+      return 0;
+
+    if (isAndOperandWithShiftedMask(RHS, VT, AndImm))
+      return 1;
+
+    return -1;
+  };
+
+  uint64_t AndImm = 0;
+
+  // Only combine when one of the operands is AND(Val, ShiftedMask).
+  if (int OperandIndex = getAndOperandWithShiftedMaskIndex(N, VT, AndImm);
+      OperandIndex >= 0) {
+    assert(OperandIndex < 2 && "OperandIndex should be 0 or 1");
+    SDValue AndOperandWithShiftedMask = N->getOperand(OperandIndex);
+    SDValue OtherOperand = N->getOperand(1 - OperandIndex);
+
+    // Proceed only when the AND has a single use (this OR node).
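+    // The rewrite below turns
+    //   (or (and X, ShiftedMask), Y)
+    // into
+    //   (or (shl (and (srl X, C), ShiftedMask >> C), C), Y)
+    // where C is the number of trailing zero bits in ShiftedMask, so that the
+    // srl/and pair can be selected as a bitfield extract and the or/shl pair
+    // as an ORR with a shifted-register operand.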
+    if (AndOperandWithShiftedMask.hasOneUse()) {
+      assert(isShiftedMask(AndImm, VT) && "AndImm should be a shifted mask");
+
+      const unsigned ShiftAmount = countTrailingZeros(AndImm);
+
+      if (ShiftAmount > 0) {
+        SDValue AndOperandWithMask = DAG.getNode(
+            ISD::AND, DL, VT,
+            DAG.getNode(ISD::SRL, DL, VT,
+                        AndOperandWithShiftedMask.getOperand(0),
+                        DAG.getConstant(ShiftAmount, DL, VT)),
+            DAG.getConstant(AndImm >> ShiftAmount, DL, VT));
+
+        return DAG.getNode(ISD::OR, DL, VT,
+                           DAG.getNode(ISD::SHL, DL, VT, AndOperandWithMask,
+                                       DAG.getConstant(ShiftAmount, DL, VT)),
+                           OtherOperand);
+      }
+    }
+  }
+
+  return SDValue();
+}
+
 static SDValue performORCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const AArch64Subtarget *Subtarget) {
   SelectionDAG &DAG = DCI.DAG;
@@ -15370,6 +15455,9 @@
   if (SDValue Res = tryCombineToEXTR(N, DCI))
     return Res;
 
+  if (SDValue Res = tryCombineToORWithShift(N, DCI))
+    return Res;
+
   if (SDValue Res = tryCombineToBSL(N, DCI))
     return Res;
 
diff --git a/llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll b/llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-lsr-bfi.ll
@@ -4,7 +4,7 @@
 define i32 @lsr_bfi(i32 %a) {
 ; CHECK-LABEL: lsr_bfi:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #20
+; CHECK-NEXT:    ubfx w8, w0, #20, #4
 ; CHECK-NEXT:    bfi w0, w8, #4, #4
 ; CHECK-NEXT:    ret
   %and1 = and i32 %a, -241
@@ -29,10 +29,10 @@
 define i32 @negative_lsr_bfi1(i32 %a) {
 ; CHECK-LABEL: negative_lsr_bfi1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #16
-; CHECK-NEXT:    lsr w9, w8, #4
-; CHECK-NEXT:    bfi w0, w9, #4, #4
-; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    ubfx w8, w0, #20, #4
+; CHECK-NEXT:    mov w9, w0
+; CHECK-NEXT:    bfi w9, w8, #4, #4
+; CHECK-NEXT:    add w0, w9, w0, lsr #16
 ; CHECK-NEXT:    ret
   %and1 = and i32 %a, -241
   %1 = lshr i32 %a, 16
@@ -45,7 +45,7 @@
 define i64 @lsr_bfix(i64 %a) {
 ; CHECK-LABEL: lsr_bfix:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr x8, x0, #20
+; CHECK-NEXT:    ubfx x8, x0, #20, #4
 ; CHECK-NEXT:    bfi x0, x8, #4, #4
 ; CHECK-NEXT:    ret
   %and1 = and i64 %a, -241
@@ -70,10 +70,10 @@
 define i64 @negative_lsr_bfix1(i64 %a) {
 ; CHECK-LABEL: negative_lsr_bfix1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr x8, x0, #16
-; CHECK-NEXT:    lsr x9, x8, #4
-; CHECK-NEXT:    bfi x0, x9, #4, #4
-; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    ubfx x8, x0, #20, #4
+; CHECK-NEXT:    mov x9, x0
+; CHECK-NEXT:    bfi x9, x8, #4, #4
+; CHECK-NEXT:    add x0, x9, x0, lsr #16
 ; CHECK-NEXT:    ret
   %and1 = and i64 %a, -241
   %1 = lshr i64 %a, 16
diff --git a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll
--- a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll
+++ b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll
@@ -292,9 +292,8 @@
 define i64 @i64_or_lhs_bitfield_positioning(i64 %tmp1, i64 %tmp2) {
 ; CHECK-LABEL: i64_or_lhs_bitfield_positioning:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    lsl w8, w1, #7
-; CHECK-NEXT:    and x8, x8, #0x3f80
-; CHECK-NEXT:    orr x0, x8, x0
+; CHECK-NEXT:    and x8, x1, #0x7f
+; CHECK-NEXT:    orr x0, x0, x8, lsl #7
 ; CHECK-NEXT:    ret
 entry:
   %and = shl i64 %tmp2, 7
@@ -306,9 +305,8 @@
 define i64 @i64_or_rhs_bitfield_positioning(i64 %tmp1, i64 %tmp2) {
 ; CHECK-LABEL: i64_or_rhs_bitfield_positioning:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    lsl w8, w1, #7
-; CHECK-NEXT:    and x8, x8, #0x3f80
-; CHECK-NEXT:    orr x0, x0, x8
+; CHECK-NEXT:    and x8, x1, #0x7f
+; CHECK-NEXT:    orr x0, x0, x8, lsl #7
 ; CHECK-NEXT:    ret
 entry:
   %and = shl i64 %tmp2, 7
@@ -320,8 +318,8 @@
 define i32 @i32_or_lhs_bitfield_positioning(i32 %tmp1, i32 %tmp2) {
 ; CHECK-LABEL: i32_or_lhs_bitfield_positioning:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ubfiz w8, w1, #7, #7
-; CHECK-NEXT:    orr w0, w8, w0
+; CHECK-NEXT:    and w8, w1, #0x7f
+; CHECK-NEXT:    orr w0, w0, w8, lsl #7
 ; CHECK-NEXT:    ret
 entry:
   %and = shl i32 %tmp2, 7
@@ -333,8 +331,8 @@
 define i32 @i32_or_rhs_bitfield_positioning(i32 %tmp1, i32 %tmp2) {
 ; CHECK-LABEL: i32_or_rhs_bitfield_positioning:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ubfiz w8, w1, #7, #7
-; CHECK-NEXT:    orr w0, w0, w8
+; CHECK-NEXT:    and w8, w1, #0x7f
+; CHECK-NEXT:    orr w0, w0, w8, lsl #7
 ; CHECK-NEXT:    ret
 entry:
   %and = shl i32 %tmp2, 7
diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
--- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
+++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
@@ -9,7 +9,7 @@
 define i8 @out8_constmask(i8 %x, i8 %y) {
 ; CHECK-LABEL: out8_constmask:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #2
+; CHECK-NEXT:    ubfx w8, w0, #2, #4
 ; CHECK-NEXT:    mov w0, w1
 ; CHECK-NEXT:    bfi w0, w8, #2, #4
 ; CHECK-NEXT:    ret
@@ -22,7 +22,7 @@
 define i16 @out16_constmask(i16 %x, i16 %y) {
 ; CHECK-LABEL: out16_constmask:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #4
+; CHECK-NEXT:    ubfx w8, w0, #4, #8
 ; CHECK-NEXT:    mov w0, w1
 ; CHECK-NEXT:    bfi w0, w8, #4, #8
 ; CHECK-NEXT:    ret
@@ -35,7 +35,7 @@
 define i32 @out32_constmask(i32 %x, i32 %y) {
 ; CHECK-LABEL: out32_constmask:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #8
+; CHECK-NEXT:    ubfx w8, w0, #8, #16
 ; CHECK-NEXT:    mov w0, w1
 ; CHECK-NEXT:    bfi w0, w8, #8, #16
 ; CHECK-NEXT:    ret
@@ -48,7 +48,7 @@
 define i64 @out64_constmask(i64 %x, i64 %y) {
 ; CHECK-LABEL: out64_constmask:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr x8, x0, #16
+; CHECK-NEXT:    ubfx x8, x0, #16, #32
 ; CHECK-NEXT:    mov x0, x1
 ; CHECK-NEXT:    bfi x0, x8, #16, #32
 ; CHECK-NEXT:    ret
@@ -246,9 +246,9 @@
 ; CHECK-LABEL: n0_badconstmask:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w1, #0xffffff00
-; CHECK-NEXT:    and w9, w0, #0xffff00
+; CHECK-NEXT:    ubfx w9, w0, #8, #16
 ; CHECK-NEXT:    and w8, w8, #0xff0001ff
-; CHECK-NEXT:    orr w0, w9, w8
+; CHECK-NEXT:    orr w0, w8, w9, lsl #8
 ; CHECK-NEXT:    ret
   %mx = and i32 %x, 16776960
   %my = and i32 %y, -16776960 ; instead of -16776961