diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2874,10 +2874,17 @@
   return false;
 }
 
+// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
+// the operands and select it to AArch64::ORR with a shifted register if
+// that is more efficient. Returns true iff selection to AArch64::ORR happens.
 static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
                             SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
                             const bool BiggerPattern) {
   EVT VT = N->getValueType(0);
+  assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
+  assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
+          (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
+         "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
   assert((VT == MVT::i32 || VT == MVT::i64) &&
          "Expect result type to be i32 or i64 since N is combinable to BFM");
   SDLoc DL(N);
@@ -2886,6 +2893,7 @@
   if (OrOpd1 != Dst)
     return false;
 
+  const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
   // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
   // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
   if (BiggerPattern) {
@@ -2902,7 +2910,6 @@
     uint64_t ShiftAmount;
     if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
                                        ShiftAmount)) {
-      unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
       SDValue Ops[] = {OrOpd0, ShiftedOperand,
                        CurDAG->getTargetConstant(ShiftAmount, DL, VT)};
       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
@@ -2914,13 +2921,52 @@
 
   assert((!BiggerPattern) && "BiggerPattern should be handled above");
 
   uint64_t ShlImm;
-  if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm) &&
-      OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
-    unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
-    SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ShlImm, DL, VT)};
-    CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
-    return true;
+  if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
+    if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
+      SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ShlImm, DL, VT)};
+      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
+      return true;
+    }
+
+    // Select the pattern below to a left-shifted operand rather than BFI.
+    // %val1 = op ..
+    // %val2 = shl %val1, #imm
+    // %res = or %val1, %val2
+    //
+    // If N were selected as BFI, we know that
+    // 1) OrOpd0 would be the operand to extract bits from (folded into BFI)
+    // 2) OrOpd1 would be the destination operand (preserved)
+    //
+    // Instead of selecting N as BFI, fold OrOpd0 as a left shift directly.
+    if (OrOpd0.getOperand(0) == OrOpd1) {
+      SDValue Ops[] = {OrOpd1, OrOpd1,
+                       CurDAG->getTargetConstant(ShlImm, DL, VT)};
+      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
+      return true;
+    }
+  }
+
+  uint64_t SrlImm;
+  if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
+    // Select the pattern below to a right-shifted operand rather than BFXIL.
+    // %val1 = op ..
+    // %val2 = lshr %val1, #imm
+    // %res = or %val1, %val2
+    //
+    // If N were selected as BFXIL, we know that
+    // 1) OrOpd0 would be the operand to extract bits from (folded into BFXIL)
+    // 2) OrOpd1 would be the destination operand (preserved)
+    //
+    // Instead of selecting N as BFXIL, fold OrOpd0 as a right shift directly.
+    if (OrOpd0.getOperand(0) == OrOpd1) {
+      SDValue Ops[] = {
+          OrOpd1, OrOpd1,
+          CurDAG->getTargetConstant(
+              AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
+      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
+      return true;
+    }
   }
 
   return false;
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -643,8 +643,7 @@
 ; CHECK-LABEL: test_orr_not_bfi_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and x8, x0, #0xff
-; CHECK-NEXT:    bfi x8, x0, #8, #8
-; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    orr x0, x8, x8, lsl #8
 ; CHECK-NEXT:    ret
   %2 = and i64 %0, 255
   %3 = shl i64 %2, 8
@@ -674,8 +673,7 @@
 ; CHECK-LABEL: test_orr_not_bfi_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    bfi w8, w0, #8, #8
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    orr w0, w8, w8, lsl #8
 ; CHECK-NEXT:    ret
   %2 = and i32 %0, 255
   %3 = shl i32 %2, 8
@@ -704,8 +702,7 @@
 ; CHECK-LABEL: orr_not_bfxil_test2_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and x8, x0, #0xff000
-; CHECK-NEXT:    bfxil x8, x0, #12, #8
-; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    orr x0, x8, x8, lsr #12
 ; CHECK-NEXT:    ret
   %2 = and i64 %0, 1044480 ; 0xff000
   %3 = lshr i64 %2, 12
@@ -734,8 +731,7 @@
 ; CHECK-LABEL: orr_not_bfxil_test2_i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff000
-; CHECK-NEXT:    bfxil w8, w0, #12, #8
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    orr w0, w8, w8, lsr #12
 ; CHECK-NEXT:    ret
   %2 = and i32 %0, 1044480 ; 0xff000
   %3 = lshr i32 %2, 12
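Note on the two ORR immediates above: the SHL path passes ShlImm to
getTargetConstant directly, while the SRL path wraps SrlImm in
AArch64_AM::getShifterImm(AArch64_AM::LSR, ...). Both are correct because the
shifted-register forms (ORRWrs/ORRXrs) carry the shift kind and amount packed
into one immediate by AArch64_AM::getShifterImm (defined in
llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h), and LSL encodes
as type 0, so for left shifts the packed value equals the raw shift amount. A
minimal standalone sketch of that packing, using a simplified stand-in enum for
AArch64_AM::ShiftExtendType (not part of this patch):

  #include <cassert>

  // Simplified stand-in for AArch64_AM::ShiftExtendType; the enumerator
  // order matches the encodings used by the shifted-register operand.
  enum ShiftType { LSL = 0, LSR, ASR, ROR };

  // Pack a shift type and a 6-bit shift amount into a single immediate:
  // the type goes in the bits above bit 5, the amount in bits [5:0].
  // Because LSL encodes as 0, getShifterImm(LSL, Imm) == Imm, which is why
  // the SHL case above can pass ShlImm unpacked.
  unsigned getShifterImm(ShiftType ST, unsigned Imm) {
    assert(Imm < 64 && "shift amount must fit in 6 bits");
    return (static_cast<unsigned>(ST) << 6) | Imm;
  }

For example, getShifterImm(LSR, 12) yields (1 << 6) | 12, corresponding to the
"lsr #12" operands checked in orr_not_bfxil_test2_i64/_i32 above.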