diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -21228,6 +21228,47 @@
   return DAG.getNode(ISD::OR, DL, VT, Sel, SelInv);
 }
 
+// Fold DUPLANE32(INSERT_SUBVECTOR(undef, TRUNCATE(X), 0), 0) into
+// DUPLANE32(BITCAST(X), 0) when X has the same total size as the vector being
+// inserted into. Only lane 0 is read, and on little-endian targets lane 0 of
+// the bitcast holds the low 32 bits of X's first element, which is what the
+// truncate would have produced. Returns an empty SDValue if nothing matched.
+static SDValue performDupLane32Combine(SDNode *N, SelectionDAG &DAG) {
+  // ISD::BITCAST behaves like a store followed by a reload, so on big-endian
+  // targets lane 0 of the bitcast would be the high half of X's first element
+  // and the fold would duplicate the wrong bits.
+  if (!DAG.getDataLayout().isLittleEndian())
+    return SDValue();
+
+  SDValue Insert = N->getOperand(0);
+  if (Insert.getOpcode() != ISD::INSERT_SUBVECTOR)
+    return SDValue();
+  if (!Insert.getOperand(0).isUndef())
+    return SDValue();
+
+  // Both the insertion index and the duplicated lane must be 0.
+  uint64_t IdxDupLane = N->getConstantOperandVal(1);
+  uint64_t IdxInsert = Insert.getConstantOperandVal(2);
+  if (IdxInsert != 0 || IdxDupLane != 0)
+    return SDValue();
+
+  SDValue Truncate = Insert.getOperand(1);
+  if (Truncate.getOpcode() != ISD::TRUNCATE)
+    return SDValue();
+
+  // A bitcast is only possible between types of the same total width.
+  EVT InsertVT = Insert.getValueType();
+  EVT WideVT = Truncate.getOperand(0).getValueType();
+  if (InsertVT.getSizeInBits() != WideVT.getSizeInBits())
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue Bitcast =
+      DAG.getNode(ISD::BITCAST, DL, InsertVT, Truncate.getOperand(0));
+  return DAG.getNode(AArch64ISD::DUPLANE32, DL, N->getValueType(0), Bitcast,
+                     N->getOperand(1));
+}
+
 static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG) {
   EVT VT = N->getValueType(0);
 
@@ -21353,6 +21394,8 @@
     return performCSELCombine(N, DCI, DAG);
   case AArch64ISD::DUP:
     return performDUPCombine(N, DCI);
+  case AArch64ISD::DUPLANE32:
+    return performDupLane32Combine(N, DAG);
   case AArch64ISD::DUPLANE128:
     return performDupLane128Combine(N, DAG);
   case AArch64ISD::NVCAST:
diff --git a/llvm/test/CodeGen/AArch64/trunc-v1i64.ll b/llvm/test/CodeGen/AArch64/trunc-v1i64.ll
--- a/llvm/test/CodeGen/AArch64/trunc-v1i64.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-v1i64.ll
@@ -21,8 +21,8 @@
 
 define <2 x i32> @test_v1i32_1(<1 x i64> %in0) {
 ; CHECK-LABEL: test_v1i32_1:
-; CHECK: xtn v0.2s, v0.2d
-; CHECK-NEXT: dup v0.2s, v0.s[0]
+; CHECK-NOT: xtn
+; CHECK: dup v0.2s, v0.s[0]
   %1 = shufflevector <1 x i64> 
%in0, <1 x i64> undef, <2 x i32> %2 = trunc <2 x i64> %1 to <2 x i32> ret <2 x i32> %2