diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13949,7 +13949,8 @@
 }
 
 static SDValue PerformExtractEltCombine(SDNode *N,
-                                        TargetLowering::DAGCombinerInfo &DCI) {
+                                        TargetLowering::DAGCombinerInfo &DCI,
+                                        const ARMSubtarget *ST) {
   SDValue Op0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
   SDLoc dl(N);
@@ -13968,6 +13969,19 @@
       return X;
   }
 
+  // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
+  if (Op0.getValueType() == MVT::v4i32 &&
+      isa<ConstantSDNode>(N->getOperand(1)) &&
+      Op0.getOpcode() == ISD::BITCAST &&
+      Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+      Op0.getOperand(0).getValueType() == MVT::v2f64) {
+    SDValue BV = Op0.getOperand(0);
+    unsigned Offset = N->getConstantOperandVal(1);
+    SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
+    if (MOV.getOpcode() == ARMISD::VMOVDRR)
+      return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
+  }
+
   return SDValue();
 }
 
@@ -16340,7 +16354,8 @@
   case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
   case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
   case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
-  case ISD::EXTRACT_VECTOR_ELT: return PerformExtractEltCombine(N, DCI);
+  case ISD::EXTRACT_VECTOR_ELT:
+    return PerformExtractEltCombine(N, DCI, Subtarget);
   case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
   case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);
diff --git a/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll b/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll
--- a/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll
@@ -95,51 +95,38 @@
 ; CHECK-MVE: @ %bb.0: @ %entry
 ; CHECK-MVE-NEXT: .save {r7, lr}
 ; CHECK-MVE-NEXT: push {r7, lr}
-; CHECK-MVE-NEXT: vmov d1, r2, r3
-; CHECK-MVE-NEXT: add r2, sp, #8
-; CHECK-MVE-NEXT: vldrw.u32 q1, [r2]
-; CHECK-MVE-NEXT: vmov d0, r0, r1
-; CHECK-MVE-NEXT: vmov r1, s2
-; CHECK-MVE-NEXT: vmov r3, s6
-; CHECK-MVE-NEXT: vmov r0, s3
-; CHECK-MVE-NEXT: vmov r2, s7
-; CHECK-MVE-NEXT: adds.w lr, r1, r3
+; CHECK-MVE-NEXT: add.w r12, sp, #8
+; CHECK-MVE-NEXT: vldrw.u32 q0, [r12]
+; CHECK-MVE-NEXT: vmov lr, s2
+; CHECK-MVE-NEXT: vmov r12, s3
+; CHECK-MVE-NEXT: adds.w r2, r2, lr
+; CHECK-MVE-NEXT: adc.w r12, r12, r3
 ; CHECK-MVE-NEXT: vmov r3, s0
-; CHECK-MVE-NEXT: vmov r1, s4
-; CHECK-MVE-NEXT: adc.w r12, r0, r2
-; CHECK-MVE-NEXT: vmov r2, s1
-; CHECK-MVE-NEXT: vmov r0, s5
-; CHECK-MVE-NEXT: adds r1, r1, r3
-; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, lr
-; CHECK-MVE-NEXT: adcs r0, r2
-; CHECK-MVE-NEXT: vmov q0[3], q0[1], r0, r12
-; CHECK-MVE-NEXT: vmov r0, r1, d0
-; CHECK-MVE-NEXT: vmov r2, r3, d1
+; CHECK-MVE-NEXT: adds r0, r0, r3
+; CHECK-MVE-NEXT: vmov q1[2], q1[0], r0, r2
+; CHECK-MVE-NEXT: vmov r0, s1
+; CHECK-MVE-NEXT: adcs r0, r1
+; CHECK-MVE-NEXT: vmov q1[3], q1[1], r0, r12
+; CHECK-MVE-NEXT: vmov r0, r1, d2
+; CHECK-MVE-NEXT: vmov r2, r3, d3
 ; CHECK-MVE-NEXT: pop {r7, pc}
 ;
 ; CHECK-BE-LABEL: vector_add_i64:
 ; CHECK-BE: @ %bb.0: @ %entry
 ; CHECK-BE-NEXT: .save {r7, lr}
 ; CHECK-BE-NEXT: push {r7, lr}
-; CHECK-BE-NEXT: vmov d1, r3, r2
-; CHECK-BE-NEXT: add r2, sp, #8
-; CHECK-BE-NEXT: vmov d0, r1, r0
-; CHECK-BE-NEXT: vrev64.32 q1, q0
-; CHECK-BE-NEXT: vldrw.u32 q0, [r2]
-; CHECK-BE-NEXT: vmov r1, s7
-; CHECK-BE-NEXT: vmov r3, s3
-; CHECK-BE-NEXT: vmov r0, s6
-; CHECK-BE-NEXT: vmov r2, s2
-; CHECK-BE-NEXT: adds.w r12, r1, r3
-; CHECK-BE-NEXT: vmov r3, s5
-; CHECK-BE-NEXT: vmov r1, s0
-; CHECK-BE-NEXT: adc.w lr, r0, r2
-; CHECK-BE-NEXT: vmov r0, s1
-; CHECK-BE-NEXT: vmov r2, s4
-; CHECK-BE-NEXT: adds r0, r0, r3
-; CHECK-BE-NEXT: adcs r1, r2
-; CHECK-BE-NEXT: vmov q0[2], q0[0], r1, lr
-; CHECK-BE-NEXT: vmov q0[3], q0[1], r0, r12
+; CHECK-BE-NEXT: add.w r12, sp, #8
+; CHECK-BE-NEXT: vldrw.u32 q0, [r12]
+; CHECK-BE-NEXT: vmov lr, s3
+; CHECK-BE-NEXT: vmov r12, s2
+; CHECK-BE-NEXT: adds.w lr, lr, r3
+; CHECK-BE-NEXT: vmov r3, s0
+; CHECK-BE-NEXT: adc.w r12, r12, r2
+; CHECK-BE-NEXT: vmov r2, s1
+; CHECK-BE-NEXT: adds r1, r1, r2
+; CHECK-BE-NEXT: adcs r0, r3
+; CHECK-BE-NEXT: vmov q0[2], q0[0], r0, r12
+; CHECK-BE-NEXT: vmov q0[3], q0[1], r1, lr
 ; CHECK-BE-NEXT: vrev64.32 q1, q0
 ; CHECK-BE-NEXT: vmov r1, r0, d2
 ; CHECK-BE-NEXT: vmov r3, r2, d3
@@ -149,24 +136,18 @@
 ; CHECK-FP: @ %bb.0: @ %entry
 ; CHECK-FP-NEXT: .save {r7, lr}
 ; CHECK-FP-NEXT: push {r7, lr}
-; CHECK-FP-NEXT: vmov d1, r2, r3
-; CHECK-FP-NEXT: vmov d0, r0, r1
-; CHECK-FP-NEXT: add r0, sp, #8
-; CHECK-FP-NEXT: vldrw.u32 q1, [r0]
-; CHECK-FP-NEXT: vmov r1, s2
-; CHECK-FP-NEXT: vmov r0, s3
-; CHECK-FP-NEXT: vmov r3, s6
-; CHECK-FP-NEXT: vmov r2, s7
-; CHECK-FP-NEXT: adds.w lr, r1, r3
-; CHECK-FP-NEXT: vmov r3, s0
-; CHECK-FP-NEXT: vmov r1, s4
-; CHECK-FP-NEXT: adc.w r12, r0, r2
-; CHECK-FP-NEXT: vmov r2, s1
-; CHECK-FP-NEXT: vmov r0, s5
-; CHECK-FP-NEXT: adds r1, r1, r3
-; CHECK-FP-NEXT: vmov q0[2], q0[0], r1, lr
-; CHECK-FP-NEXT: adcs r0, r2
-; CHECK-FP-NEXT: vmov q0[3], q0[1], r0, r12
+; CHECK-FP-NEXT: add.w r12, sp, #8
+; CHECK-FP-NEXT: vldrw.u32 q0, [r12]
+; CHECK-FP-NEXT: vmov lr, s2
+; CHECK-FP-NEXT: vmov r12, s3
+; CHECK-FP-NEXT: adds.w lr, lr, r2
+; CHECK-FP-NEXT: vmov r2, s0
+; CHECK-FP-NEXT: adc.w r12, r12, r3
+; CHECK-FP-NEXT: vmov r3, s1
+; CHECK-FP-NEXT: adds r0, r0, r2
+; CHECK-FP-NEXT: adcs r1, r3
+; CHECK-FP-NEXT: vmov q0[2], q0[0], r0, lr
+; CHECK-FP-NEXT: vmov q0[3], q0[1], r1, r12
 ; CHECK-FP-NEXT: vmov r0, r1, d0
 ; CHECK-FP-NEXT: vmov r2, r3, d1
 ; CHECK-FP-NEXT: pop {r7, pc}
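A standalone sketch of the lane-selection arithmetic the new combine uses (plain C++, not LLVM code; the VMovDRR struct and extractLane helper are made-up names for illustration only): lanes 0-1 of the v4i32 view come from the first VMOVDRR, lanes 2-3 from the second, and within each f64 the word order flips between little- and big-endian subtargets, mirroring `BV.getOperand(Offset < 2 ? 0 : 1)` and `ST->isLittle() ? Offset % 2 : 1 - Offset % 2` in the patch.

    #include <cassert>
    #include <cstdio>

    // Hypothetical stand-in for an ARMISD::VMOVDRR node: two GPR operands,
    // LoReg = low 32-bit word, HiReg = high word of the f64 it builds.
    struct VMovDRR { unsigned LoReg, HiReg; };

    // Mirror of the combine's index math: Offset picks the BUILD_VECTOR
    // element, then the low bit (flipped on big-endian) picks which GPR
    // operand already holds the extracted i32 lane.
    unsigned extractLane(const VMovDRR BV[2], unsigned Offset, bool IsLittle) {
      assert(Offset < 4 && "v4i32 has four lanes");
      const VMovDRR &MOV = BV[Offset < 2 ? 0 : 1];
      unsigned Word = IsLittle ? Offset % 2 : 1 - Offset % 2;
      return Word == 0 ? MOV.LoReg : MOV.HiReg;
    }

    int main() {
      // Two f64 elements, each built from a (lo, hi) GPR pair r0..r3.
      VMovDRR BV[2] = {{/*LoReg=*/0, /*HiReg=*/1}, {/*LoReg=*/2, /*HiReg=*/3}};
      for (unsigned Lane = 0; Lane < 4; ++Lane)
        std::printf("lane %u -> LE r%u, BE r%u\n", Lane,
                    extractLane(BV, Lane, /*IsLittle=*/true),
                    extractLane(BV, Lane, /*IsLittle=*/false));
    }

The updated CHECK lines reflect this: the vmov d0, r0, r1 / vmov d1, r2, r3 moves and the subsequent extracts of those lanes are gone, and the 64-bit adds consume the incoming GPRs r0-r3 directly.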