Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -13690,6 +13690,26 @@ return Result; } + // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d + if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(InDouble.getOperand(1))) { + SDValue BV = InDouble.getOperand(0); + // Look up through any nop bitcasts + while ( + (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64) && + BV.getOpcode() == ISD::BITCAST) + BV = BV.getOperand(0); + if (BV.getValueType() != MVT::v4i32 || BV.getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); + unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0; + if (Subtarget->isLittle()) + return DCI.DAG.getMergeValues( + {BV.getOperand(Offset), BV.getOperand(Offset + 1)}, SDLoc(N)); + else + return DCI.DAG.getMergeValues( + {BV.getOperand(Offset + 1), BV.getOperand(Offset)}, SDLoc(N)); + } + return SDValue(); } Index: llvm/test/CodeGen/ARM/func-argpassing-endian.ll =================================================================== --- llvm/test/CodeGen/ARM/func-argpassing-endian.ll +++ llvm/test/CodeGen/ARM/func-argpassing-endian.ll @@ -100,35 +100,13 @@ } define <4 x i32> @return_v4i32() { -; CHECK-LE-LABEL: return_v4i32: -; CHECK-LE: @ %bb.0: -; CHECK-LE-NEXT: adr r0, .LCPI6_0 -; CHECK-LE-NEXT: vld1.64 {d16, d17}, [r0:128] -; CHECK-LE-NEXT: vmov r0, r1, d16 -; CHECK-LE-NEXT: vmov r2, r3, d17 -; CHECK-LE-NEXT: bx lr -; CHECK-LE-NEXT: .p2align 4 -; CHECK-LE-NEXT: @ %bb.1: -; CHECK-LE-NEXT: .LCPI6_0: -; CHECK-LE-NEXT: .long 42 @ double 9.1245819032257467E-313 -; CHECK-LE-NEXT: .long 43 -; CHECK-LE-NEXT: .long 44 @ double 9.5489810615176143E-313 -; CHECK-LE-NEXT: .long 45 -; -; CHECK-BE-LABEL: return_v4i32: -; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: adr r0, .LCPI6_0 -; CHECK-BE-NEXT: vld1.64 {d16, d17}, [r0:128] -; CHECK-BE-NEXT: vmov r1, r0, d16 -; 
CHECK-BE-NEXT: vmov r3, r2, d17 -; CHECK-BE-NEXT: bx lr -; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: @ %bb.1: -; CHECK-BE-NEXT: .LCPI6_0: -; CHECK-BE-NEXT: .long 42 @ double 8.912382324178626E-313 -; CHECK-BE-NEXT: .long 43 -; CHECK-BE-NEXT: .long 44 @ double 9.3367814824704935E-313 -; CHECK-BE-NEXT: .long 45 +; CHECK-LABEL: return_v4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #42 +; CHECK-NEXT: mov r1, #43 +; CHECK-NEXT: mov r2, #44 +; CHECK-NEXT: mov r3, #45 +; CHECK-NEXT: bx lr ret < 4 x i32> < i32 42, i32 43, i32 44, i32 45 > } Index: llvm/test/CodeGen/ARM/vcvt.ll =================================================================== --- llvm/test/CodeGen/ARM/vcvt.ll +++ llvm/test/CodeGen/ARM/vcvt.ll @@ -258,26 +258,24 @@ define <2 x i64> @fix_float_to_i64(<2 x float> %in) { ; CHECK-LABEL: fix_float_to_i64: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .save {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r11, lr} +; CHECK-NEXT: .vsave {d8} +; CHECK-NEXT: vpush {d8} ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vadd.f32 d8, d16, d16 -; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: vmov r1, s16 -; CHECK-NEXT: vmov.32 d9[0], r0 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vmov.32 d8[0], r0 -; CHECK-NEXT: vmov.32 d9[1], r4 -; CHECK-NEXT: vmov.32 d8[1], r1 -; CHECK-NEXT: vmov r2, r3, d9 -; CHECK-NEXT: vmov r0, r1, d8 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, lr} +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: vpop {d8} +; CHECK-NEXT: pop {r4, r5, r11, lr} ; CHECK-NEXT: mov pc, lr %scale = fmul <2 x float> %in, @@ -320,30 +318,26 @@ define <2 x i64> @fix_double_to_i64(<2 x double> %in) { ; 
CHECK-LABEL: fix_double_to_i64: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov d16, r2, r3 -; CHECK-NEXT: vadd.f64 d16, d16, d16 -; CHECK-NEXT: vmov r2, r3, d16 +; CHECK-NEXT: .save {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r11, lr} +; CHECK-NEXT: .vsave {d8} +; CHECK-NEXT: vpush {d8} ; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vadd.f64 d16, d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov d16, r2, r3 ; CHECK-NEXT: vadd.f64 d8, d16, d16 -; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: mov r1, r3 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: vmov r2, r1, d8 -; CHECK-NEXT: vmov.32 d9[0], r0 -; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: vmov.32 d8[0], r0 -; CHECK-NEXT: vmov.32 d9[1], r4 -; CHECK-NEXT: vmov.32 d8[1], r1 -; CHECK-NEXT: vmov r2, r3, d9 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmov r0, r1, d8 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, lr} +; CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: vpop {d8} +; CHECK-NEXT: pop {r4, r5, r11, lr} ; CHECK-NEXT: mov pc, lr %scale = fmul <2 x double> %in, %conv = fptoui <2 x double> %scale to <2 x i64> Index: llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll +++ llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll @@ -59,22 +59,18 @@ ; CHECK-FP-NEXT: vmov d0, r0, r1 ; CHECK-FP-NEXT: add r0, sp, #8 ; CHECK-FP-NEXT: vldrw.u32 q1, [r0] -; CHECK-FP-NEXT: vmov r1, s2 -; CHECK-FP-NEXT: vmov r0, s3 -; CHECK-FP-NEXT: vmov r3, s6 -; CHECK-FP-NEXT: vmov r2, s7 -; CHECK-FP-NEXT: adds.w lr, r1, r3 -; CHECK-FP-NEXT: vmov r3, s0 -; CHECK-FP-NEXT: vmov r1, s4 -; CHECK-FP-NEXT: adc.w r12, r0, r2 -; CHECK-FP-NEXT: vmov r2, 
s1 -; CHECK-FP-NEXT: vmov r0, s5 -; CHECK-FP-NEXT: adds r1, r1, r3 -; CHECK-FP-NEXT: vmov q0[2], q0[0], r1, lr -; CHECK-FP-NEXT: adcs r0, r2 -; CHECK-FP-NEXT: vmov q0[3], q0[1], r0, r12 -; CHECK-FP-NEXT: vmov r0, r1, d0 -; CHECK-FP-NEXT: vmov r2, r3, d1 +; CHECK-FP-NEXT: vmov lr, s2 +; CHECK-FP-NEXT: vmov r0, s0 +; CHECK-FP-NEXT: vmov r3, s4 +; CHECK-FP-NEXT: vmov r1, s1 +; CHECK-FP-NEXT: vmov r2, s5 +; CHECK-FP-NEXT: vmov r12, s3 +; CHECK-FP-NEXT: adds r0, r0, r3 +; CHECK-FP-NEXT: vmov r3, s7 +; CHECK-FP-NEXT: adcs r1, r2 +; CHECK-FP-NEXT: vmov r2, s6 +; CHECK-FP-NEXT: adds.w r2, r2, lr +; CHECK-FP-NEXT: adc.w r3, r3, r12 ; CHECK-FP-NEXT: pop {r7, pc} entry: %sum = add <2 x i64> %lhs, %rhs Index: llvm/test/CodeGen/Thumb2/mve-vcreate.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vcreate.ll +++ llvm/test/CodeGen/Thumb2/mve-vcreate.ll @@ -230,26 +230,23 @@ define hidden <8 x i16> @create_i16(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d, i16 zeroext %a2, i16 zeroext %b2, i16 zeroext %c2, i16 zeroext %d2) local_unnamed_addr #0 { ; CHECK-LABEL: create_i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r5, r7, r9, lr} -; CHECK-NEXT: push.w {r5, r7, r9, lr} -; CHECK-NEXT: ldr.w r12, [sp, #24] -; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: ldr r5, [sp, #28] -; CHECK-NEXT: lsll r2, r9, #16 -; CHECK-NEXT: lsll r12, r7, #16 -; CHECK-NEXT: orr.w r5, r5, r12 -; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: vmov q0[2], q0[0], r2, r5 -; CHECK-NEXT: ldrd r2, r3, [sp, #16] ; CHECK-NEXT: orr.w r0, r1, r0, lsl #16 -; CHECK-NEXT: orr.w r0, r0, r9 -; CHECK-NEXT: orr.w r2, r3, r2, lsl #16 -; CHECK-NEXT: orrs r2, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r0, r2 -; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: pop.w {r5, r7, r9, pc} +; CHECK-NEXT: lsll r2, r7, #16 +; CHECK-NEXT: ldr r4, [sp, #24] +; 
CHECK-NEXT: orr.w r1, r0, r7 +; CHECK-NEXT: ldrd r0, r7, [sp, #16] +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: lsll r4, r5, #16 +; CHECK-NEXT: orr.w r0, r7, r0, lsl #16 +; CHECK-NEXT: orr.w r12, r0, r5 +; CHECK-NEXT: orr.w r0, r2, r3 +; CHECK-NEXT: ldr r2, [sp, #28] +; CHECK-NEXT: mov r3, r12 +; CHECK-NEXT: orrs r2, r4 +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %conv = zext i16 %a to i64 %shl = shl nuw i64 %conv, 48 @@ -308,59 +305,58 @@ define hidden <16 x i8> @create_i8(i8 zeroext %a1, i8 zeroext %b1, i8 zeroext %c1, i8 zeroext %d1, i8 zeroext %a2, i8 zeroext %b2, i8 zeroext %c2, i8 zeroext %d2, i8 zeroext %a3, i8 zeroext %b3, i8 zeroext %c3, i8 zeroext %d3, i8 zeroext %a4, i8 zeroext %b4, i8 zeroext %c4, i8 zeroext %d4) local_unnamed_addr #0 { ; CHECK-LABEL: create_i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: ldr r4, [sp, #68] +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: ldr r4, [sp, #40] ; CHECK-NEXT: mov.w r11, #0 -; CHECK-NEXT: ldr r6, [sp, #64] -; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: lsll r4, r11, #16 -; CHECK-NEXT: mov lr, r1 -; CHECK-NEXT: lsll r6, r5, #24 +; CHECK-NEXT: ldr r6, [sp, #36] ; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: lsll r4, r11, #16 +; CHECK-NEXT: mov r10, r1 +; CHECK-NEXT: lsll r6, r7, #24 +; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: orr.w r1, r6, r4 -; CHECK-NEXT: ldr r4, [sp, #72] -; CHECK-NEXT: mov r12, r3 -; CHECK-NEXT: ldr r3, [sp, #76] -; CHECK-NEXT: lsll r4, r7, #8 -; CHECK-NEXT: ldr r6, [sp, #36] -; CHECK-NEXT: orrs r1, r4 -; CHECK-NEXT: ldr r4, [sp, #32] -; CHECK-NEXT: orr.w r8, r1, r3 -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: lsll r6, r3, #16 -; CHECK-NEXT: lsll r4, r1, #24 -; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: orrs r4, r6 -; CHECK-NEXT: ldr r6, [sp, #40] -; CHECK-NEXT: lsll r6, r9, #8 -; CHECK-NEXT: orrs 
r4, r6 ; CHECK-NEXT: ldr r6, [sp, #44] -; CHECK-NEXT: orrs r4, r6 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: ldr r4, [sp, #72] +; CHECK-NEXT: lsll r6, r3, #8 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: orrs r1, r6 ; CHECK-NEXT: ldr r6, [sp, #48] -; CHECK-NEXT: vmov q0[2], q0[0], r4, r8 -; CHECK-NEXT: ldr r4, [sp, #52] -; CHECK-NEXT: lsls r4, r4, #16 -; CHECK-NEXT: orr.w r4, r4, r6, lsl #22 -; CHECK-NEXT: ldr r6, [sp, #56] -; CHECK-NEXT: orr.w r4, r4, r6, lsl #8 -; CHECK-NEXT: ldr r6, [sp, #60] -; CHECK-NEXT: add r4, r6 -; CHECK-NEXT: orrs r4, r5 -; CHECK-NEXT: orr.w r4, r4, r11 -; CHECK-NEXT: orrs r4, r7 -; CHECK-NEXT: lsl.w r7, lr, #16 -; CHECK-NEXT: orr.w r0, r7, r0, lsl #22 +; CHECK-NEXT: lsll r4, r5, #16 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: orr.w r12, r1, r6 +; CHECK-NEXT: ldr r6, [sp, #68] +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: lsll r6, r1, #24 +; CHECK-NEXT: orrs r6, r4 +; CHECK-NEXT: ldr r4, [sp, #76] +; CHECK-NEXT: lsll r4, r9, #8 +; CHECK-NEXT: orrs r6, r4 +; CHECK-NEXT: ldr r4, [sp, #80] +; CHECK-NEXT: orr.w lr, r6, r4 +; CHECK-NEXT: lsl.w r4, r10, #16 +; CHECK-NEXT: orr.w r0, r4, r0, lsl #22 ; CHECK-NEXT: orr.w r0, r0, r2, lsl #8 -; CHECK-NEXT: add r0, r12 +; CHECK-NEXT: add r0, r8 +; CHECK-NEXT: orrs r0, r7 +; CHECK-NEXT: orr.w r0, r0, r11 +; CHECK-NEXT: orr.w r2, r0, r3 +; CHECK-NEXT: ldr r0, [sp, #56] +; CHECK-NEXT: ldr r3, [sp, #52] +; CHECK-NEXT: lsls r0, r0, #16 +; CHECK-NEXT: orr.w r0, r0, r3, lsl #22 +; CHECK-NEXT: ldr r3, [sp, #60] +; CHECK-NEXT: orr.w r0, r0, r3, lsl #8 +; CHECK-NEXT: ldr r3, [sp, #64] +; CHECK-NEXT: add r0, r3 ; CHECK-NEXT: orrs r0, r1 -; CHECK-NEXT: orrs r0, r3 -; CHECK-NEXT: orr.w r0, r0, r9 -; CHECK-NEXT: vmov q0[3], q0[1], r0, r4 -; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: vmov r2, r3, d1 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-NEXT: mov r1, r2 +; CHECK-NEXT: orrs r0, r5 +; CHECK-NEXT: mov r2, lr +; CHECK-NEXT: orr.w r3, r0, r9 +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, 
r8, r9, r10, r11, pc} entry: %conv = zext i8 %a1 to i64 %shl = shl nuw nsw i64 %conv, 54