Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -7181,7 +7181,7 @@ return DAG.getUNDEF(VT); if ((ST->hasNEON() && SplatBitSize <= 64) || - (ST->hasMVEIntegerOps() && SplatBitSize <= 32)) { + (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) { // Check if an immediate VMOV works. EVT VmovVT; SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(), Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2382,6 +2382,8 @@ (v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>; def : Pat<(v4i32 (ARMvmovImm timm:$simm)), (v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>; + def : Pat<(v2i64 (ARMvmovImm timm:$simm)), + (v2i64 (MVE_VMOVimmi64 nImmSplatI64:$simm))>; def : Pat<(v8i16 (ARMvmvnImm timm:$simm)), (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>; Index: llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll +++ llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll @@ -251,21 +251,13 @@ ; CHECK-LABEL: ptr_v2i16_zext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: ldrd r1, r0, [r0] -; CHECK-NEXT: adr r2, .LCPI9_0 +; CHECK-NEXT: vmov.i64 q0, #0xffff ; CHECK-NEXT: ldrh r0, [r0] -; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: ldrh r1, [r1] ; CHECK-NEXT: vmov.32 q1[0], r1 ; CHECK-NEXT: vmov.32 q1[2], r0 ; CHECK-NEXT: vand q0, q1, q0 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI9_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %offs = load <2 x i16*>, <2 x i16*>* %offptr, align 4 %gather = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> %offs, i32 2, <2 x i1> , <2 x i16> undef) Index: llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll +++ llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll @@ -408,6 +408,7 @@ ; CHECK-LE-NEXT: ldrd lr, r12, [r1] ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: @ implicit-def: $q1 +; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff ; CHECK-LE-NEXT: rsbs.w r3, lr, #0 ; CHECK-LE-NEXT: vmov.32 q0[0], lr ; CHECK-LE-NEXT: sbcs.w r3, r1, lr, asr #31 @@ -424,23 +425,21 @@ ; CHECK-LE-NEXT: bfi r1, lr, #0, #1 ; CHECK-LE-NEXT: vmov.32 q0[2], r12 ; CHECK-LE-NEXT: and r3, r1, #3 -; CHECK-LE-NEXT: adr.w r12, .LCPI7_0 +; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: lsls r1, r1, #31 ; CHECK-LE-NEXT: itt ne ; CHECK-LE-NEXT: ldrne r1, [r2] ; CHECK-LE-NEXT: vmovne.32 q1[0], r1 ; CHECK-LE-NEXT: lsls r1, r3, #30 -; CHECK-LE-NEXT: vmov r3, s0 ; CHECK-LE-NEXT: itt mi ; CHECK-LE-NEXT: ldrmi r1, [r2, #4] ; CHECK-LE-NEXT: vmovmi.32 q1[2], r1 +; CHECK-LE-NEXT: vmov r1, s0 ; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vldrw.u32 q2, [r12] -; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: vand q1, q1, q2 -; CHECK-LE-NEXT: rsbs r1, r3, #0 -; CHECK-LE-NEXT: sbcs.w r1, r2, r3, asr #31 +; CHECK-LE-NEXT: rsbs r3, r1, #0 ; CHECK-LE-NEXT: vmov r3, s2 +; CHECK-LE-NEXT: sbcs.w r1, r2, r1, asr #31 ; CHECK-LE-NEXT: it lt ; CHECK-LE-NEXT: movlt.w r12, #1 ; CHECK-LE-NEXT: rsbs r1, r3, #0 @@ -460,13 +459,6 @@ ; CHECK-LE-NEXT: vstrmi d3, [r0, #8] ; CHECK-LE-NEXT: add sp, #4 ; CHECK-LE-NEXT: pop {r7, pc} -; CHECK-LE-NEXT: .p2align 4 -; CHECK-LE-NEXT: @ %bb.1: -; CHECK-LE-NEXT: .LCPI7_0: -; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-LE-NEXT: .long 0 @ 0x0 -; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-LE-NEXT: .long 0 @ 0x0 ; ; CHECK-BE-LABEL: foo_zext_v2i64_v2i32: ; CHECK-BE: @ %bb.0: @ %entry @@ -511,15 +503,13 @@ ; CHECK-BE-NEXT: .LBB7_4: @ %else2 ; CHECK-BE-NEXT: vrev64.32 q3, q2 ; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vmov r3, s15 -; CHECK-BE-NEXT: adr.w r12, .LCPI7_0 -; CHECK-BE-NEXT: vldrb.u8 q0, [r12] +; CHECK-BE-NEXT: vmov r1, s15 ; CHECK-BE-NEXT: mov.w r12, #0 -; CHECK-BE-NEXT: vrev64.8 q2, q0 -; CHECK-BE-NEXT: vand q0, q1, q2 -; CHECK-BE-NEXT: rsbs r1, r3, #0 -; CHECK-BE-NEXT: sbcs.w r1, r2, r3, asr #31 +; CHECK-BE-NEXT: vmov.i64 q0, #0xffffffff +; CHECK-BE-NEXT: vand q0, q1, q0 +; CHECK-BE-NEXT: rsbs r3, r1, #0 ; CHECK-BE-NEXT: vmov r3, s13 +; CHECK-BE-NEXT: sbcs.w r1, r2, r1, asr #31 ; CHECK-BE-NEXT: it lt ; CHECK-BE-NEXT: movlt.w r12, #1 ; CHECK-BE-NEXT: rsbs r1, r3, #0 @@ -539,13 +529,6 @@ ; CHECK-BE-NEXT: vstrmi d1, [r0, #8] ; CHECK-BE-NEXT: add sp, #4 ; CHECK-BE-NEXT: pop {r7, pc} -; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: @ %bb.5: -; CHECK-BE-NEXT: .LCPI7_0: -; CHECK-BE-NEXT: .long 0 @ 0x0 -; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-BE-NEXT: .long 0 @ 0x0 -; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff entry: %0 = load <2 x i32>, <2 x i32>* %mask, align 4 %1 = icmp sgt <2 x i32> %0, zeroinitializer @@ -565,6 +548,7 @@ ; CHECK-LE-NEXT: ldrd lr, r12, [r1] ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: @ implicit-def: $q1 +; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff ; CHECK-LE-NEXT: rsbs.w r3, lr, #0 ; CHECK-LE-NEXT: vmov.32 q0[0], lr ; CHECK-LE-NEXT: sbcs.w r3, r1, lr, asr #31 @@ -581,23 +565,21 @@ ; CHECK-LE-NEXT: bfi r1, lr, #0, #1 ; CHECK-LE-NEXT: vmov.32 q0[2], r12 ; CHECK-LE-NEXT: and r3, r1, #3 -; CHECK-LE-NEXT: adr.w r12, .LCPI8_0 +; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: lsls r1, r1, #31 ; CHECK-LE-NEXT: itt ne ; CHECK-LE-NEXT: ldrne r1, [r2] ; CHECK-LE-NEXT: vmovne.32 q1[0], r1 ; CHECK-LE-NEXT: lsls r1, r3, #30 -; CHECK-LE-NEXT: vmov r3, s0 ; CHECK-LE-NEXT: itt mi ; CHECK-LE-NEXT: ldrmi r1, [r2, #4] ; CHECK-LE-NEXT: vmovmi.32 q1[2], r1 +; CHECK-LE-NEXT: vmov r1, s0 ; CHECK-LE-NEXT: movs r2, #0 -; CHECK-LE-NEXT: vldrw.u32 q2, [r12] -; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: vand q1, q1, q2 -; CHECK-LE-NEXT: rsbs r1, r3, #0 -; CHECK-LE-NEXT: sbcs.w r1, r2, r3, asr #31 +; CHECK-LE-NEXT: rsbs r3, r1, #0 ; CHECK-LE-NEXT: vmov r3, s2 +; CHECK-LE-NEXT: sbcs.w r1, r2, r1, asr #31 ; CHECK-LE-NEXT: it lt ; CHECK-LE-NEXT: movlt.w r12, #1 ; CHECK-LE-NEXT: rsbs r1, r3, #0 @@ -619,13 +601,6 @@ ; CHECK-LE-NEXT: strdmi r1, r2, [r0, #8] ; CHECK-LE-NEXT: add sp, #4 ; CHECK-LE-NEXT: pop {r7, pc} -; CHECK-LE-NEXT: .p2align 4 -; CHECK-LE-NEXT: @ %bb.1: -; CHECK-LE-NEXT: .LCPI8_0: -; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-LE-NEXT: .long 0 @ 0x0 -; CHECK-LE-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-LE-NEXT: .long 0 @ 0x0 ; ; CHECK-BE-LABEL: foo_zext_v2i64_v2i32_unaligned: ; CHECK-BE: @ %bb.0: @ %entry @@ -670,15 +645,13 @@ ; CHECK-BE-NEXT: .LBB8_4: @ %else2 ; CHECK-BE-NEXT: vrev64.32 q3, q2 ; CHECK-BE-NEXT: movs r2, #0 -; CHECK-BE-NEXT: vmov r3, s15 -; CHECK-BE-NEXT: adr.w r12, .LCPI8_0 -; CHECK-BE-NEXT: vldrb.u8 q0, [r12] +; CHECK-BE-NEXT: vmov r1, s15 ; CHECK-BE-NEXT: mov.w r12, #0 -; CHECK-BE-NEXT: vrev64.8 q2, q0 -; CHECK-BE-NEXT: vand q0, q1, q2 -; CHECK-BE-NEXT: rsbs r1, r3, #0 -; CHECK-BE-NEXT: sbcs.w r1, r2, r3, asr #31 +; CHECK-BE-NEXT: vmov.i64 q0, #0xffffffff +; CHECK-BE-NEXT: vand q0, q1, q0 +; CHECK-BE-NEXT: rsbs r3, r1, #0 ; CHECK-BE-NEXT: vmov r3, s13 +; CHECK-BE-NEXT: sbcs.w r1, r2, r1, asr #31 ; CHECK-BE-NEXT: it lt ; CHECK-BE-NEXT: movlt.w r12, #1 ; CHECK-BE-NEXT: rsbs r1, r3, #0 @@ -700,13 +673,6 @@ ; CHECK-BE-NEXT: strdmi r2, r1, [r0, #8] ; CHECK-BE-NEXT: add sp, #4 ; CHECK-BE-NEXT: pop {r7, pc} -; CHECK-BE-NEXT: .p2align 4 -; CHECK-BE-NEXT: @ %bb.5: -; CHECK-BE-NEXT: .LCPI8_0: -; CHECK-BE-NEXT: .long 0 @ 0x0 -; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-BE-NEXT: .long 0 @ 0x0 -; CHECK-BE-NEXT: .long 4294967295 @ 0xffffffff entry: %0 = load <2 x i32>, <2 x i32>* %mask, align 4 %1 = icmp sgt <2 x i32> %0, zeroinitializer Index: llvm/test/CodeGen/Thumb2/mve-sext.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-sext.ll +++ llvm/test/CodeGen/Thumb2/mve-sext.ll @@ -430,17 +430,9 @@ define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) { ; CHECK-LABEL: zext_v2i32_v2i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r0, .LCPI20_0 -; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmov.i64 q1, #0xffffffff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI20_0: -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %0 = zext <2 x i32> %src to <2 x i64> ret <2 x i64> %0 Index: llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll +++ llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll @@ -36,8 +36,7 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x) { ; CHECK-LABEL: add_v2i32_v2i64_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r0, .LCPI3_0 -; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmov.i64 q1, #0xffffffff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: vmov r3, s0 @@ -46,13 +45,6 @@ ; CHECK-NEXT: adds r0, r0, r3 ; CHECK-NEXT: adcs r1, r2 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI3_0: -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i32> %x to <2 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) @@ -138,11 +130,10 @@ ; CHECK-LABEL: add_v8i16_v8i64_zext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vmov.32 q2[0], r0 ; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: adr r0, .LCPI10_0 -; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vand q2, q2, q1 ; CHECK-NEXT: vmov r0, s10 ; CHECK-NEXT: vmov r1, s8 @@ -182,13 +173,6 @@ ; CHECK-NEXT: adds r0, r0, r3 ; CHECK-NEXT: adcs r1, r2 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI10_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <8 x i16> %x to <8 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx) @@ -265,21 +249,13 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x) { ; CHECK-LABEL: add_v2i16_v2i64_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r0, .LCPI12_0 -; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: vmov r1, s3 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI12_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i16> %x to <2 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) @@ -489,11 +465,10 @@ ; CHECK-LABEL: add_v16i8_v16i64_zext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.u8 r0, q0[0] +; CHECK-NEXT: vmov.i64 q1, #0xff ; CHECK-NEXT: vmov.32 q2[0], r0 ; CHECK-NEXT: vmov.u8 r0, q0[1] ; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: adr r0, .LCPI23_0 -; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vand q2, q2, q1 ; CHECK-NEXT: vmov r0, s10 ; CHECK-NEXT: vmov r1, s8 @@ -585,13 +560,6 @@ ; CHECK-NEXT: adds r0, r0, r3 ; CHECK-NEXT: adcs r1, r2 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI23_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <16 x i8> %x to <16 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx) @@ -736,21 +704,13 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x) { ; CHECK-LABEL: add_v2i8_v2i64_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r0, .LCPI25_0 -; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vmov.i64 q1, #0xff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: add r0, r1 ; CHECK-NEXT: vmov r1, s3 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI25_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i8> %x to <2 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) @@ -832,8 +792,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adr r2, .LCPI31_0 -; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vmov.i64 q1, #0xffffffff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: vmov r3, s0 @@ -844,13 +803,6 @@ ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: pop {r7, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI31_0: -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i32> %x to <2 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) @@ -947,11 +899,10 @@ ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov.u16 r2, q0[0] +; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vmov.32 q2[0], r2 ; CHECK-NEXT: vmov.u16 r2, q0[1] ; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: adr r2, .LCPI38_0 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vand q2, q2, q1 ; CHECK-NEXT: vmov r2, s10 ; CHECK-NEXT: vmov r3, s8 @@ -993,13 +944,6 @@ ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: pop {r4, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI38_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <8 x i16> %x to <8 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> %xx) @@ -1082,8 +1026,7 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) { ; CHECK-LABEL: add_v2i16_v2i64_acc_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r2, .LCPI40_0 -; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vmov.i64 q1, #0xffff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: vmov r3, s0 @@ -1092,13 +1035,6 @@ ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI40_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i16> %x to <2 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) @@ -1323,11 +1259,10 @@ ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: vmov.u8 r2, q0[0] +; CHECK-NEXT: vmov.i64 q1, #0xff ; CHECK-NEXT: vmov.32 q2[0], r2 ; CHECK-NEXT: vmov.u8 r2, q0[1] ; CHECK-NEXT: vmov.32 q2[2], r2 -; CHECK-NEXT: adr r2, .LCPI51_0 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vand q2, q2, q1 ; CHECK-NEXT: vmov r2, s10 ; CHECK-NEXT: vmov r3, s8 @@ -1421,13 +1356,6 @@ ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: pop {r4, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI51_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <16 x i8> %x to <16 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> %xx) @@ -1578,8 +1506,7 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) { ; CHECK-LABEL: add_v2i8_v2i64_acc_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r2, .LCPI53_0 -; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vmov.i64 q1, #0xff ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: vmov r3, s0 @@ -1588,13 +1515,6 @@ ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI53_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i8> %x to <2 x i64> %z = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %xx) Index: llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll +++ llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll @@ -174,8 +174,7 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) { ; CHECK-LABEL: add_v2i16_v2i64_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r0, .LCPI12_0 -; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vmov.i64 q2, #0xffff ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vmov r0, s4 @@ -185,13 +184,6 @@ ; CHECK-NEXT: umull r0, r1, r1, r0 ; CHECK-NEXT: umlal r0, r1, r3, r2 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI12_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i16> %x to <2 x i64> %yy = zext <2 x i16> %y to <2 x i64> @@ -497,11 +489,10 @@ ; CHECK-NEXT: vmov.u8 r1, q0[0] ; CHECK-NEXT: vmov.32 q3[0], r0 ; CHECK-NEXT: vmov.u8 r0, q1[1] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: adr r0, .LCPI23_0 -; CHECK-NEXT: vldrw.u32 q2, [r0] ; CHECK-NEXT: vmov.32 q4[0], r1 ; CHECK-NEXT: vmov.u8 r1, q0[1] +; CHECK-NEXT: vmov.32 q3[2], r0 +; CHECK-NEXT: vmov.i64 q2, #0xff ; CHECK-NEXT: vmov.32 q4[2], r1 ; CHECK-NEXT: vand q3, q3, q2 ; CHECK-NEXT: vand q4, q4, q2 @@ -693,13 +684,6 @@ ; CHECK-NEXT: umlal r0, r1, r3, r2 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r7, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI23_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <16 x i8> %x to <16 x i64> %yy = zext <16 x i8> %y to <16 x i64> @@ -878,8 +862,7 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: add_v2i8_v2i64_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r0, .LCPI25_0 -; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vmov.i64 q2, #0xff ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vmov r0, s6 @@ -891,13 +874,6 @@ ; CHECK-NEXT: add r0, r2 ; CHECK-NEXT: orrs r1, r3 ; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI25_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i8> %x to <2 x i64> %yy = zext <2 x i8> %y to <2 x i64> @@ -1154,8 +1130,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adr r2, .LCPI40_0 -; CHECK-NEXT: vldrw.u32 q2, [r2] +; CHECK-NEXT: vmov.i64 q2, #0xffff ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vmov r2, s4 @@ -1167,13 +1142,6 @@ ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adc.w r1, r1, lr ; CHECK-NEXT: pop {r7, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI40_0: -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 65535 @ 0xffff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i16> %x to <2 x i64> %yy = zext <2 x i16> %y to <2 x i64> @@ -1494,17 +1462,16 @@ ; CHECK-NEXT: vmov.u8 r3, q0[0] ; CHECK-NEXT: vmov.32 q3[0], r2 ; CHECK-NEXT: vmov.u8 r2, q1[1] -; CHECK-NEXT: vmov.32 q3[2], r2 -; CHECK-NEXT: adr r2, .LCPI51_0 -; CHECK-NEXT: vldrw.u32 q2, [r2] ; CHECK-NEXT: vmov.32 q4[0], r3 ; CHECK-NEXT: vmov.u8 r3, q0[1] -; CHECK-NEXT: vmov.u8 r4, q0[2] +; CHECK-NEXT: vmov.32 q3[2], r2 +; CHECK-NEXT: vmov.i64 q2, #0xff ; CHECK-NEXT: vmov.32 q4[2], r3 ; CHECK-NEXT: vand q3, q3, q2 ; CHECK-NEXT: vand q4, q4, q2 ; CHECK-NEXT: vmov r2, s14 ; CHECK-NEXT: vmov r3, s18 +; CHECK-NEXT: vmov.u8 r4, q0[2] ; CHECK-NEXT: umull r12, lr, r3, r2 ; CHECK-NEXT: vmov r3, s16 ; CHECK-NEXT: vmov r2, s12 @@ -1692,13 +1659,6 @@ ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop {r4, r5, r7, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI51_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <16 x i8> %x to <16 x i64> %yy = zext <16 x i8> %y to <16 x i64> @@ -1885,8 +1845,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adr r2, .LCPI53_0 -; CHECK-NEXT: vldrw.u32 q2, [r2] +; CHECK-NEXT: vmov.i64 q2, #0xff ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vmov r2, s6 @@ -1900,13 +1859,6 @@ ; CHECK-NEXT: adds r0, r0, r2 ; CHECK-NEXT: adcs r1, r3 ; CHECK-NEXT: pop {r7, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI53_0: -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 255 @ 0xff -; CHECK-NEXT: .long 0 @ 0x0 entry: %xx = zext <2 x i8> %x to <2 x i64> %yy = zext <2 x i8> %y to <2 x i64> Index: llvm/test/CodeGen/Thumb2/mve-vmovimm.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vmovimm.ll +++ llvm/test/CodeGen/Thumb2/mve-vmovimm.ll @@ -355,30 +355,13 @@ define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff() { ; CHECKLE-LABEL: mov_int64_ff: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI19_0 -; CHECKLE-NEXT: vldrw.u32 q0, [r0] +; CHECKLE-NEXT: vmov.i64 q0, #0xff ; CHECKLE-NEXT: bx lr -; CHECKLE-NEXT: .p2align 4 -; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI19_0: -; CHECKLE-NEXT: .long 255 @ double 1.2598673968951787E-321 -; CHECKLE-NEXT: .long 0 -; CHECKLE-NEXT: .long 255 @ double 1.2598673968951787E-321 -; CHECKLE-NEXT: .long 0 ; ; CHECKBE-LABEL: mov_int64_ff: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI19_0 -; CHECKBE-NEXT: vldrb.u8 q1, [r0] -; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: vmov.i64 q0, #0xff00000000 ; CHECKBE-NEXT: bx lr -; CHECKBE-NEXT: .p2align 4 -; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI19_0: -; CHECKBE-NEXT: .long 0 @ double 1.2598673968951787E-321 -; CHECKBE-NEXT: .long 255 -; CHECKBE-NEXT: .long 0 @ double 1.2598673968951787E-321 -; CHECKBE-NEXT: .long 255 entry: ret <2 x i64> < i64 255, i64 255 > } @@ -401,30 +384,13 @@ define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff0000ff0000ffff() { ; CHECKLE-LABEL: mov_int64_ff0000ff0000ffff: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI21_0 -; CHECKLE-NEXT: vldrw.u32 q0, [r0] +; CHECKLE-NEXT: vmov.i64 q0, #0xff0000ff0000ffff ; CHECKLE-NEXT: bx lr -; CHECKLE-NEXT: .p2align 4 -; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI21_0: -; CHECKLE-NEXT: .long 65535 @ double -5.4874582226568829E+303 -; CHECKLE-NEXT: .long 4278190335 -; CHECKLE-NEXT: .long 65535 @ double -5.4874582226568829E+303 -; CHECKLE-NEXT: .long 4278190335 ; ; CHECKBE-LABEL: mov_int64_ff0000ff0000ffff: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI21_0 -; CHECKBE-NEXT: vldrb.u8 q1, [r0] -; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: vmov.i64 q0, #0xffffff0000ff ; CHECKBE-NEXT: bx lr -; CHECKBE-NEXT: .p2align 4 -; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI21_0: -; CHECKBE-NEXT: .long 4278190335 @ double -5.4874582226568829E+303 -; CHECKBE-NEXT: .long 65535 -; CHECKBE-NEXT: .long 4278190335 @ double -5.4874582226568829E+303 -; CHECKBE-NEXT: .long 65535 entry: ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 > } @@ -463,30 +429,13 @@ define arm_aapcs_vfpcc <16 x i8> @mov_int64_0f000f0f() { ; CHECKLE-LABEL: mov_int64_0f000f0f: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI23_0 -; CHECKLE-NEXT: vldrw.u32 q0, [r0] +; CHECKLE-NEXT: vmov.i64 q0, #0xff000000ff00ff ; CHECKLE-NEXT: bx lr -; CHECKLE-NEXT: .p2align 4 -; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI23_0: -; CHECKLE-NEXT: .long 16711935 @ double 7.0632744699731897E-304 -; CHECKLE-NEXT: .long 16711680 -; CHECKLE-NEXT: .long 16711935 @ double 7.0632744699731897E-304 -; CHECKLE-NEXT: .long 16711680 ; ; CHECKBE-LABEL: mov_int64_0f000f0f: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI23_0 -; CHECKBE-NEXT: vldrb.u8 q1, [r0] -; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: vmov.i64 q0, #0xff00ff00ff00 ; CHECKBE-NEXT: bx lr -; CHECKBE-NEXT: .p2align 4 -; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI23_0: -; CHECKBE-NEXT: .long 4278255360 @ double -5.8276674374138332E+303 -; CHECKBE-NEXT: .long 65280 -; CHECKBE-NEXT: .long 4278255360 @ double -5.8276674374138332E+303 -; CHECKBE-NEXT: .long 65280 entry: ret <16 x i8> } @@ -494,30 +443,13 @@ define arm_aapcs_vfpcc <8 x i16> @mov_int64_ff00ffff() { ; CHECKLE-LABEL: mov_int64_ff00ffff: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI24_0 -; CHECKLE-NEXT: vldrw.u32 q0, [r0] +; CHECKLE-NEXT: vmov.i64 q0, #0xffffffff0000ffff ; CHECKLE-NEXT: bx lr -; CHECKLE-NEXT: .p2align 4 -; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI24_0: -; CHECKLE-NEXT: .long 65535 @ double NaN -; CHECKLE-NEXT: .long 4294967295 -; CHECKLE-NEXT: .long 65535 @ double NaN -; CHECKLE-NEXT: .long 4294967295 ; ; CHECKBE-LABEL: mov_int64_ff00ffff: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI24_0 -; CHECKBE-NEXT: vldrb.u8 q1, [r0] -; CHECKBE-NEXT: vrev64.8 q0, q1 +; CHECKBE-NEXT: vmov.i64 q0, #0xffffffffffff0000 ; CHECKBE-NEXT: bx lr -; CHECKBE-NEXT: .p2align 4 -; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI24_0: -; CHECKBE-NEXT: .long 4294901760 @ double NaN -; CHECKBE-NEXT: .long 4294967295 -; CHECKBE-NEXT: .long 4294901760 @ double NaN -; CHECKBE-NEXT: .long 4294967295 entry: ret <8 x i16> } @@ -665,57 +597,18 @@ define arm_aapcs_vfpcc <16 x i8> @test(<16 x i8> %i) { ; CHECKLE-LABEL: test: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI31_0 -; CHECKLE-NEXT: vldrw.u32 q1, [r0] +; CHECKLE-NEXT: vmov.i64 q1, #0xff000000ff00ff ; CHECKLE-NEXT: vorr q0, q0, q1 ; CHECKLE-NEXT: bx lr -; CHECKLE-NEXT: .p2align 4 -; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI31_0: -; CHECKLE-NEXT: .byte 255 @ 0xff -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 255 @ 0xff -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 255 @ 0xff -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 255 @ 0xff -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 255 @ 0xff -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 0 @ 0x0 -; CHECKLE-NEXT: .byte 255 @ 0xff -; CHECKLE-NEXT: .byte 0 @ 0x0 ; ; CHECKBE-LABEL: test: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI31_0 +; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff00ff0000 +; CHECKBE-NEXT: vrev64.8 q2, q1 ; CHECKBE-NEXT: vrev64.8 q1, q0 -; CHECKBE-NEXT: vldrb.u8 q0, [r0] -; CHECKBE-NEXT: vorr q1, q1, q0 +; CHECKBE-NEXT: vorr q1, q1, q2 ; CHECKBE-NEXT: vrev64.8 q0, q1 ; CHECKBE-NEXT: bx lr -; CHECKBE-NEXT: .p2align 4 -; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI31_0: -; CHECKBE-NEXT: .byte 255 @ 0xff -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 255 @ 0xff -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 255 @ 0xff -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 255 @ 0xff -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 255 @ 0xff -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 0 @ 0x0 -; CHECKBE-NEXT: .byte 255 @ 0xff -; CHECKBE-NEXT: .byte 0 @ 0x0 entry: %o = or <16 x i8> %i, ret <16 x i8> %o @@ -724,41 +617,18 @@ define arm_aapcs_vfpcc <8 x i16> @test2(<8 x i16> %i) { ; CHECKLE-LABEL: test2: ; CHECKLE: @ %bb.0: @ %entry -; CHECKLE-NEXT: adr r0, .LCPI32_0 -; CHECKLE-NEXT: vldrw.u32 q1, [r0] +; CHECKLE-NEXT: vmov.i64 q1, #0xffffffff0000ffff ; CHECKLE-NEXT: vorr q0, q0, q1 ; CHECKLE-NEXT: bx lr -; CHECKLE-NEXT: .p2align 4 -; CHECKLE-NEXT: @ %bb.1: -; CHECKLE-NEXT: .LCPI32_0: -; CHECKLE-NEXT: .short 65535 @ 0xffff -; CHECKLE-NEXT: .short 0 @ 0x0 -; CHECKLE-NEXT: .short 65535 @ 0xffff -; CHECKLE-NEXT: .short 65535 @ 0xffff -; CHECKLE-NEXT: .short 65535 @ 0xffff -; CHECKLE-NEXT: .short 0 @ 0x0 -; CHECKLE-NEXT: .short 65535 @ 0xffff -; CHECKLE-NEXT: .short 65535 @ 0xffff ; ; CHECKBE-LABEL: test2: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: adr r0, .LCPI32_0 +; CHECKBE-NEXT: vmov.i64 q1, #0xffffffffffff +; CHECKBE-NEXT: vrev64.16 q2, q1 ; CHECKBE-NEXT: vrev64.16 q1, q0 -; CHECKBE-NEXT: vldrh.u16 q0, [r0] -; CHECKBE-NEXT: vorr q1, q1, q0 +; CHECKBE-NEXT: vorr q1, q1, q2 ; CHECKBE-NEXT: vrev64.16 q0, q1 ; CHECKBE-NEXT: bx lr -; CHECKBE-NEXT: .p2align 4 -; CHECKBE-NEXT: @ %bb.1: -; CHECKBE-NEXT: .LCPI32_0: -; CHECKBE-NEXT: .short 65535 @ 0xffff -; CHECKBE-NEXT: .short 0 @ 0x0 -; CHECKBE-NEXT: .short 65535 @ 0xffff -; CHECKBE-NEXT: .short 65535 @ 0xffff -; CHECKBE-NEXT: .short 65535 @ 0xffff -; CHECKBE-NEXT: .short 0 @ 0x0 -; CHECKBE-NEXT: .short 65535 @ 0xffff -; CHECKBE-NEXT: .short 65535 @ 0xffff entry: %o = or <8 x i16> %i, ret <8 x i16> %o