diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -9451,13 +9451,20 @@ // the bottom bits of the predicate. // Equally, VLDR for an v16i1 will actually load 32bits (so will be incorrect // for BE). + // Speaking of BE, apparently the rest of llvm will assume a reverse order to + // a natural VMSR(load), so needs to be reversed. SDLoc dl(Op); SDValue Load = DAG.getExtLoad( ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()), LD->getMemOperand()); - SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Load); + SDValue Val = Load; + if (DAG.getDataLayout().isBigEndian()) + Val = DAG.getNode(ISD::SRL, dl, MVT::i32, + DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load), + DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32)); + SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val); if (MemVT != MVT::v16i1) Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred, DAG.getConstant(0, dl, MVT::i32)); @@ -9498,14 +9505,22 @@ SDValue Build = ST->getValue(); if (MemVT != MVT::v16i1) { SmallVector Ops; - for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) + for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) { + unsigned Elt = DAG.getDataLayout().isBigEndian() + ? MemVT.getVectorNumElements() - I - 1 + : I; Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build, - DAG.getConstant(I, dl, MVT::i32))); + DAG.getConstant(Elt, dl, MVT::i32))); + } for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++) Ops.push_back(DAG.getUNDEF(MVT::i32)); Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops); } SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build); + if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian()) + GRP = DAG.getNode(ISD::SRL, dl, MVT::i32, + DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP), + DAG.getConstant(16, dl, MVT::i32)); return DAG.getTruncStore( ST->getChain(), dl, GRP, ST->getBasePtr(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()), diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll --- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll @@ -816,102 +816,199 @@ } define void @foo_v4f32_v4f16(<4 x float> *%dest, <4 x i16> *%mask, <4 x half> *%src) { -; CHECK-LABEL: foo_v4f32_v4f16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vldrh.s32 q0, [r1] -; CHECK-NEXT: vcmp.s32 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs lr, p0 -; CHECK-NEXT: and r1, lr, #1 -; CHECK-NEXT: ubfx r3, lr, #4, #1 -; CHECK-NEXT: rsb.w r12, r1, #0 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r12, #0, #1 -; CHECK-NEXT: bfi r1, r3, #1, #1 -; CHECK-NEXT: ubfx r3, lr, #8, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #2, #1 -; CHECK-NEXT: ubfx r3, lr, #12, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #3, #1 -; CHECK-NEXT: lsls r3, r1, #31 -; CHECK-NEXT: bne .LBB18_6 -; CHECK-NEXT: @ %bb.1: @ %else -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: bmi .LBB18_7 -; CHECK-NEXT: .LBB18_2: @ %else2 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: bmi .LBB18_8 -; CHECK-NEXT: .LBB18_3: @ %else5 -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: bpl .LBB18_5 -; CHECK-NEXT: .LBB18_4: @ %cond.load7 -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vins.f16 s0, s4 -; CHECK-NEXT: vldr.16 s4, [r2, #6] -; CHECK-NEXT: vins.f16 s1, s4 -; CHECK-NEXT: .LBB18_5: @ %else8 -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vcvtt.f32.f16 s7, s1 -; CHECK-NEXT: vcvtb.f32.f16 s6, s1 -; CHECK-NEXT: vcvtt.f32.f16 s5, s0 -; CHECK-NEXT: vcvtb.f32.f16 s4, s0 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: lsls r2, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne r2, s4 -; CHECK-NEXT: strne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s5 -; CHECK-NEXT: strmi r2, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s6 -; CHECK-NEXT: strmi r2, [r0, #8] -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r1, s7 -; CHECK-NEXT: strmi r1, [r0, #12] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: pop {r7, pc} -; CHECK-NEXT: .LBB18_6: @ %cond.load -; CHECK-NEXT: vldr.16 s0, [r2] -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: bpl .LBB18_2 -; CHECK-NEXT: .LBB18_7: @ %cond.load1 -; CHECK-NEXT: vldr.16 s4, [r2, #2] -; CHECK-NEXT: vins.f16 s0, s4 -; CHECK-NEXT: vmovx.f16 s4, s1 -; CHECK-NEXT: vins.f16 s1, s4 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: bpl .LBB18_3 -; CHECK-NEXT: .LBB18_8: @ %cond.load4 -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vins.f16 s0, s4 -; CHECK-NEXT: vmovx.f16 s4, s1 -; CHECK-NEXT: vldr.16 s1, [r2, #4] -; CHECK-NEXT: vins.f16 s1, s4 -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: bmi .LBB18_4 -; CHECK-NEXT: b .LBB18_5 +; CHECK-LE-LABEL: foo_v4f32_v4f16: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: .save {r7, lr} +; CHECK-LE-NEXT: push {r7, lr} +; CHECK-LE-NEXT: .pad #8 +; CHECK-LE-NEXT: sub sp, #8 +; CHECK-LE-NEXT: vldrh.s32 q0, [r1] +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: @ implicit-def: $q0 +; CHECK-LE-NEXT: vmrs lr, p0 +; CHECK-LE-NEXT: and r1, lr, #1 +; CHECK-LE-NEXT: ubfx r3, lr, #4, #1 +; CHECK-LE-NEXT: rsb.w r12, r1, #0 +; CHECK-LE-NEXT: movs r1, #0 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: bfi r1, r12, #0, #1 +; CHECK-LE-NEXT: bfi r1, r3, #1, #1 +; CHECK-LE-NEXT: ubfx r3, lr, #8, #1 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: bfi r1, r3, #2, #1 +; CHECK-LE-NEXT: ubfx r3, lr, #12, #1 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: bfi r1, r3, #3, #1 +; CHECK-LE-NEXT: lsls r3, r1, #31 +; CHECK-LE-NEXT: bne .LBB18_6 +; CHECK-LE-NEXT: @ %bb.1: @ %else +; CHECK-LE-NEXT: lsls r3, r1, #30 +; CHECK-LE-NEXT: bmi .LBB18_7 +; CHECK-LE-NEXT: .LBB18_2: @ %else2 +; CHECK-LE-NEXT: lsls r3, r1, #29 +; CHECK-LE-NEXT: bmi .LBB18_8 +; CHECK-LE-NEXT: .LBB18_3: @ %else5 +; CHECK-LE-NEXT: lsls r1, r1, #28 +; CHECK-LE-NEXT: bpl .LBB18_5 +; CHECK-LE-NEXT: .LBB18_4: @ %cond.load7 +; CHECK-LE-NEXT: vmovx.f16 s4, s0 +; CHECK-LE-NEXT: vins.f16 s0, s4 +; CHECK-LE-NEXT: vldr.16 s4, [r2, #6] +; CHECK-LE-NEXT: vins.f16 s1, s4 +; CHECK-LE-NEXT: .LBB18_5: @ %else8 +; CHECK-LE-NEXT: vmrs r2, p0 +; CHECK-LE-NEXT: movs r1, #0 +; CHECK-LE-NEXT: vcvtt.f32.f16 s7, s1 +; CHECK-LE-NEXT: vcvtb.f32.f16 s6, s1 +; CHECK-LE-NEXT: vcvtt.f32.f16 s5, s0 +; CHECK-LE-NEXT: vcvtb.f32.f16 s4, s0 +; CHECK-LE-NEXT: and r3, r2, #1 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: bfi r1, r3, #0, #1 +; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: bfi r1, r3, #1, #1 +; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 +; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: bfi r1, r3, #2, #1 +; CHECK-LE-NEXT: rsbs r2, r2, #0 +; CHECK-LE-NEXT: bfi r1, r2, #3, #1 +; CHECK-LE-NEXT: lsls r2, r1, #31 +; CHECK-LE-NEXT: itt ne +; CHECK-LE-NEXT: vmovne r2, s4 +; CHECK-LE-NEXT: strne r2, [r0] +; CHECK-LE-NEXT: lsls r2, r1, #30 +; CHECK-LE-NEXT: itt mi +; CHECK-LE-NEXT: vmovmi r2, s5 +; CHECK-LE-NEXT: strmi r2, [r0, #4] +; CHECK-LE-NEXT: lsls r2, r1, #29 +; CHECK-LE-NEXT: itt mi +; CHECK-LE-NEXT: vmovmi r2, s6 +; CHECK-LE-NEXT: strmi r2, [r0, #8] +; CHECK-LE-NEXT: lsls r1, r1, #28 +; CHECK-LE-NEXT: itt mi +; CHECK-LE-NEXT: vmovmi r1, s7 +; CHECK-LE-NEXT: strmi r1, [r0, #12] +; CHECK-LE-NEXT: add sp, #8 +; CHECK-LE-NEXT: pop {r7, pc} +; CHECK-LE-NEXT: .LBB18_6: @ %cond.load +; CHECK-LE-NEXT: vldr.16 s0, [r2] +; CHECK-LE-NEXT: lsls r3, r1, #30 +; CHECK-LE-NEXT: bpl .LBB18_2 +; CHECK-LE-NEXT: .LBB18_7: @ %cond.load1 +; CHECK-LE-NEXT: vldr.16 s4, [r2, #2] +; CHECK-LE-NEXT: vins.f16 s0, s4 +; CHECK-LE-NEXT: vmovx.f16 s4, s1 +; CHECK-LE-NEXT: vins.f16 s1, s4 +; CHECK-LE-NEXT: lsls r3, r1, #29 +; CHECK-LE-NEXT: bpl .LBB18_3 +; CHECK-LE-NEXT: .LBB18_8: @ %cond.load4 +; CHECK-LE-NEXT: vmovx.f16 s4, s0 +; CHECK-LE-NEXT: vins.f16 s0, s4 +; CHECK-LE-NEXT: vmovx.f16 s4, s1 +; CHECK-LE-NEXT: vldr.16 s1, [r2, #4] +; CHECK-LE-NEXT: vins.f16 s1, s4 +; CHECK-LE-NEXT: lsls r1, r1, #28 +; CHECK-LE-NEXT: bmi .LBB18_4 +; CHECK-LE-NEXT: b .LBB18_5 +; +; CHECK-BE-LABEL: foo_v4f32_v4f16: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: .save {r7, lr} +; CHECK-BE-NEXT: push {r7, lr} +; CHECK-BE-NEXT: .pad #8 +; CHECK-BE-NEXT: sub sp, #8 +; CHECK-BE-NEXT: vldrh.s32 q0, [r1] +; CHECK-BE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-BE-NEXT: @ implicit-def: $q0 +; CHECK-BE-NEXT: vmrs lr, p0 +; CHECK-BE-NEXT: ubfx r1, lr, #12, #1 +; CHECK-BE-NEXT: ubfx r3, lr, #8, #1 +; CHECK-BE-NEXT: rsb.w r12, r1, #0 +; CHECK-BE-NEXT: movs r1, #0 +; CHECK-BE-NEXT: bfi r1, r12, #0, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: bfi r1, r3, #1, #1 +; CHECK-BE-NEXT: ubfx r3, lr, #4, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: bfi r1, r3, #2, #1 +; CHECK-BE-NEXT: and r3, lr, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: bfi r1, r3, #3, #1 +; CHECK-BE-NEXT: lsls r3, r1, #31 +; CHECK-BE-NEXT: bne .LBB18_6 +; CHECK-BE-NEXT: @ %bb.1: @ %else +; CHECK-BE-NEXT: lsls r3, r1, #30 +; CHECK-BE-NEXT: bmi .LBB18_7 +; CHECK-BE-NEXT: .LBB18_2: @ %else2 +; CHECK-BE-NEXT: lsls r3, r1, #29 +; CHECK-BE-NEXT: bmi .LBB18_8 +; CHECK-BE-NEXT: .LBB18_3: @ %else5 +; CHECK-BE-NEXT: lsls r1, r1, #28 +; CHECK-BE-NEXT: bpl .LBB18_5 +; CHECK-BE-NEXT: .LBB18_4: @ %cond.load7 +; CHECK-BE-NEXT: vmovx.f16 s4, s0 +; CHECK-BE-NEXT: vins.f16 s0, s4 +; CHECK-BE-NEXT: vldr.16 s4, [r2, #6] +; CHECK-BE-NEXT: vins.f16 s1, s4 +; CHECK-BE-NEXT: .LBB18_5: @ %else8 +; CHECK-BE-NEXT: vmrs r2, p0 +; CHECK-BE-NEXT: movs r1, #0 +; CHECK-BE-NEXT: vcvtt.f32.f16 s7, s1 +; CHECK-BE-NEXT: vcvtb.f32.f16 s6, s1 +; CHECK-BE-NEXT: vcvtt.f32.f16 s5, s0 +; CHECK-BE-NEXT: vcvtb.f32.f16 s4, s0 +; CHECK-BE-NEXT: ubfx r3, r2, #12, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: bfi r1, r3, #0, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: bfi r1, r3, #1, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: and r2, r2, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: bfi r1, r3, #2, #1 +; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: bfi r1, r2, #3, #1 +; CHECK-BE-NEXT: lsls r2, r1, #31 +; CHECK-BE-NEXT: itt ne +; CHECK-BE-NEXT: vmovne r2, s4 +; CHECK-BE-NEXT: strne r2, [r0] +; CHECK-BE-NEXT: lsls r2, r1, #30 +; CHECK-BE-NEXT: itt mi +; CHECK-BE-NEXT: vmovmi r2, s5 +; CHECK-BE-NEXT: strmi r2, [r0, #4] +; CHECK-BE-NEXT: lsls r2, r1, #29 +; CHECK-BE-NEXT: itt mi +; CHECK-BE-NEXT: vmovmi r2, s6 +; CHECK-BE-NEXT: strmi r2, [r0, #8] +; CHECK-BE-NEXT: lsls r1, r1, #28 +; CHECK-BE-NEXT: itt mi +; CHECK-BE-NEXT: vmovmi r1, s7 +; CHECK-BE-NEXT: strmi r1, [r0, #12] +; CHECK-BE-NEXT: add sp, #8 +; CHECK-BE-NEXT: pop {r7, pc} +; CHECK-BE-NEXT: .LBB18_6: @ %cond.load +; CHECK-BE-NEXT: vldr.16 s0, [r2] +; CHECK-BE-NEXT: lsls r3, r1, #30 +; CHECK-BE-NEXT: bpl .LBB18_2 +; CHECK-BE-NEXT: .LBB18_7: @ %cond.load1 +; CHECK-BE-NEXT: vldr.16 s4, [r2, #2] +; CHECK-BE-NEXT: vins.f16 s0, s4 +; CHECK-BE-NEXT: vmovx.f16 s4, s1 +; CHECK-BE-NEXT: vins.f16 s1, s4 +; CHECK-BE-NEXT: lsls r3, r1, #29 +; CHECK-BE-NEXT: bpl .LBB18_3 +; CHECK-BE-NEXT: .LBB18_8: @ %cond.load4 +; CHECK-BE-NEXT: vmovx.f16 s4, s0 +; CHECK-BE-NEXT: vins.f16 s0, s4 +; CHECK-BE-NEXT: vmovx.f16 s4, s1 +; CHECK-BE-NEXT: vldr.16 s1, [r2, #4] +; CHECK-BE-NEXT: vins.f16 s1, s4 +; CHECK-BE-NEXT: lsls r1, r1, #28 +; CHECK-BE-NEXT: bmi .LBB18_4 +; CHECK-BE-NEXT: b .LBB18_5 entry: %0 = load <4 x i16>, <4 x i16>* %mask, align 2 %1 = icmp sgt <4 x i16> %0, zeroinitializer @@ -922,102 +1019,199 @@ } define void @foo_v4f32_v4f16_unaligned(<4 x float> *%dest, <4 x i16> *%mask, <4 x half> *%src) { -; CHECK-LABEL: foo_v4f32_v4f16_unaligned: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vldrh.s32 q0, [r1] -; CHECK-NEXT: vcmp.s32 gt, q0, zr -; CHECK-NEXT: @ implicit-def: $q0 -; CHECK-NEXT: vmrs lr, p0 -; CHECK-NEXT: and r1, lr, #1 -; CHECK-NEXT: ubfx r3, lr, #4, #1 -; CHECK-NEXT: rsb.w r12, r1, #0 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r12, #0, #1 -; CHECK-NEXT: bfi r1, r3, #1, #1 -; CHECK-NEXT: ubfx r3, lr, #8, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #2, #1 -; CHECK-NEXT: ubfx r3, lr, #12, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #3, #1 -; CHECK-NEXT: lsls r3, r1, #31 -; CHECK-NEXT: bne .LBB19_6 -; CHECK-NEXT: @ %bb.1: @ %else -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: bmi .LBB19_7 -; CHECK-NEXT: .LBB19_2: @ %else2 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: bmi .LBB19_8 -; CHECK-NEXT: .LBB19_3: @ %else5 -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: bpl .LBB19_5 -; CHECK-NEXT: .LBB19_4: @ %cond.load7 -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vins.f16 s0, s4 -; CHECK-NEXT: vldr.16 s4, [r2, #6] -; CHECK-NEXT: vins.f16 s1, s4 -; CHECK-NEXT: .LBB19_5: @ %else8 -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vcvtt.f32.f16 s7, s1 -; CHECK-NEXT: vcvtb.f32.f16 s6, s1 -; CHECK-NEXT: vcvtt.f32.f16 s5, s0 -; CHECK-NEXT: vcvtb.f32.f16 s4, s0 -; CHECK-NEXT: and r3, r2, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #0, #1 -; CHECK-NEXT: ubfx r3, r2, #4, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #1, #1 -; CHECK-NEXT: ubfx r3, r2, #8, #1 -; CHECK-NEXT: ubfx r2, r2, #12, #1 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: bfi r1, r3, #2, #1 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: bfi r1, r2, #3, #1 -; CHECK-NEXT: lsls r2, r1, #31 -; CHECK-NEXT: itt ne -; CHECK-NEXT: vmovne r2, s4 -; CHECK-NEXT: strne r2, [r0] -; CHECK-NEXT: lsls r2, r1, #30 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s5 -; CHECK-NEXT: strmi r2, [r0, #4] -; CHECK-NEXT: lsls r2, r1, #29 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r2, s6 -; CHECK-NEXT: strmi r2, [r0, #8] -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: itt mi -; CHECK-NEXT: vmovmi r1, s7 -; CHECK-NEXT: strmi r1, [r0, #12] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: pop {r7, pc} -; CHECK-NEXT: .LBB19_6: @ %cond.load -; CHECK-NEXT: vldr.16 s0, [r2] -; CHECK-NEXT: lsls r3, r1, #30 -; CHECK-NEXT: bpl .LBB19_2 -; CHECK-NEXT: .LBB19_7: @ %cond.load1 -; CHECK-NEXT: vldr.16 s4, [r2, #2] -; CHECK-NEXT: vins.f16 s0, s4 -; CHECK-NEXT: vmovx.f16 s4, s1 -; CHECK-NEXT: vins.f16 s1, s4 -; CHECK-NEXT: lsls r3, r1, #29 -; CHECK-NEXT: bpl .LBB19_3 -; CHECK-NEXT: .LBB19_8: @ %cond.load4 -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vins.f16 s0, s4 -; CHECK-NEXT: vmovx.f16 s4, s1 -; CHECK-NEXT: vldr.16 s1, [r2, #4] -; CHECK-NEXT: vins.f16 s1, s4 -; CHECK-NEXT: lsls r1, r1, #28 -; CHECK-NEXT: bmi .LBB19_4 -; CHECK-NEXT: b .LBB19_5 +; CHECK-LE-LABEL: foo_v4f32_v4f16_unaligned: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: .save {r7, lr} +; CHECK-LE-NEXT: push {r7, lr} +; CHECK-LE-NEXT: .pad #8 +; CHECK-LE-NEXT: sub sp, #8 +; CHECK-LE-NEXT: vldrh.s32 q0, [r1] +; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-LE-NEXT: @ implicit-def: $q0 +; CHECK-LE-NEXT: vmrs lr, p0 +; CHECK-LE-NEXT: and r1, lr, #1 +; CHECK-LE-NEXT: ubfx r3, lr, #4, #1 +; CHECK-LE-NEXT: rsb.w r12, r1, #0 +; CHECK-LE-NEXT: movs r1, #0 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: bfi r1, r12, #0, #1 +; CHECK-LE-NEXT: bfi r1, r3, #1, #1 +; CHECK-LE-NEXT: ubfx r3, lr, #8, #1 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: bfi r1, r3, #2, #1 +; CHECK-LE-NEXT: ubfx r3, lr, #12, #1 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: bfi r1, r3, #3, #1 +; CHECK-LE-NEXT: lsls r3, r1, #31 +; CHECK-LE-NEXT: bne .LBB19_6 +; CHECK-LE-NEXT: @ %bb.1: @ %else +; CHECK-LE-NEXT: lsls r3, r1, #30 +; CHECK-LE-NEXT: bmi .LBB19_7 +; CHECK-LE-NEXT: .LBB19_2: @ %else2 +; CHECK-LE-NEXT: lsls r3, r1, #29 +; CHECK-LE-NEXT: bmi .LBB19_8 +; CHECK-LE-NEXT: .LBB19_3: @ %else5 +; CHECK-LE-NEXT: lsls r1, r1, #28 +; CHECK-LE-NEXT: bpl .LBB19_5 +; CHECK-LE-NEXT: .LBB19_4: @ %cond.load7 +; CHECK-LE-NEXT: vmovx.f16 s4, s0 +; CHECK-LE-NEXT: vins.f16 s0, s4 +; CHECK-LE-NEXT: vldr.16 s4, [r2, #6] +; CHECK-LE-NEXT: vins.f16 s1, s4 +; CHECK-LE-NEXT: .LBB19_5: @ %else8 +; CHECK-LE-NEXT: vmrs r2, p0 +; CHECK-LE-NEXT: movs r1, #0 +; CHECK-LE-NEXT: vcvtt.f32.f16 s7, s1 +; CHECK-LE-NEXT: vcvtb.f32.f16 s6, s1 +; CHECK-LE-NEXT: vcvtt.f32.f16 s5, s0 +; CHECK-LE-NEXT: vcvtb.f32.f16 s4, s0 +; CHECK-LE-NEXT: and r3, r2, #1 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: bfi r1, r3, #0, #1 +; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: bfi r1, r3, #1, #1 +; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 +; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 +; CHECK-LE-NEXT: rsbs r3, r3, #0 +; CHECK-LE-NEXT: bfi r1, r3, #2, #1 +; CHECK-LE-NEXT: rsbs r2, r2, #0 +; CHECK-LE-NEXT: bfi r1, r2, #3, #1 +; CHECK-LE-NEXT: lsls r2, r1, #31 +; CHECK-LE-NEXT: itt ne +; CHECK-LE-NEXT: vmovne r2, s4 +; CHECK-LE-NEXT: strne r2, [r0] +; CHECK-LE-NEXT: lsls r2, r1, #30 +; CHECK-LE-NEXT: itt mi +; CHECK-LE-NEXT: vmovmi r2, s5 +; CHECK-LE-NEXT: strmi r2, [r0, #4] +; CHECK-LE-NEXT: lsls r2, r1, #29 +; CHECK-LE-NEXT: itt mi +; CHECK-LE-NEXT: vmovmi r2, s6 +; CHECK-LE-NEXT: strmi r2, [r0, #8] +; CHECK-LE-NEXT: lsls r1, r1, #28 +; CHECK-LE-NEXT: itt mi +; CHECK-LE-NEXT: vmovmi r1, s7 +; CHECK-LE-NEXT: strmi r1, [r0, #12] +; CHECK-LE-NEXT: add sp, #8 +; CHECK-LE-NEXT: pop {r7, pc} +; CHECK-LE-NEXT: .LBB19_6: @ %cond.load +; CHECK-LE-NEXT: vldr.16 s0, [r2] +; CHECK-LE-NEXT: lsls r3, r1, #30 +; CHECK-LE-NEXT: bpl .LBB19_2 +; CHECK-LE-NEXT: .LBB19_7: @ %cond.load1 +; CHECK-LE-NEXT: vldr.16 s4, [r2, #2] +; CHECK-LE-NEXT: vins.f16 s0, s4 +; CHECK-LE-NEXT: vmovx.f16 s4, s1 +; CHECK-LE-NEXT: vins.f16 s1, s4 +; CHECK-LE-NEXT: lsls r3, r1, #29 +; CHECK-LE-NEXT: bpl .LBB19_3 +; CHECK-LE-NEXT: .LBB19_8: @ %cond.load4 +; CHECK-LE-NEXT: vmovx.f16 s4, s0 +; CHECK-LE-NEXT: vins.f16 s0, s4 +; CHECK-LE-NEXT: vmovx.f16 s4, s1 +; CHECK-LE-NEXT: vldr.16 s1, [r2, #4] +; CHECK-LE-NEXT: vins.f16 s1, s4 +; CHECK-LE-NEXT: lsls r1, r1, #28 +; CHECK-LE-NEXT: bmi .LBB19_4 +; CHECK-LE-NEXT: b .LBB19_5 +; +; CHECK-BE-LABEL: foo_v4f32_v4f16_unaligned: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: .save {r7, lr} +; CHECK-BE-NEXT: push {r7, lr} +; CHECK-BE-NEXT: .pad #8 +; CHECK-BE-NEXT: sub sp, #8 +; CHECK-BE-NEXT: vldrh.s32 q0, [r1] +; CHECK-BE-NEXT: vcmp.s32 gt, q0, zr +; CHECK-BE-NEXT: @ implicit-def: $q0 +; CHECK-BE-NEXT: vmrs lr, p0 +; CHECK-BE-NEXT: ubfx r1, lr, #12, #1 +; CHECK-BE-NEXT: ubfx r3, lr, #8, #1 +; CHECK-BE-NEXT: rsb.w r12, r1, #0 +; CHECK-BE-NEXT: movs r1, #0 +; CHECK-BE-NEXT: bfi r1, r12, #0, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: bfi r1, r3, #1, #1 +; CHECK-BE-NEXT: ubfx r3, lr, #4, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: bfi r1, r3, #2, #1 +; CHECK-BE-NEXT: and r3, lr, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: bfi r1, r3, #3, #1 +; CHECK-BE-NEXT: lsls r3, r1, #31 +; CHECK-BE-NEXT: bne .LBB19_6 +; CHECK-BE-NEXT: @ %bb.1: @ %else +; CHECK-BE-NEXT: lsls r3, r1, #30 +; CHECK-BE-NEXT: bmi .LBB19_7 +; CHECK-BE-NEXT: .LBB19_2: @ %else2 +; CHECK-BE-NEXT: lsls r3, r1, #29 +; CHECK-BE-NEXT: bmi .LBB19_8 +; CHECK-BE-NEXT: .LBB19_3: @ %else5 +; CHECK-BE-NEXT: lsls r1, r1, #28 +; CHECK-BE-NEXT: bpl .LBB19_5 +; CHECK-BE-NEXT: .LBB19_4: @ %cond.load7 +; CHECK-BE-NEXT: vmovx.f16 s4, s0 +; CHECK-BE-NEXT: vins.f16 s0, s4 +; CHECK-BE-NEXT: vldr.16 s4, [r2, #6] +; CHECK-BE-NEXT: vins.f16 s1, s4 +; CHECK-BE-NEXT: .LBB19_5: @ %else8 +; CHECK-BE-NEXT: vmrs r2, p0 +; CHECK-BE-NEXT: movs r1, #0 +; CHECK-BE-NEXT: vcvtt.f32.f16 s7, s1 +; CHECK-BE-NEXT: vcvtb.f32.f16 s6, s1 +; CHECK-BE-NEXT: vcvtt.f32.f16 s5, s0 +; CHECK-BE-NEXT: vcvtb.f32.f16 s4, s0 +; CHECK-BE-NEXT: ubfx r3, r2, #12, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: bfi r1, r3, #0, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: bfi r1, r3, #1, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: and r2, r2, #1 +; CHECK-BE-NEXT: rsbs r3, r3, #0 +; CHECK-BE-NEXT: bfi r1, r3, #2, #1 +; CHECK-BE-NEXT: rsbs r2, r2, #0 +; CHECK-BE-NEXT: bfi r1, r2, #3, #1 +; CHECK-BE-NEXT: lsls r2, r1, #31 +; CHECK-BE-NEXT: itt ne +; CHECK-BE-NEXT: vmovne r2, s4 +; CHECK-BE-NEXT: strne r2, [r0] +; CHECK-BE-NEXT: lsls r2, r1, #30 +; CHECK-BE-NEXT: itt mi +; CHECK-BE-NEXT: vmovmi r2, s5 +; CHECK-BE-NEXT: strmi r2, [r0, #4] +; CHECK-BE-NEXT: lsls r2, r1, #29 +; CHECK-BE-NEXT: itt mi +; CHECK-BE-NEXT: vmovmi r2, s6 +; CHECK-BE-NEXT: strmi r2, [r0, #8] +; CHECK-BE-NEXT: lsls r1, r1, #28 +; CHECK-BE-NEXT: itt mi +; CHECK-BE-NEXT: vmovmi r1, s7 +; CHECK-BE-NEXT: strmi r1, [r0, #12] +; CHECK-BE-NEXT: add sp, #8 +; CHECK-BE-NEXT: pop {r7, pc} +; CHECK-BE-NEXT: .LBB19_6: @ %cond.load +; CHECK-BE-NEXT: vldr.16 s0, [r2] +; CHECK-BE-NEXT: lsls r3, r1, #30 +; CHECK-BE-NEXT: bpl .LBB19_2 +; CHECK-BE-NEXT: .LBB19_7: @ %cond.load1 +; CHECK-BE-NEXT: vldr.16 s4, [r2, #2] +; CHECK-BE-NEXT: vins.f16 s0, s4 +; CHECK-BE-NEXT: vmovx.f16 s4, s1 +; CHECK-BE-NEXT: vins.f16 s1, s4 +; CHECK-BE-NEXT: lsls r3, r1, #29 +; CHECK-BE-NEXT: bpl .LBB19_3 +; CHECK-BE-NEXT: .LBB19_8: @ %cond.load4 +; CHECK-BE-NEXT: vmovx.f16 s4, s0 +; CHECK-BE-NEXT: vins.f16 s0, s4 +; CHECK-BE-NEXT: vmovx.f16 s4, s1 +; CHECK-BE-NEXT: vldr.16 s1, [r2, #4] +; CHECK-BE-NEXT: vins.f16 s1, s4 +; CHECK-BE-NEXT: lsls r1, r1, #28 +; CHECK-BE-NEXT: bmi .LBB19_4 +; CHECK-BE-NEXT: b .LBB19_5 entry: %0 = load <4 x i16>, <4 x i16>* %mask, align 2 %1 = icmp sgt <4 x i16> %0, zeroinitializer diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll --- a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll @@ -90,15 +90,15 @@ ; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr ; CHECK-BE-NEXT: @ implicit-def: $q1 ; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r1, r2, #1 +; CHECK-BE-NEXT: ubfx r1, r2, #12, #1 ; CHECK-BE-NEXT: rsbs r3, r1, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: and r2, r2, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 @@ -241,15 +241,15 @@ ; CHECK-BE-NEXT: @ implicit-def: $q0 ; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr ; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r1, r2, #1 +; CHECK-BE-NEXT: ubfx r1, r2, #12, #1 ; CHECK-BE-NEXT: rsbs r3, r1, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: and r2, r2, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 @@ -399,15 +399,15 @@ ; CHECK-BE-NEXT: @ implicit-def: $q0 ; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr ; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r1, r2, #1 +; CHECK-BE-NEXT: ubfx r1, r2, #12, #1 ; CHECK-BE-NEXT: rsbs r3, r1, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: and r2, r2, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 @@ -631,27 +631,27 @@ ; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr ; CHECK-BE-NEXT: @ implicit-def: $q1 ; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r2, r1, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #14, #1 ; CHECK-BE-NEXT: rsbs r3, r2, #0 ; CHECK-BE-NEXT: movs r2, #0 ; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 +; CHECK-BE-NEXT: and r1, r1, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #6, #1 ; CHECK-BE-NEXT: rsbs r1, r1, #0 @@ -1278,15 +1278,15 @@ ; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr ; CHECK-BE-NEXT: @ implicit-def: $q1 ; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r1, r2, #1 +; CHECK-BE-NEXT: ubfx r1, r2, #12, #1 ; CHECK-BE-NEXT: rsbs r3, r1, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: and r2, r2, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 @@ -1561,27 +1561,27 @@ ; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr ; CHECK-BE-NEXT: @ implicit-def: $q1 ; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r2, r1, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #14, #1 ; CHECK-BE-NEXT: rsbs r3, r2, #0 ; CHECK-BE-NEXT: movs r2, #0 ; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 +; CHECK-BE-NEXT: and r1, r1, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #6, #1 ; CHECK-BE-NEXT: rsbs r1, r1, #0 @@ -2015,17 +2015,17 @@ ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr ; CHECK-BE-NEXT: vmrs r3, p0 -; CHECK-BE-NEXT: and r1, r3, #1 +; CHECK-BE-NEXT: ubfx r1, r3, #12, #1 ; CHECK-BE-NEXT: rsbs r2, r1, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: bfi r1, r2, #0, #1 -; CHECK-BE-NEXT: ubfx r2, r3, #4, #1 +; CHECK-BE-NEXT: ubfx r2, r3, #8, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r1, r2, #1, #1 -; CHECK-BE-NEXT: ubfx r2, r3, #8, #1 +; CHECK-BE-NEXT: ubfx r2, r3, #4, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r1, r2, #2, #1 -; CHECK-BE-NEXT: ubfx r2, r3, #12, #1 +; CHECK-BE-NEXT: and r2, r3, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r1, r2, #3, #1 ; CHECK-BE-NEXT: lsls r2, r1, #31 diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll --- a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll @@ -67,15 +67,15 @@ ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr ; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r1, r2, #1 +; CHECK-BE-NEXT: ubfx r1, r2, #12, #1 ; CHECK-BE-NEXT: rsbs r3, r1, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: and r2, r2, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 @@ -257,27 +257,27 @@ ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr ; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r2, r1, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #14, #1 ; CHECK-BE-NEXT: rsbs r3, r2, #0 ; CHECK-BE-NEXT: movs r2, #0 ; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 +; CHECK-BE-NEXT: and r1, r1, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #6, #1 ; CHECK-BE-NEXT: rsbs r1, r1, #0 @@ -528,14 +528,14 @@ ; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r3, r2, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #12, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: and r2, r2, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 @@ -763,27 +763,27 @@ ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vcmp.i16 ne, q2, zr ; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r2, r1, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #14, #1 ; CHECK-BE-NEXT: rsbs r3, r2, #0 ; CHECK-BE-NEXT: movs r2, #0 ; CHECK-BE-NEXT: bfi r2, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 +; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 +; CHECK-BE-NEXT: and r1, r1, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r2, r3, #6, #1 ; CHECK-BE-NEXT: rsbs r1, r1, #0 @@ -1180,15 +1180,15 @@ ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr ; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r1, r2, #1 +; CHECK-BE-NEXT: ubfx r1, r2, #12, #1 ; CHECK-BE-NEXT: rsbs r3, r1, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #12, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: and r2, r2, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 @@ -1307,17 +1307,17 @@ ; CHECK-BE-NEXT: sub sp, #4 ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcmp.f32 s4, #0 +; CHECK-BE-NEXT: vcmp.f32 s7, #0 ; CHECK-BE-NEXT: movs r2, #0 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: it gt ; CHECK-BE-NEXT: movgt r1, #1 ; CHECK-BE-NEXT: cmp r1, #0 -; CHECK-BE-NEXT: vcmp.f32 s5, #0 +; CHECK-BE-NEXT: vcmp.f32 s6, #0 ; CHECK-BE-NEXT: cset r1, ne ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: and r1, r1, #1 -; CHECK-BE-NEXT: vcmp.f32 s6, #0 +; CHECK-BE-NEXT: vcmp.f32 s5, #0 ; CHECK-BE-NEXT: rsb.w r3, r1, #0 ; CHECK-BE-NEXT: mov.w r1, #0 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1 @@ -1328,7 +1328,7 @@ ; CHECK-BE-NEXT: cset r3, ne ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: and r3, r3, #1 -; CHECK-BE-NEXT: vcmp.f32 s7, #0 +; CHECK-BE-NEXT: vcmp.f32 s4, #0 ; CHECK-BE-NEXT: rsb.w r3, r3, #0 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 @@ -1479,17 +1479,17 @@ ; CHECK-BE-NEXT: sub sp, #4 ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcmp.f32 s4, #0 +; CHECK-BE-NEXT: vcmp.f32 s7, #0 ; CHECK-BE-NEXT: movs r2, #0 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: it gt ; CHECK-BE-NEXT: movgt r1, #1 ; CHECK-BE-NEXT: cmp r1, #0 -; CHECK-BE-NEXT: vcmp.f32 s5, #0 +; CHECK-BE-NEXT: vcmp.f32 s6, #0 ; CHECK-BE-NEXT: cset r1, ne ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: and r1, r1, #1 -; CHECK-BE-NEXT: vcmp.f32 s6, #0 +; CHECK-BE-NEXT: vcmp.f32 s5, #0 ; CHECK-BE-NEXT: rsb.w r3, r1, #0 ; CHECK-BE-NEXT: mov.w r1, #0 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1 @@ -1500,7 +1500,7 @@ ; CHECK-BE-NEXT: cset r3, ne ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: and r3, r3, #1 -; CHECK-BE-NEXT: vcmp.f32 s7, #0 +; CHECK-BE-NEXT: vcmp.f32 s4, #0 ; CHECK-BE-NEXT: rsb.w r3, r3, #0 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 @@ -1659,17 +1659,17 @@ ; CHECK-BE-NEXT: sub sp, #20 ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vcmp.f32 s4, #0 +; CHECK-BE-NEXT: vcmp.f32 s7, #0 ; CHECK-BE-NEXT: movs r2, #0 ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: it gt ; CHECK-BE-NEXT: movgt r1, #1 ; CHECK-BE-NEXT: cmp r1, #0 -; CHECK-BE-NEXT: vcmp.f32 s5, #0 +; CHECK-BE-NEXT: vcmp.f32 s6, #0 ; CHECK-BE-NEXT: cset r1, ne ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: and r1, r1, #1 -; CHECK-BE-NEXT: vcmp.f32 s6, #0 +; CHECK-BE-NEXT: vcmp.f32 s5, #0 ; CHECK-BE-NEXT: rsb.w r3, r1, #0 ; CHECK-BE-NEXT: mov.w r1, #0 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1 @@ -1680,7 +1680,7 @@ ; CHECK-BE-NEXT: cset r3, ne ; CHECK-BE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-BE-NEXT: and r3, r3, #1 -; CHECK-BE-NEXT: vcmp.f32 s7, #0 +; CHECK-BE-NEXT: vcmp.f32 s4, #0 ; CHECK-BE-NEXT: rsb.w r3, r3, #0 ; CHECK-BE-NEXT: vcvtb.f16.f32 s0, s4 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll --- a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll @@ -28,9 +28,10 @@ ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: .pad #4 ; CHECK-BE-NEXT: sub sp, #4 -; CHECK-BE-NEXT: and r0, r0, #15 +; CHECK-BE-NEXT: rbit r0, r0 ; CHECK-BE-NEXT: vmov.i8 q1, #0x0 ; CHECK-BE-NEXT: vmov.i8 q2, #0xff +; CHECK-BE-NEXT: lsrs r0, r0, #28 ; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q2, q1 ; CHECK-BE-NEXT: vmov.u8 r0, q1[2] @@ -90,7 +91,9 @@ ; CHECK-BE-NEXT: sub sp, #8 ; CHECK-BE-NEXT: uxtb r0, r0 ; CHECK-BE-NEXT: vmov.i8 q1, #0x0 +; CHECK-BE-NEXT: rbit r0, r0 ; CHECK-BE-NEXT: vmov.i8 q2, #0xff +; CHECK-BE-NEXT: lsrs r0, r0, #24 ; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q2, q2, q1 ; CHECK-BE-NEXT: vmov.u8 r0, q2[0] @@ -153,10 +156,13 @@ ; CHECK-BE-NEXT: mov r4, sp ; CHECK-BE-NEXT: bfc r4, #0, #4 ; CHECK-BE-NEXT: mov sp, r4 +; CHECK-BE-NEXT: uxth r0, r0 ; CHECK-BE-NEXT: vrev64.8 q1, q0 +; CHECK-BE-NEXT: rbit r0, r0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 ; CHECK-BE-NEXT: sub.w r4, r7, #8 ; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: lsrs r0, r0, #16 ; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.8 q0, q1 @@ -236,15 +242,15 @@ ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr ; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r0, r1, #1 +; CHECK-BE-NEXT: ubfx r0, r1, #12, #1 ; CHECK-BE-NEXT: rsbs r2, r0, #0 ; CHECK-BE-NEXT: movs r0, #0 ; CHECK-BE-NEXT: bfi r0, r2, #0, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #4, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #8, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r0, r2, #1, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #8, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #12, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #4, #1 +; CHECK-BE-NEXT: and r1, r1, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r0, r2, #2, #1 ; CHECK-BE-NEXT: rsbs r1, r1, #0 @@ -300,27 +306,27 @@ ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr ; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r0, r1, #1 +; CHECK-BE-NEXT: ubfx r0, r1, #14, #1 ; CHECK-BE-NEXT: rsbs r2, r0, #0 ; CHECK-BE-NEXT: movs r0, #0 ; CHECK-BE-NEXT: bfi r0, r2, #0, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #2, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #12, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r0, r2, #1, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #4, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #10, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r0, r2, #2, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #6, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #8, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r0, r2, #3, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #8, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #6, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r0, r2, #4, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #10, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #4, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r0, r2, #5, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #12, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #14, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #2, #1 +; CHECK-BE-NEXT: and r1, r1, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r0, r2, #6, #1 ; CHECK-BE-NEXT: rsbs r1, r1, #0 @@ -368,7 +374,8 @@ ; CHECK-BE-NEXT: sub.w r4, r7, #8 ; CHECK-BE-NEXT: vcmp.i8 eq, q1, zr ; CHECK-BE-NEXT: vmrs r0, p0 -; CHECK-BE-NEXT: uxth r0, r0 +; CHECK-BE-NEXT: rbit r0, r0 +; CHECK-BE-NEXT: lsrs r0, r0, #16 ; CHECK-BE-NEXT: mov sp, r4 ; CHECK-BE-NEXT: pop {r4, r6, r7, pc} entry: diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll --- a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll @@ -26,6 +26,8 @@ ; CHECK-BE-NEXT: ldrb r0, [r0] ; CHECK-BE-NEXT: vmov.i8 q1, #0x0 ; CHECK-BE-NEXT: vmov.i8 q2, #0xff +; CHECK-BE-NEXT: rbit r0, r0 +; CHECK-BE-NEXT: lsrs r0, r0, #28 ; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q2, q1 ; CHECK-BE-NEXT: vmov.u8 r0, q1[2] @@ -80,6 +82,8 @@ ; CHECK-BE-NEXT: ldrb r0, [r0] ; CHECK-BE-NEXT: vmov.i8 q1, #0x0 ; CHECK-BE-NEXT: vmov.i8 q2, #0xff +; CHECK-BE-NEXT: rbit r0, r0 +; CHECK-BE-NEXT: lsrs r0, r0, #24 ; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q2, q2, q1 ; CHECK-BE-NEXT: vmov.u8 r0, q2[0] @@ -125,7 +129,9 @@ ; CHECK-BE-NEXT: ldrh r0, [r0] ; CHECK-BE-NEXT: vrev64.8 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: rbit r0, r0 ; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: lsrs r0, r0, #16 ; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.8 q0, q1 @@ -195,14 +201,14 @@ ; CHECK-BE-NEXT: movs r3, #0 ; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr ; CHECK-BE-NEXT: vmrs r1, p0 -; CHECK-BE-NEXT: and r2, r1, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #12, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r3, r2, #0, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #4, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #8, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r3, r2, #1, #1 -; CHECK-BE-NEXT: ubfx r2, r1, #8, #1 -; CHECK-BE-NEXT: ubfx r1, r1, #12, #1 +; CHECK-BE-NEXT: ubfx r2, r1, #4, #1 +; CHECK-BE-NEXT: and r1, r1, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 ; CHECK-BE-NEXT: bfi r3, r2, #2, #1 ; CHECK-BE-NEXT: rsbs r1, r1, #0 @@ -253,27 +259,27 @@ ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr ; CHECK-BE-NEXT: vmrs r2, p0 -; CHECK-BE-NEXT: and r1, r2, #1 +; CHECK-BE-NEXT: ubfx r1, r2, #14, #1 ; CHECK-BE-NEXT: rsbs r3, r1, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #2, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #12, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #10, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #6, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #3, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #6, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #4, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #10, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #5, #1 -; CHECK-BE-NEXT: ubfx r3, r2, #12, #1 -; CHECK-BE-NEXT: ubfx r2, r2, #14, #1 +; CHECK-BE-NEXT: ubfx r3, r2, #2, #1 +; CHECK-BE-NEXT: and r2, r2, #1 ; CHECK-BE-NEXT: rsbs r3, r3, #0 ; CHECK-BE-NEXT: bfi r1, r3, #6, #1 ; CHECK-BE-NEXT: rsbs r2, r2, #0 @@ -299,6 +305,8 @@ ; CHECK-BE-NEXT: vrev64.8 q1, q0 ; CHECK-BE-NEXT: vcmp.i8 eq, q1, zr ; CHECK-BE-NEXT: vmrs r1, p0 +; CHECK-BE-NEXT: rbit r1, r1 +; CHECK-BE-NEXT: lsrs r1, r1, #16 ; CHECK-BE-NEXT: strh r1, [r0] ; CHECK-BE-NEXT: bx lr entry: