diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -6443,9 +6443,10 @@
 /// immediate" operand (e.g., VMOV). If so, return the encoded value.
 static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
                                  unsigned SplatBitSize, SelectionDAG &DAG,
-                                 const SDLoc &dl, EVT &VT, bool is128Bits,
+                                 const SDLoc &dl, EVT &VT, EVT VectorVT,
                                  VMOVModImmType type) {
   unsigned OpCmode, Imm;
+  bool is128Bits = VectorVT.is128BitVector();
 
   // SplatBitSize is set to the smallest size that splats the vector, so a
   // zero vector will always have SplatBitSize == 8. However, NEON modified
@@ -6563,9 +6564,18 @@
       ImmMask <<= 1;
     }
 
-    if (DAG.getDataLayout().isBigEndian())
-      // swap higher and lower 32 bit word
-      Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
+    if (DAG.getDataLayout().isBigEndian()) {
+      // Reverse the order of elements within the vector.
+      unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
+      unsigned Mask = (1 << BytesPerElem) - 1;
+      unsigned NumElems = 8 / BytesPerElem;
+      unsigned NewImm = 0;
+      for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
+        unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
+        NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
+      }
+      Imm = NewImm;
+    }
 
     // Op=1, Cmode=1110.
     OpCmode = 0x1e;
@@ -6658,7 +6668,7 @@
 
   // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
   SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
-                                     VMovVT, false, VMOVModImm);
+                                     VMovVT, VT, VMOVModImm);
   if (NewVal != SDValue()) {
     SDLoc DL(Op);
     SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
@@ -6675,7 +6685,7 @@
 
   // Finally, try a VMVN.i32
   NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
-                             false, VMVNModImm);
+                             VT, VMVNModImm);
   if (NewVal != SDValue()) {
     SDLoc DL(Op);
     SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
@@ -7185,10 +7195,9 @@
         (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
       // Check if an immediate VMOV works.
       EVT VmovVT;
-      SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
-                                      SplatUndef.getZExtValue(), SplatBitSize,
-                                      DAG, dl, VmovVT, VT.is128BitVector(),
-                                      VMOVModImm);
+      SDValue Val =
+          isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+                            SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
 
       if (Val.getNode()) {
         SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
@@ -7198,9 +7207,8 @@
       // Try an immediate VMVN.
       uint64_t NegatedImm = (~SplatBits).getZExtValue();
       Val = isVMOVModifiedImm(
-          NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
-          DAG, dl, VmovVT, VT.is128BitVector(),
-          ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
+          NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
+          VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
       if (Val.getNode()) {
         SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
@@ -12403,8 +12411,7 @@
     EVT VbicVT;
     SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
                                     SplatUndef.getZExtValue(), SplatBitSize,
-                                    DAG, dl, VbicVT, VT.is128BitVector(),
-                                    OtherModImm);
+                                    DAG, dl, VbicVT, VT, OtherModImm);
     if (Val.getNode()) {
       SDValue Input =
           DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
@@ -12708,10 +12715,9 @@
       BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
     if (SplatBitSize <= 64) {
       EVT VorrVT;
-      SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
-                                      SplatUndef.getZExtValue(), SplatBitSize,
-                                      DAG, dl, VorrVT, VT.is128BitVector(),
-                                      OtherModImm);
+      SDValue Val =
+          isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+                            SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
       if (Val.getNode()) {
         SDValue Input =
             DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
--- a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
@@ -98,7 +98,7 @@
 define void @conv_v4i16_to_v4f16( <4 x i16> %a, <4 x half>* %store ) {
 ; CHECK-LABEL: conv_v4i16_to_v4f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i64 d16, #0xffffffff0000
+; CHECK-NEXT:    vmov.i64 d16, #0xffff00000000ffff
 ; CHECK-NEXT:    vldr d17, [r0]
 ; CHECK-NEXT:    vrev64.16 d18, d0
 ; CHECK-NEXT:    vrev64.16 d17, d17
diff --git a/llvm/test/CodeGen/ARM/big-endian-vmov.ll b/llvm/test/CodeGen/ARM/big-endian-vmov.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/big-endian-vmov.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -mtriple armv7-eabi -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc < %s -mtriple armebv7-eabi -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+
+; CHECK-LABEL: vmov_i8
+; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <8 x i8> @vmov_i8() {
+  ret <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1>
+}
+
+; CHECK-LABEL: vmov_i16_a:
+; CHECK-LE: vmov.i64 d0, #0xffff000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xffff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <4 x i16> @vmov_i16_a() {
+  ret <4 x i16> <i16 0, i16 0, i16 0, i16 -1>
+}
+
+; CHECK-LABEL: vmov_i16_b:
+; CHECK-LE: vmov.i64 d0, #0xff000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <4 x i16> @vmov_i16_b() {
+  ret <4 x i16> <i16 0, i16 0, i16 0, i16 255>
+}
+
+; CHECK-LABEL: vmov_i16_c:
+; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff00{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <4 x i16> @vmov_i16_c() {
+  ret <4 x i16> <i16 0, i16 0, i16 0, i16 65280>
+}
+
+; CHECK-LABEL: vmov_i32_a:
+; CHECK-LE: vmov.i64 d0, #0xffffffff00000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xffffffff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_a() {
+  ret <2 x i32> <i32 0, i32 -1>
+}
+
+; CHECK-LABEL: vmov_i32_b:
+; CHECK-LE: vmov.i64 d0, #0xff00000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_b() {
+  ret <2 x i32> <i32 0, i32 255>
+}
+
+; CHECK-LABEL: vmov_i32_c:
+; CHECK-LE: vmov.i64 d0, #0xff0000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff00{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_c() {
+  ret <2 x i32> <i32 0, i32 65280>
+}
+
+; CHECK-LABEL: vmov_i32_d:
+; CHECK-LE: vmov.i64 d0, #0xff000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff0000{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_d() {
+  ret <2 x i32> <i32 0, i32 16711680>
+}
+
+; CHECK-LABEL: vmov_i32_e:
+; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff000000{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_e() {
+  ret <2 x i32> <i32 0, i32 4278190080>
+}
+
+; CHECK-LABEL: vmov_i64_a:
+; CHECK: vmov.i8 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <1 x i64> @vmov_i64_a() {
+  ret <1 x i64> <i64 -1>
+}
+
+; CHECK-LABEL: vmov_i64_b:
+; CHECK: vmov.i64 d0, #0xffff00ff0000ff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <1 x i64> @vmov_i64_b() {
+  ret <1 x i64> <i64 72056498804490495>
+}
diff --git a/llvm/test/CodeGen/ARM/vmov.ll b/llvm/test/CodeGen/ARM/vmov.ll
--- a/llvm/test/CodeGen/ARM/vmov.ll
+++ b/llvm/test/CodeGen/ARM/vmov.ll
@@ -219,15 +219,10 @@
 }
 
 define arm_aapcs_vfpcc <2 x i64> @v_movQi64() nounwind {
-; CHECK-LE-LABEL: v_movQi64:
-; CHECK-LE:       @ %bb.0:
-; CHECK-LE-NEXT:    vmov.i64 q0, #0xff0000ff0000ffff
-; CHECK-LE-NEXT:    mov pc, lr
-;
-; CHECK-BE-LABEL: v_movQi64:
-; CHECK-BE:       @ %bb.0:
-; CHECK-BE-NEXT:    vmov.i64 q0, #0xffffff0000ff
-; CHECK-BE-NEXT:    mov pc, lr
+; CHECK-LABEL: v_movQi64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov.i64 q0, #0xff0000ff0000ffff
+; CHECK-NEXT:    mov pc, lr
 	ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
 }
 
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
--- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
@@ -263,15 +263,10 @@
 }
 
 define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff() {
-; CHECKLE-LABEL: mov_int64_ff:
-; CHECKLE:       @ %bb.0: @ %entry
-; CHECKLE-NEXT:    vmov.i64 q0, #0xff
-; CHECKLE-NEXT:    bx lr
-;
-; CHECKBE-LABEL: mov_int64_ff:
-; CHECKBE:       @ %bb.0: @ %entry
-; CHECKBE-NEXT:    vmov.i64 q0, #0xff00000000
-; CHECKBE-NEXT:    bx lr
+; CHECK-LABEL: mov_int64_ff:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.i64 q0, #0xff
+; CHECK-NEXT:    bx lr
 entry:
   ret <2 x i64> < i64 255, i64 255 >
 }
@@ -286,15 +281,10 @@
 }
 
 define arm_aapcs_vfpcc <2 x i64> @mov_int64_ff0000ff0000ffff() {
-; CHECKLE-LABEL: mov_int64_ff0000ff0000ffff:
-; CHECKLE:       @ %bb.0: @ %entry
-; CHECKLE-NEXT:    vmov.i64 q0, #0xff0000ff0000ffff
-; CHECKLE-NEXT:    bx lr
-;
-; CHECKBE-LABEL: mov_int64_ff0000ff0000ffff:
-; CHECKBE:       @ %bb.0: @ %entry
-; CHECKBE-NEXT:    vmov.i64 q0, #0xffffff0000ff
-; CHECKBE-NEXT:    bx lr
+; CHECK-LABEL: mov_int64_ff0000ff0000ffff:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.i64 q0, #0xff0000ff0000ffff
+; CHECK-NEXT:    bx lr
 entry:
   ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
 }
@@ -338,7 +328,7 @@
 ;
 ; CHECKBE-LABEL: mov_int64_0f000f0f:
 ; CHECKBE:       @ %bb.0: @ %entry
-; CHECKBE-NEXT:    vmov.i64 q0, #0xff00ff00ff00
+; CHECKBE-NEXT:    vmov.i64 q0, #0xff00ff000000ff00
 ; CHECKBE-NEXT:    bx lr
 entry:
   ret <16 x i8> <i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0>
@@ -352,7 +342,7 @@
 ;
 ; CHECKBE-LABEL: mov_int64_ff00ffff:
 ; CHECKBE:       @ %bb.0: @ %entry
-; CHECKBE-NEXT:    vmov.i64 q0, #0xffffffffffff0000
+; CHECKBE-NEXT:    vmov.i64 q0, #0xffff0000ffffffff
 ; CHECKBE-NEXT:    bx lr
 entry:
   ret <8 x i16> <i16 -1, i16 0, i16 -1, i16 -1, i16 -1, i16 0, i16 -1, i16 -1>
@@ -494,7 +484,7 @@
 ;
 ; CHECKBE-LABEL: test:
 ; CHECKBE:       @ %bb.0: @ %entry
-; CHECKBE-NEXT:    vmov.i64 q1, #0xff00ff00ff0000
+; CHECKBE-NEXT:    vmov.i64 q1, #0xff00ff000000ff00
 ; CHECKBE-NEXT:    vrev64.8 q2, q1
 ; CHECKBE-NEXT:    vrev64.8 q1, q0
 ; CHECKBE-NEXT:    vorr q1, q1, q2
@@ -514,7 +504,7 @@
 ;
 ; CHECKBE-LABEL: test2:
 ; CHECKBE:       @ %bb.0: @ %entry
-; CHECKBE-NEXT:    vmov.i64 q1, #0xffffffffffff
+; CHECKBE-NEXT:    vmov.i64 q1, #0xffff0000ffffffff
 ; CHECKBE-NEXT:    vrev64.16 q2, q1
 ; CHECKBE-NEXT:    vrev64.16 q1, q0
 ; CHECKBE-NEXT:    vorr q1, q1, q2
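
For reference, a standalone sketch of the element-reversal logic this patch adds, runnable outside of SelectionDAG. This is illustration only, not part of the patch; reverseVMOVMask and the main() harness are hypothetical names. At the point the patch touches, Imm is the 8-bit cmode=0b1110 byte-enable mask (bit i set means byte i of the 64-bit immediate is 0xff), so reversing the vector's elements for big-endian amounts to reversing the mask in groups of BytesPerElem bits. The expected values below are taken from the FileCheck lines in the tests above.

// Mirror of the loop added to isVMOVModifiedImm, as a free function.
#include <cassert>

// ElemBits is the vector's scalar size in bits (8, 16, 32 or 64).
static unsigned reverseVMOVMask(unsigned Imm, unsigned ElemBits) {
  unsigned BytesPerElem = ElemBits / 8;     // mask bits per element
  unsigned Mask = (1u << BytesPerElem) - 1; // selects one element's bits
  unsigned NumElems = 8 / BytesPerElem;     // elements per 64-bit chunk
  unsigned NewImm = 0;
  for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
    unsigned Elem = (Imm >> ElemNum * BytesPerElem) & Mask;
    NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
  }
  return NewImm;
}

int main() {
  // <4 x i16> <0, 0, 0, -1>: the little-endian mask 0b11000000
  // (#0xffff000000000000) becomes 0b00000011 (#0xffff), matching the
  // vmov_i16_a checks in big-endian-vmov.ll.
  assert(reverseVMOVMask(0xC0, 16) == 0x03);
  // For i8 elements the reversal degenerates to a bit-reversal of the
  // whole mask, i.e. a byte-reversal of the 64-bit immediate.
  assert(reverseVMOVMask(0xA2, 8) == 0x45);
  // For i64 elements (one element per 64 bits) it is the identity,
  // which is why vmov_i64_b checks the same immediate for LE and BE.
  assert(reverseVMOVMask(0x7B, 64) == 0x7B);
  // The transform is an involution: applying it twice is the identity.
  assert(reverseVMOVMask(reverseVMOVMask(0x2C, 32), 32) == 0x2C);
}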