Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -6442,9 +6442,10 @@
 /// immediate" operand (e.g., VMOV). If so, return the encoded value.
 static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
                                  unsigned SplatBitSize, SelectionDAG &DAG,
-                                 const SDLoc &dl, EVT &VT, bool is128Bits,
+                                 const SDLoc &dl, EVT &VT, EVT VectorVT,
                                  VMOVModImmType type) {
   unsigned OpCmode, Imm;
+  bool is128Bits = VectorVT.is128BitVector();
 
   // SplatBitSize is set to the smallest size that splats the vector, so a
   // zero vector will always have SplatBitSize == 8. However, NEON modified
@@ -6562,9 +6563,18 @@
       ImmMask <<= 1;
     }
 
-    if (DAG.getDataLayout().isBigEndian())
-      // swap higher and lower 32 bit word
-      Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
+    if (DAG.getDataLayout().isBigEndian()) {
+      // Reverse the order of elements within the vector.
+      unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
+      unsigned Mask = (1 << BytesPerElem) - 1;
+      unsigned NumElems = 8 / BytesPerElem;
+      unsigned NewImm = 0;
+      for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
+        unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
+        NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
+      }
+      Imm = NewImm;
+    }
 
     // Op=1, Cmode=1110.
     OpCmode = 0x1e;
@@ -6657,7 +6667,7 @@
 
   // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
   SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
-                                     VMovVT, false, VMOVModImm);
+                                     VMovVT, VT, VMOVModImm);
   if (NewVal != SDValue()) {
     SDLoc DL(Op);
     SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
@@ -6674,7 +6684,7 @@
 
   // Finally, try a VMVN.i32
   NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
-                             false, VMVNModImm);
+                             VT, VMVNModImm);
   if (NewVal != SDValue()) {
     SDLoc DL(Op);
     SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
@@ -7184,10 +7194,9 @@
         (ST->hasMVEIntegerOps() && SplatBitSize <= 32)) {
       // Check if an immediate VMOV works.
       EVT VmovVT;
-      SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
-                                      SplatUndef.getZExtValue(), SplatBitSize,
-                                      DAG, dl, VmovVT, VT.is128BitVector(),
-                                      VMOVModImm);
+      SDValue Val =
+          isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+                            SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
 
       if (Val.getNode()) {
         SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
@@ -7197,9 +7206,8 @@
       // Try an immediate VMVN.
       uint64_t NegatedImm = (~SplatBits).getZExtValue();
       Val = isVMOVModifiedImm(
-          NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
-          DAG, dl, VmovVT, VT.is128BitVector(),
-          ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
+          NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
+          VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
       if (Val.getNode()) {
         SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
@@ -12333,8 +12341,7 @@
     EVT VbicVT;
     SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
                                     SplatUndef.getZExtValue(), SplatBitSize,
-                                    DAG, dl, VbicVT, VT.is128BitVector(),
-                                    OtherModImm);
+                                    DAG, dl, VbicVT, VT, OtherModImm);
     if (Val.getNode()) {
       SDValue Input =
           DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
@@ -12638,10 +12645,9 @@
       BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
     if (SplatBitSize <= 64) {
       EVT VorrVT;
-      SDValue Val = isVMOVModifiedImm(SplatBits.getZExtValue(),
-                                      SplatUndef.getZExtValue(), SplatBitSize,
-                                      DAG, dl, VorrVT, VT.is128BitVector(),
-                                      OtherModImm);
+      SDValue Val =
+          isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+                            SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
       if (Val.getNode()) {
         SDValue Input =
             DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
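For reviewers: with Op=1, Cmode=0x1e (VMOV.i64), Imm is an 8-bit byte-enable mask, one bit per byte of the 64-bit constant, so reversing the vector's elements for big-endian means reversing BytesPerElem-bit groups of that mask rather than nibble-swapping it. Below is a minimal standalone sketch of the new loop, checked against the immediates the tests that follow expect; the helper name reverseByteEnableMask and the main() harness are ours for illustration, not LLVM API.

#include <cassert>

// Reverse the per-element bit groups of the 8-bit VMOV.i64 byte-enable mask.
// Each element of ScalarSizeInBits owns ScalarSizeInBits/8 bits of the mask.
unsigned reverseByteEnableMask(unsigned Imm, unsigned ScalarSizeInBits) {
  unsigned BytesPerElem = ScalarSizeInBits / 8; // mask bits per element
  unsigned Mask = (1 << BytesPerElem) - 1;
  unsigned NumElems = 8 / BytesPerElem;         // elements per 64-bit d-reg
  unsigned NewImm = 0;
  for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
    unsigned Elem = (Imm >> ElemNum * BytesPerElem) & Mask;
    NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
  }
  return NewImm;
}

int main() {
  // <2 x i32> <0, 0xff000000>: LE immediate 0xff00000000000000 is mask
  // 0b10000000; swapping the two 4-bit groups gives 0b00001000, i.e. the
  // BE immediate 0xff000000 (vmov_i32_e below).
  assert(reverseByteEnableMask(0x80, 32) == 0x08);
  // <4 x i16> <0, 0, 0, -1>: 0b11000000 -> 0b00000011, i.e. LE
  // 0xffff000000000000 -> BE 0xffff (vmov_i16_a below).
  assert(reverseByteEnableMask(0xc0, 16) == 0x03);
  // <8 x i8>: plain per-byte reversal, 0b10000000 -> 0b00000001 (vmov_i8).
  assert(reverseByteEnableMask(0x80, 8) == 0x01);
  // <1 x i64>: NumElems == 1, so the mask is unchanged (vmov_i64_a/b).
  assert(reverseByteEnableMask(0x80, 64) == 0x80);
  // The removed code nibble-swapped unconditionally, which is correct only
  // when the elements are 32 bits wide.
  return 0;
}

Because the loop is the identity for 64-bit elements, i64 splats get the same immediate on both endiannesses, which is why the vmov_i64 tests and v_movQi64 below carry a single CHECK line instead of a CHECK-LE/CHECK-BE pair.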
Index: llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
===================================================================
--- llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
+++ llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
@@ -98,7 +98,7 @@
 define void @conv_v4i16_to_v4f16( <4 x i16> %a, <4 x half>* %store ) {
 ; CHECK-LABEL: conv_v4i16_to_v4f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i64 d16, #0xffffffff0000
+; CHECK-NEXT:    vmov.i64 d16, #0xffff00000000ffff
 ; CHECK-NEXT:    vldr d17, [r0]
 ; CHECK-NEXT:    vrev64.16 d18, d0
 ; CHECK-NEXT:    vrev64.16 d17, d17
Index: llvm/test/CodeGen/ARM/big-endian-vmov.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/big-endian-vmov.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -mtriple armv7-eabi -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc < %s -mtriple armebv7-eabi -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+
+; CHECK-LABEL: vmov_i8:
+; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <8 x i8> @vmov_i8() {
+  ret <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1>
+}
+
+; CHECK-LABEL: vmov_i16_a:
+; CHECK-LE: vmov.i64 d0, #0xffff000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xffff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <4 x i16> @vmov_i16_a() {
+  ret <4 x i16> <i16 0, i16 0, i16 0, i16 -1>
+}
+
+; CHECK-LABEL: vmov_i16_b:
+; CHECK-LE: vmov.i64 d0, #0xff000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <4 x i16> @vmov_i16_b() {
+  ret <4 x i16> <i16 0, i16 0, i16 0, i16 255>
+}
+
+; CHECK-LABEL: vmov_i16_c:
+; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff00{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <4 x i16> @vmov_i16_c() {
+  ret <4 x i16> <i16 0, i16 0, i16 0, i16 65280>
+}
+
+; CHECK-LABEL: vmov_i32_a:
+; CHECK-LE: vmov.i64 d0, #0xffffffff00000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xffffffff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_a() {
+  ret <2 x i32> <i32 0, i32 -1>
+}
+
+; CHECK-LABEL: vmov_i32_b:
+; CHECK-LE: vmov.i64 d0, #0xff00000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_b() {
+  ret <2 x i32> <i32 0, i32 255>
+}
+
+; CHECK-LABEL: vmov_i32_c:
+; CHECK-LE: vmov.i64 d0, #0xff0000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff00{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_c() {
+  ret <2 x i32> <i32 0, i32 65280>
+}
+
+; CHECK-LABEL: vmov_i32_d:
+; CHECK-LE: vmov.i64 d0, #0xff000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff0000{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_d() {
+  ret <2 x i32> <i32 0, i32 16711680>
+}
+
+; CHECK-LABEL: vmov_i32_e:
+; CHECK-LE: vmov.i64 d0, #0xff00000000000000{{$}}
+; CHECK-BE: vmov.i64 d0, #0xff000000{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <2 x i32> @vmov_i32_e() {
+  ret <2 x i32> <i32 0, i32 4278190080>
+}
+
+; CHECK-LABEL: vmov_i64_a:
+; CHECK: vmov.i8 d0, #0xff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <1 x i64> @vmov_i64_a() {
+  ret <1 x i64> <i64 -1>
+}
+
+; CHECK-LABEL: vmov_i64_b:
+; CHECK: vmov.i64 d0, #0xffff00ff0000ff{{$}}
+; CHECK-NEXT: bx lr
+define arm_aapcs_vfpcc <1 x i64> @vmov_i64_b() {
+  ret <1 x i64> <i64 72056498804490495>
+}
Index: llvm/test/CodeGen/ARM/vmov.ll
===================================================================
--- llvm/test/CodeGen/ARM/vmov.ll
+++ llvm/test/CodeGen/ARM/vmov.ll
@@ -192,8 +192,7 @@
 
 define <2 x i64> @v_movQi64() nounwind {
 ;CHECK-LABEL: v_movQi64:
-;FIXME: Incorrect immediate is generated for big-endian
-;FIXME-CHECK: vmov.i64 q{{.*}}, #0xff0000ff0000ffff
+;CHECK: vmov.i64 q{{.*}}, #0xff0000ff0000ffff
 ;CHECK-NOT: vrev
   ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
 }