Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4783,8 +4783,44 @@
   // If all elements are constants and the case above didn't get hit, fall back
   // to the default expansion, which will generate a load from the constant
   // pool.
-  if (isConstant)
+  if (isConstant) {
+    if (!ST->isLittle()) {
+      // Custom lowering for big endian: reverse the order of the elements.
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      SmallVector<Constant *, 16> CV;
+      EVT OpVT = Op.getOperand(0).getValueType();
+      EVT EltVT = VT.getVectorElementType();
+      for (unsigned i = NumElts; i > 0; i--) {
+        if (ConstantFPSDNode *V =
+                dyn_cast<ConstantFPSDNode>(Op.getOperand(i - 1))) {
+          CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+        } else if (ConstantSDNode *V =
+                       dyn_cast<ConstantSDNode>(Op.getOperand(i - 1))) {
+          if (OpVT == EltVT)
+            CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+          else {
+            // If OpVT and EltVT don't match, EltVT is not legal and the
+            // element values have been promoted/truncated earlier. Undo this;
+            // we don't want a v16i8 to become a v16i32, for example.
+            const ConstantInt *CI = V->getConstantIntValue();
+            CV.push_back(ConstantInt::get(
+                EltVT.getTypeForEVT(*DAG.getContext()), CI->getZExtValue()));
+          }
+        } else {
+          assert(Op.getOperand(i - 1).getOpcode() == ISD::UNDEF);
+          Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+          CV.push_back(UndefValue::get(OpNTy));
+        }
+      }
+      Constant *CP = ConstantVector::get(CV);
+      SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+      unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+      return DAG.getLoad(VT, DL, DAG.getEntryNode(), CPIdx,
+                         MachinePointerInfo::getConstantPool(),
+                         false, false, false, Alignment);
+    }
     return SDValue();
+  }
 
   // Try to lower this in lowering ShuffleVector way.
   SDValue V0, V1;
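For context, the effect of the reversal can be modeled outside of LLVM. The sketch below is a standalone illustration, not LLVM code: it assumes the AArch64 big-endian semantics of "ldr q" (the 16 bytes are read as one big-endian 128-bit value, so the element stored last in memory lands in lane 0) and checks that a reversed constant-pool entry for <4 x i32> <32, 33, 34, 35> yields the lanes the IR expects.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Constant pool emitted by the lowering above for
  // <4 x i32> <32, 33, 34, 35> on big endian: elements reversed, each
  // element itself stored big-endian (.word 35, .word 34, .word 33, .word 32).
  const uint32_t Pool[4] = {35, 34, 33, 32};
  uint8_t Mem[16];
  for (int e = 0; e < 4; ++e)
    for (int b = 0; b < 4; ++b)
      Mem[e * 4 + b] = uint8_t(Pool[e] >> (24 - 8 * b)); // big-endian bytes

  // Model of a big-endian "ldr q": the 16 bytes are read as one big-endian
  // 128-bit value, so lane i of a v4i32 is the big-endian word at memory
  // offset 12 - 4*i.
  for (int i = 0; i < 4; ++i) {
    const uint8_t *P = Mem + (12 - 4 * i);
    uint32_t Lane = uint32_t(P[0]) << 24 | uint32_t(P[1]) << 16 |
                    uint32_t(P[2]) << 8 | uint32_t(P[3]);
    assert(Lane == uint32_t(32 + i)); // lanes come out in source order
    printf("lane %d = %u\n", i, Lane);
  }
  return 0;
}

This descending memory order is exactly what the CHECK-BE lines in the new test below look for.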
Index: test/CodeGen/AArch64/build-vector.ll
===================================================================
--- test/CodeGen/AArch64/build-vector.ll
+++ test/CodeGen/AArch64/build-vector.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -march aarch64 -mattr neon -o - | FileCheck %s -check-prefix CHECK-LE
+; RUN: llc < %s -march aarch64_be -mattr neon -o - | FileCheck %s -check-prefix CHECK-BE
+
+; CHECK-LE: .word 32
+; CHECK-LE-NEXT: .word 33
+; CHECK-BE: .word 35
+; CHECK-BE-NEXT: .word 34
+
+define void @build_const_vector_4i32( <4 x i32>* %storeaddr ) {
+; CHECK-LE-LABEL: build_const_vector_4i32:
+; CHECK-LE: ldr q
+; CHECK-BE-LABEL: build_const_vector_4i32:
+; CHECK-BE: ldr q
+  store <4 x i32> < i32 32, i32 33, i32 34, i32 35 >, <4 x i32>* %storeaddr
+  ret void
+}
+
+; CHECK-LE: .xword 64
+; CHECK-LE-NEXT: .xword 65
+; CHECK-BE: .xword 65
+; CHECK-BE-NEXT: .xword 64
+
+define void @build_const_vector_2i64( <2 x i64>* %storeaddr ) {
+; CHECK-LE-LABEL: build_const_vector_2i64:
+; CHECK-LE: ldr q
+; CHECK-BE-LABEL: build_const_vector_2i64:
+; CHECK-BE: ldr q
+  store <2 x i64> < i64 64, i64 65 >, <2 x i64>* %storeaddr
+  ret void
+}
+
+; CHECK-LE: .hword 16
+; CHECK-LE-NEXT: .hword 17
+; CHECK-BE: .hword 23
+; CHECK-BE-NEXT: .hword 22
+
+define void @build_const_vector_8i16( <8 x i16>* %storeaddr ) {
+; CHECK-LE-LABEL: build_const_vector_8i16:
+; CHECK-LE: ldr q
+; CHECK-BE-LABEL: build_const_vector_8i16:
+; CHECK-BE: ldr q
+  store <8 x i16> < i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23 >, <8 x i16>* %storeaddr
+  ret void
+}
+
+; CHECK-LE: .byte 8
+; CHECK-LE-NEXT: .byte 9
+; CHECK-BE: .byte 23
+; CHECK-BE-NEXT: .byte 22
+
+define void @build_const_vector_16i8( <16 x i8>* %storeaddr ) {
+; CHECK-LE-LABEL: build_const_vector_16i8:
+; CHECK-LE: ldr q
+; CHECK-BE-LABEL: build_const_vector_16i8:
+; CHECK-BE: ldr q
+  store <16 x i8> < i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15,
+                    i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23 >, <16 x i8>* %storeaddr
+  ret void
+}
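One detail worth calling out in the ISelLowering hunk is the OpVT != EltVT branch: for v16i8 the i8 element type is not legal on AArch64, so by the time the lowering runs the BUILD_VECTOR operands have been promoted to i32, and each constant must be narrowed back to the real element width before it goes into the pool. A minimal standalone model of that step (plain C++ with a hypothetical helper name, not the LLVM API):

#include <cassert>
#include <cstdint>

// Hypothetical model of the OpVT != EltVT branch: a v16i8 BUILD_VECTOR
// arrives with its i8 constants promoted to i32, and each one is narrowed
// back to the real element width so the constant pool holds 16 bytes
// rather than 16 words.
uint8_t narrowToI8(uint64_t Promoted) {
  // Mirrors ConstantInt::get(Int8Ty, CI->getZExtValue()), which truncates
  // the supplied value to the target type's bit width.
  return uint8_t(Promoted & 0xff);
}

int main() {
  // The in-range constants from the v16i8 test above narrow unchanged...
  for (uint64_t v = 8; v <= 23; ++v)
    assert(narrowToI8(v) == v);
  // ...and any high bits left over from promotion are discarded.
  assert(narrowToI8(0xffffff42ULL) == 0x42);
  return 0;
}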