Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -839,6 +839,8 @@
 
   bool fallBackToDAGISel(const Instruction &Inst) const override;
 
+  SDValue LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const;
+
   /// SVE code generation for fixed length vectors does not custom lower
   /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
   /// merge. However, merging them creates a BUILD_VECTOR that is just as
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -284,6 +284,8 @@
   addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
   addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
 
+  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);
+
   if (Subtarget->hasLS64()) {
     addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
     setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
@@ -5028,6 +5030,8 @@
   default:
     llvm_unreachable("unimplemented operand");
     return SDValue();
+  case ISD::CopyToReg:
+    return LowerCopyToReg(Op, DAG);
   case ISD::BITCAST:
     return LowerBITCAST(Op, DAG);
   case ISD::GlobalAddress:
@@ -5281,6 +5285,57 @@
   }
 }
 
+static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
+                                  const APInt &Bits,
+                                  const SDValue *LHS = nullptr);
+static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
+                               APInt &UndefBits);
+
+// Try to rematerialize a repeatedly-used f32 constant as a splatted AdvSIMD
+// MOVI plus an extract of lane 0, instead of reloading it from the constant
+// pool at every use.
+SDValue AArch64TargetLowering::LowerCopyToReg(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDValue Chain = Op->getOperand(0);
+  SDValue LHS = Op->getOperand(1);
+  SDValue RHS = Op->getOperand(2);
+  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(RHS);
+  EVT VT = LHS->getValueType(0);
+  if (!CFP || !LHS->hasNUsesOfValue(2, 0) ||
+      VT.getSimpleVT().SimpleTy != MVT::f32)
+    return SDValue();
+
+  const APFloat &FPVal = CFP->getValueAPF();
+  const APInt ImmInt = FPVal.bitcastToAPInt();
+  uint64_t Imm = ImmInt.getZExtValue();
+  // Skip immediates that getFP32Imm can encode; those already lower to fmov.
+  if (AArch64_AM::getFP32Imm(ImmInt) != -1 || FPVal.isPosZero() ||
+      !AArch64_AM::isAdvSIMDModImmType4((Imm << 32) | Imm))
+    return SDValue();
+
+  SDLoc dl(Op);
+  MVT VecTy = MVT::v2f32;
+  APInt DefBits(VecTy.getSizeInBits(), 0);
+  APInt UndefBits(VecTy.getSizeInBits(), 0);
+  SDValue Parts[2];
+  for (int Elt = 0; Elt < 2; Elt++)
+    Parts[Elt] = RHS;
+  SDValue DupVal = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f32, Parts);
+
+  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(DupVal.getNode());
+  if (!resolveBuildVector(BVN, DefBits, UndefBits))
+    return SDValue();
+
+  SDValue NewOp =
+      tryAdvSIMDModImm32(AArch64ISD::MOVIshift, DupVal, DAG, DefBits);
+  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, NewOp,
+                            DAG.getConstant(0, dl, MVT::i32));
+
+  // Redirect all users of the FP constant to the extracted MOVI lane.
+  DAG.ReplaceAllUsesWith(RHS, Elt);
+  return Op;
+}
+
 bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
   return !Subtarget->useSVEForFixedLengthVectors();
 }
@@ -10288,8 +10343,7 @@
 
 // Try 32-bit splatted SIMD immediate.
 static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
-                                  const APInt &Bits,
-                                  const SDValue *LHS = nullptr) {
+                                  const APInt &Bits, const SDValue *LHS) {
   if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
     uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
     EVT VT = Op.getValueType();
Index: llvm/test/Transforms/LoopVectorize/AArch64/remat-const-float-simd.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/AArch64/remat-const-float-simd.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -verify-machineinstrs | FileCheck %s
+
+; Check that float(0x7fffffff) can be rematerialized with a SIMD instruction.
+target triple = "aarch64-unknown-linux-gnu"
+
+; float foo(void) { return (float)0x7fffffff; }
+define float @foo() {
+; CHECK: movi v0.2s, #79, lsl #24
+entry:
+  ret float 0x41E0000000000000
+}
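
A negative companion test would also pin down the getFP32Imm early-out above. The sketch below is not part of the diff; the @bar name and the exact fmov spelling in the CHECK line are assumptions about llc's asm output. It checks that immediates AArch64_AM::getFP32Imm accepts keep lowering to fmov and are not rewritten into a movi by LowerCopyToReg:

; float bar(void) { return 1.0f; }
define float @bar() {
; CHECK: fmov s0, #1.0
entry:
  ret float 1.0
}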