Summary of this patch (descriptive text only; it precedes the first
"diff --git" header and is not part of any hunk):

* ARMInstrInfo.td: the SDNode definition for "ARMISD::VECTOR_REG_CAST" is
  renamed from ARMVectorRegCast to ARMVectorRegCastImpl. The name
  ARMVectorRegCast is then re-defined as a PatFrags that matches either
  ARMVectorRegCastImpl or bitconvert; its predicate rejects the match when
  the node's opcode is ISD::BITCAST and the data layout is big-endian.

* ARMInstrMVE.td, ARMInstrNEON.td: the patterns that select a cast of an
  MQPR / QPR / DPR value to the source register itself now match
  ARMVectorRegCastImpl directly instead of ARMVectorRegCast. The comments
  added alongside give the reason: with the PatFrags form, the instances
  whose input and output types are the same would have their bitconvert
  elided, leaving a pattern that matches nothing.

NOTE(review): this file was recovered from a whitespace-collapsed copy. Line
breaks were re-derived from the hunk line counts; indentation inside the
hunks is reconstructed and should be confirmed against the target files
before applying.

diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -307,7 +307,22 @@
 // because that's what (MVE) VSTRH.16 followed by VLDRB.8 would do. So the
 // bitconvert would have to emit a VREV16.8 instruction, whereas the
 // VECTOR_REG_CAST emits no code at all if the vector is already in a register.
-def ARMVectorRegCast : SDNode<"ARMISD::VECTOR_REG_CAST", SDTUnaryOp>;
+def ARMVectorRegCastImpl : SDNode<"ARMISD::VECTOR_REG_CAST", SDTUnaryOp>;
+
+// In little-endian, VECTOR_REG_CAST is often turned into bitconvert during
+// lowering (because in that situation they're identical). So an isel pattern
+// that needs to match something that's _logically_ a VECTOR_REG_CAST must
+// _physically_ match a different node type depending on endianness.
+//
+// This 'PatFrags' instance is a centralized facility to make that easy. It
+// matches VECTOR_REG_CAST in either endianness, and also bitconvert in the
+// endianness where it's equivalent.
+def ARMVectorRegCast: PatFrags<
+    (ops node:$x), [(ARMVectorRegCastImpl node:$x), (bitconvert node:$x)], [{
+      // Reject a match against bitconvert (aka ISD::BITCAST) if big-endian
+      return !(CurDAG->getDataLayout().isBigEndian() &&
+               N->getOpcode() == ISD::BITCAST);
+    }]>;
 
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -4013,9 +4013,15 @@
                 (VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>;
   }
 
+  // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
+  // rather than the more general 'ARMVectorRegCast' which would also
+  // match some bitconverts. If we use the latter in cases where the
+  // input and output types are the same, the bitconvert gets elided
+  // and we end up generating a nonsense match of nothing.
+
   foreach VT = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
     foreach VT2 = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
-      def : Pat<(VT (ARMVectorRegCast (VT2 MQPR:$src))), (VT MQPR:$src)>;
+      def : Pat<(VT (ARMVectorRegCastImpl (VT2 MQPR:$src))), (VT MQPR:$src)>;
 }
 
 // end of MVE compares
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -7532,13 +7532,19 @@
 }
 
 let Predicates = [HasNEON] in {
+  // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
+  // rather than the more general 'ARMVectorRegCast' which would also
+  // match some bitconverts. If we use the latter in cases where the
+  // input and output types are the same, the bitconvert gets elided
+  // and we end up generating a nonsense match of nothing.
+
   foreach VT = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
     foreach VT2 = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
-      def : Pat<(VT (ARMVectorRegCast (VT2 QPR:$src))), (VT QPR:$src)>;
+      def : Pat<(VT (ARMVectorRegCastImpl (VT2 QPR:$src))), (VT QPR:$src)>;
 
   foreach VT = [ v8i8, v4i16, v4f16, v2i32, v2f32, v1i64, f64 ] in
     foreach VT2 = [ v8i8, v4i16, v4f16, v2i32, v2f32, v1i64, f64 ] in
-      def : Pat<(VT (ARMVectorRegCast (VT2 DPR:$src))), (VT DPR:$src)>;
+      def : Pat<(VT (ARMVectorRegCastImpl (VT2 DPR:$src))), (VT DPR:$src)>;
 }
 
 // Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian