Index: llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -271,11 +271,11 @@
   return true;
 }
 
-static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
-                     ShuffleVectorPseudo &MatchInfo) {
-  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
-  auto Lane = getSplatIndex(MI);
-  if (!Lane || *Lane != 0)
+/// Helper function for matchDup.
+static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
+                                        MachineRegisterInfo &MRI,
+                                        ShuffleVectorPseudo &MatchInfo) {
+  if (Lane != 0)
     return false;
 
   // Try to match a vector splat operation into a dup instruction.
@@ -286,7 +286,8 @@
   //   %cst0:gpr(s32) = G_CONSTANT i32 0
   //   %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
   //   %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
-  //   %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>)
+  //   %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
+  //   %zerovec(<2 x s32>)
   //
   // ...into:
   // %splat = G_DUP %scalar
@@ -296,10 +297,9 @@
                MI.getOperand(1).getReg(), MRI);
   if (!InsMI)
     return false;
 
-  // Match the undef vector operand.
-  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
-                    InsMI->getOperand(1).getReg(), MRI))
+  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
+                    MRI))
     return false;
 
   // Match the index constant 0.
@@ -307,12 +307,47 @@
   if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
     return false;
 
-  Register Dst = MI.getOperand(0).getReg();
+  MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
+                                  {InsMI->getOperand(2).getReg()});
+  return true;
+}
+
+/// Helper function for matchDup.
+static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
+                                    MachineRegisterInfo &MRI,
+                                    ShuffleVectorPseudo &MatchInfo) {
+  assert(Lane >= 0 && "Expected non-negative lane?");
+  // Check if the LHS is a G_BUILD_VECTOR and the referenced lane is not a
+  // constant. If so, we can just reference the lane's definition directly.
+  auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
+                                  MI.getOperand(1).getReg(), MRI);
+  if (!BuildVecMI)
+    return false;
+  Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
+  if (getConstantVRegValWithLookThrough(Reg, MRI))
+    return false;
   MatchInfo =
-      ShuffleVectorPseudo(AArch64::G_DUP, Dst, {InsMI->getOperand(2).getReg()});
+      ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
   return true;
 }
 
+static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     ShuffleVectorPseudo &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  auto MaybeLane = getSplatIndex(MI);
+  if (!MaybeLane)
+    return false;
+  int Lane = *MaybeLane;
+  // If the splat lane is undef (negative), any lane works; default to lane 0
+  // so we can still emit a G_DUP.
+  if (Lane < 0)
+    Lane = 0;
+  if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
+    return true;
+  if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
+    return true;
+  return false;
+}
+
 static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
                      ShuffleVectorPseudo &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
Index: llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
@@ -328,3 +328,120 @@
     %splat:_(<8 x s8>) = G_SHUFFLE_VECTOR %ins(<8 x s8>), %undef, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
     $d0 = COPY %splat(<8 x s8>)
     RET_ReallyLR implicit $d0
+
+...
+---
+name: build_vector
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $w2, $w3
+    ; The G_SHUFFLE_VECTOR is fed by a G_BUILD_VECTOR, and the 0th input
+    ; operand is not a constant. We should get a G_DUP.
+    ;
+    ; CHECK-LABEL: name: build_vector
+    ; CHECK: liveins: $w0, $w1, $w2, $w3
+    ; CHECK: %lane:_(s32) = COPY $w0
+    ; CHECK: %shuf:_(<4 x s32>) = G_DUP %lane(s32)
+    ; CHECK: $q0 = COPY %shuf(<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %lane:_(s32) = COPY $w0
+    %b:_(s32) = COPY $w1
+    %c:_(s32) = COPY $w2
+    %d:_(s32) = COPY $w3
+    %undef:_(<4 x s32>) = G_IMPLICIT_DEF
+    %buildvec:_(<4 x s32>) = G_BUILD_VECTOR %lane, %b, %c, %d
+    %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %buildvec(<4 x s32>), %undef, shufflemask(0, 0, 0, 0)
+    $q0 = COPY %shuf(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: no_build_vector
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $w2, $w3
+    ; The G_SHUFFLE_VECTOR is fed by a G_BUILD_VECTOR, but the 0th input
+    ; operand is a constant. We should not get a G_DUP.
+    ;
+    ; CHECK-LABEL: name: no_build_vector
+    ; CHECK: liveins: $w0, $w1, $w2, $w3
+    ; CHECK: %lane:_(s32) = G_CONSTANT i32 1
+    ; CHECK: %b:_(s32) = COPY $w1
+    ; CHECK: %c:_(s32) = COPY $w2
+    ; CHECK: %d:_(s32) = COPY $w3
+    ; CHECK: %undef:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: %buildvec:_(<4 x s32>) = G_BUILD_VECTOR %lane(s32), %b(s32), %c(s32), %d(s32)
+    ; CHECK: %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %buildvec(<4 x s32>), %undef, shufflemask(0, 0, 0, 0)
+    ; CHECK: $q0 = COPY %shuf(<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %lane:_(s32) = G_CONSTANT i32 1
+    %b:_(s32) = COPY $w1
+    %c:_(s32) = COPY $w2
+    %d:_(s32) = COPY $w3
+    %undef:_(<4 x s32>) = G_IMPLICIT_DEF
+    %buildvec:_(<4 x s32>) = G_BUILD_VECTOR %lane, %b, %c, %d
+    %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %buildvec(<4 x s32>), %undef, shufflemask(0, 0, 0, 0)
+    $q0 = COPY %shuf(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: build_vector_cst_in_other_lane
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $w2, $w3
+    ; The G_SHUFFLE_VECTOR is fed by a G_BUILD_VECTOR with a constant in lane
+    ; 1. The 0th input is not a constant, so we should still get a G_DUP.
+    ;
+    ; CHECK-LABEL: name: build_vector_cst_in_other_lane
+    ; CHECK: liveins: $w0, $w1, $w2, $w3
+    ; CHECK: %lane:_(s32) = COPY $w0
+    ; CHECK: %shuf:_(<4 x s32>) = G_DUP %lane(s32)
+    ; CHECK: $q0 = COPY %shuf(<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %lane:_(s32) = COPY $w0
+    %b:_(s32) = G_CONSTANT i32 1
+    %c:_(s32) = COPY $w2
+    %d:_(s32) = COPY $w3
+    %undef:_(<4 x s32>) = G_IMPLICIT_DEF
+    %buildvec:_(<4 x s32>) = G_BUILD_VECTOR %lane, %b, %c, %d
+    %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %buildvec(<4 x s32>), %undef, shufflemask(0, 0, 0, 0)
+    $q0 = COPY %shuf(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name: build_vector_undef_lane
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $w2, $w3
+    ; A negative (undef) splat lane defaults to lane 0, and the 0th input is
+    ; not a constant, so we should still get a G_DUP here.
+    ;
+    ; CHECK-LABEL: name: build_vector_undef_lane
+    ; CHECK: liveins: $w0, $w1, $w2, $w3
+    ; CHECK: %lane:_(s32) = COPY $w0
+    ; CHECK: %shuf:_(<4 x s32>) = G_DUP %lane(s32)
+    ; CHECK: $q0 = COPY %shuf(<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %lane:_(s32) = COPY $w0
+    %b:_(s32) = G_CONSTANT i32 1
+    %c:_(s32) = G_CONSTANT i32 2
+    %d:_(s32) = G_CONSTANT i32 3
+    %undef:_(<4 x s32>) = G_IMPLICIT_DEF
+    %buildvec:_(<4 x s32>) = G_BUILD_VECTOR %lane, %b, %c, %d
+    %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %buildvec(<4 x s32>), %undef, shufflemask(-1, -1, -1, -1)
+    $q0 = COPY %shuf(<4 x s32>)
+    RET_ReallyLR implicit $q0
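
For reference, a minimal before/after sketch of the new G_BUILD_VECTOR path at the MIR level, reusing the register names from the build_vector test above. The eventual lowering to a scalar-to-vector dup (for example dup v0.4s, w0, assuming %lane arrives in $w0) happens later during instruction selection and is an expectation, not something this combine itself emits.

  ; Before the post-legalizer combiner: a splat shuffle of lane 0 of a
  ; G_BUILD_VECTOR whose lane 0 is not a constant.
  %buildvec:_(<4 x s32>) = G_BUILD_VECTOR %lane, %b, %c, %d
  %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %buildvec(<4 x s32>), %undef, shufflemask(0, 0, 0, 0)

  ; After matchDupFromBuildVector fires, the splat collapses to a single
  ; target pseudo that references the lane's definition directly:
  %shuf:_(<4 x s32>) = G_DUP %lane(s32)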