Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2363,6 +2363,14 @@ moreElementsVectorSrc(MI, MoreTy, 1); Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_INSERT: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; case TargetOpcode::G_SELECT: if (TypeIdx != 0) return UnableToLegalize; Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -521,21 +521,32 @@ return Query.Types[0] != EltTy; }); - // FIXME: Doesn't handle extract of illegal sizes. - getActionDefinitionsBuilder({G_EXTRACT, G_INSERT}) + for (unsigned Op : {G_EXTRACT, G_INSERT}) { + unsigned BigTyIdx = Op == G_EXTRACT ? 1 : 0; + unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1; + + // FIXME: Doesn't handle extract of illegal sizes. + getActionDefinitionsBuilder(Op) .legalIf([=](const LegalityQuery &Query) { - const LLT &Ty0 = Query.Types[0]; - const LLT &Ty1 = Query.Types[1]; - return (Ty0.getSizeInBits() % 16 == 0) && - (Ty1.getSizeInBits() % 16 == 0); - }) - .moreElementsIf(isSmallOddVector(1), oneMoreElement(1)) + const LLT BigTy = Query.Types[BigTyIdx]; + const LLT LitTy = Query.Types[LitTyIdx]; + return (BigTy.getSizeInBits() % 32 == 0) && + (LitTy.getSizeInBits() % 16 == 0); + }) .widenScalarIf( - [=](const LegalityQuery &Query) { - const LLT Ty1 = Query.Types[1]; - return (Ty1.getScalarSizeInBits() < 16); - }, - LegalizeMutations::widenScalarOrEltToNextPow2(1, 16)); + [=](const LegalityQuery &Query) { + const LLT BigTy = Query.Types[BigTyIdx]; + return (BigTy.getScalarSizeInBits() < 16); + }, + LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16)) + .widenScalarIf( + [=](const LegalityQuery &Query) { + const LLT LitTy = Query.Types[LitTyIdx]; + return (LitTy.getScalarSizeInBits() < 16); + }, + LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16)) + .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx)); + } // TODO: vectors of pointers getActionDefinitionsBuilder(G_BUILD_VECTOR) Index: test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -280,20 +280,34 @@ ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 - ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; CHECK: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; CHECK: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[EXTRACT3]], [[EXTRACT4]] - ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT2]], [[AND]](<2 x s16>), 0 - ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 - ; CHECK: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT5]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT6]](s16) + ; CHECK: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT2]](<3 x s16>), 0 + ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[AND]](<2 x s16>), 0 + ; CHECK: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 + ; CHECK: [[DEF6:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF6]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32 + ; CHECK: [[DEF7:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF7]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[EXTRACT7:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT6]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT7]](s16) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 32 - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[INSERT1]](<3 x s16>), 0 - ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK: [[DEF8:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF8]], [[EXTRACT5]](<3 x s16>), 0 + ; CHECK: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[TRUNC]](s16), 32 + ; CHECK: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0 + ; CHECK: [[DEF9:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF9]], [[EXTRACT8]](<3 x s16>), 0 + ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_AND %0, %1 @@ -351,29 +365,31 @@ ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF3]], [[DEF1]](<3 x s8>), 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF3]](<4 x s8>) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF1]](<3 x s8>), 0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT1]](<4 x s16>) ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[ANYEXT3]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[AND1]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[ANYEXT5]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[AND2]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[ANYEXT7]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[AND3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[ANYEXT2]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[ANYEXT4]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[AND1]](s32) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[ANYEXT6]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[AND2]](s32) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[ANYEXT8]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[AND3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT9]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_AND %0, %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -617,19 +617,33 @@ ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 - ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 - ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[EXTRACT3]], [[EXTRACT5]](s16) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT6]], [[ASHR]](<2 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[ASHR1]](s16), 32 + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s16>), 0 - ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 + ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[EXTRACT3]], [[EXTRACT5]](s16) + ; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF4]](<4 x s16>), 0 + ; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT6]](<3 x s16>), 0 + ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[ASHR]](<2 x s16>), 0 + ; GFX9: [[EXTRACT7:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 + ; GFX9: [[DEF6:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF6]], [[EXTRACT7]](<3 x s16>), 0 + ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[ASHR1]](s16), 32 + ; GFX9: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0 + ; GFX9: [[DEF7:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF7]], [[EXTRACT8]](<3 x s16>), 0 + ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -467,11 +467,11 @@ ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s8>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s8) = G_EXTRACT [[INSERT]](<4 x s8>), 16 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<3 x s16>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[ANYEXT]](<3 x s16>), 32 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[EXTRACT1]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s8) + ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 16 %2:_(s32) = G_ANYEXT %1 @@ -486,11 +486,11 @@ ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s1>) = G_TRUNC [[DEF]](<6 x s32>) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s1>) = G_EXTRACT [[TRUNC]](<6 x s1>), 0 - ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s1>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s1>) = G_INSERT [[DEF1]], [[EXTRACT]](<5 x s1>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s1) = G_EXTRACT [[INSERT]](<6 x s1>), 4 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s1) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<5 x s16>) = G_ANYEXT [[EXTRACT]](<5 x s1>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[ANYEXT]](<5 x s16>), 64 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[EXTRACT1]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s1) + ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) %0:_(<5 x s1>) = G_IMPLICIT_DEF %1:_(s1) = G_EXTRACT %0, 4 %2:_(s32) = G_ANYEXT %1 @@ -532,7 +532,9 @@ ; CHECK-LABEL: name: extract_s16_v3s16_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 0 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<3 x s16>) = G_IMPLICIT_DEF @@ -549,7 +551,9 @@ ; CHECK-LABEL: name: extract_v2s16_v3s16_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 ; CHECK: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 0 @@ -565,7 +569,10 @@ ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[TRUNC]](<6 x s16>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<5 x s16>), 0 + ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF1]](<6 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC1]], [[EXTRACT]](<5 x s16>), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<6 x s16>), 0 ; CHECK: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -248,13 +248,23 @@ ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 ; GFX9: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[EXTRACT2]] - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT1]], [[FABS]](<2 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 - ; GFX9: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[EXTRACT3]] - ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[FABS1]](s16), 32 - ; GFX9: S_NOP 0, implicit [[INSERT1]](<3 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FABS]](<2 x s16>), 0 + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 + ; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT4:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 + ; GFX9: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[EXTRACT4]] + ; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT3]](<3 x s16>), 0 + ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FABS1]](s16), 32 + ; GFX9: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT5]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FABS %0 S_NOP 0, implicit %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -346,15 +346,29 @@ ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; GFX9: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[EXTRACT3]], [[EXTRACT4]] - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT2]], [[FADD]](<2 x s16>), 0 - ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 - ; GFX9: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 - ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[EXTRACT5]], [[EXTRACT6]] - ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[FADD1]](s16), 32 - ; GFX9: S_NOP 0, implicit [[INSERT1]](<3 x s16>) + ; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[FADD]](<2 x s16>), 0 + ; GFX9: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 + ; GFX9: [[DEF6:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF6]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32 + ; GFX9: [[DEF7:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF7]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT7:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32 + ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[EXTRACT6]], [[EXTRACT7]] + ; GFX9: [[DEF8:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF8]], [[EXTRACT5]](<3 x s16>), 0 + ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[FADD1]](s16), 32 + ; GFX9: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT8]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_FADD %0, %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir @@ -233,13 +233,23 @@ ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[EXTRACT2]] - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT1]], [[FCANONICALIZE]](<2 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[EXTRACT3]] - ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[FCANONICALIZE1]](s16), 32 - ; GFX9: S_NOP 0, implicit [[INSERT1]](<3 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FCANONICALIZE]](<2 x s16>), 0 + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 + ; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT4:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 + ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[EXTRACT4]] + ; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT3]](<3 x s16>), 0 + ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FCANONICALIZE1]](s16), 32 + ; GFX9: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT5]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FCANONICALIZE %0 S_NOP 0, implicit %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -414,17 +414,35 @@ ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF3]](<4 x s16>), 0 - ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[EXTRACT5:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT5:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 + ; GFX9: [[DEF6:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF6]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[EXTRACT4]], [[EXTRACT5]], [[EXTRACT6]] - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT3]], [[FMA]](<2 x s16>), 0 - ; GFX9: [[EXTRACT7:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 - ; GFX9: [[EXTRACT8:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 - ; GFX9: [[EXTRACT9:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT2]](<3 x s16>), 32 - ; GFX9: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[EXTRACT7]], [[EXTRACT8]], [[EXTRACT9]] - ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[FMA1]](s16), 32 - ; GFX9: S_NOP 0, implicit [[INSERT1]](<3 x s16>) + ; GFX9: [[DEF7:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF7]], [[EXTRACT3]](<3 x s16>), 0 + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT3]], [[FMA]](<2 x s16>), 0 + ; GFX9: [[EXTRACT7:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT4]](<4 x s16>), 0 + ; GFX9: [[DEF8:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF8]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT8:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32 + ; GFX9: [[DEF9:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF9]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT9:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT6]](<4 x s16>), 32 + ; GFX9: [[DEF10:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF10]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[EXTRACT10:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT7]](<4 x s16>), 32 + ; GFX9: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[EXTRACT8]], [[EXTRACT9]], [[EXTRACT10]] + ; GFX9: [[DEF11:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF11]], [[EXTRACT7]](<3 x s16>), 0 + ; GFX9: [[INSERT9:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT8]], [[FMA1]](s16), 32 + ; GFX9: [[EXTRACT11:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT9]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT11]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_IMPLICIT_DEF Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -345,15 +345,29 @@ ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; GFX9: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[EXTRACT3]], [[EXTRACT4]] - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT2]], [[FMUL]](<2 x s16>), 0 - ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 - ; GFX9: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 - ; GFX9: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[EXTRACT5]], [[EXTRACT6]] - ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[FMUL1]](s16), 32 - ; GFX9: S_NOP 0, implicit [[INSERT1]](<3 x s16>) + ; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[FMUL]](<2 x s16>), 0 + ; GFX9: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 + ; GFX9: [[DEF6:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF6]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32 + ; GFX9: [[DEF7:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF7]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT7:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[EXTRACT6]], [[EXTRACT7]] + ; GFX9: [[DEF8:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF8]], [[EXTRACT5]](<3 x s16>), 0 + ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[FMUL1]](s16), 32 + ; GFX9: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT8]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_FMUL %0, %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir @@ -246,13 +246,23 @@ ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 ; GFX9: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[EXTRACT2]] - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT1]], [[FNEG]](<2 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 - ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[EXTRACT3]] - ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[FNEG1]](s16), 32 - ; GFX9: S_NOP 0, implicit [[INSERT1]](<3 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FNEG]](<2 x s16>), 0 + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 + ; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT4:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 + ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[EXTRACT4]] + ; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT3]](<3 x s16>), 0 + ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FNEG1]](s16), 32 + ; GFX9: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT5]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FNEG %0 S_NOP 0, implicit %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir @@ -657,3 +657,290 @@ %2:_(<4 x s32>) = G_INSERT %0, %1, 64 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 ... + +--- +name: test_insert_v2s16_s16_offset0 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %1 + %3:_(<2 x s16>) = G_INSERT %0, %2, 0 + $vgpr0 = COPY %3 +... +--- +name: test_insert_v2s16_s16_offset16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %1 + %3:_(<2 x s16>) = G_INSERT %0, %2, 16 + $vgpr0 = COPY %3 +... +--- +name: test_insert_v3s16_s16_offset0 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x s16>) = G_EXTRACT %0, 0 + %2:_(s32) = COPY $vgpr2 + %3:_(s16) = G_TRUNC %2 + %4:_(<3 x s16>) = G_INSERT %1, %3, 0 + %5:_(<4 x s16>) = G_IMPLICIT_DEF + %6:_(<4 x s16>) = G_INSERT %5, %4, 0 + $vgpr0_vgpr1 = COPY %6 +... +--- +name: test_insert_v3s16_s16_offset16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x s16>) = G_EXTRACT %0, 0 + %2:_(s32) = COPY $vgpr2 + %3:_(s16) = G_TRUNC %2 + %4:_(<3 x s16>) = G_INSERT %1, %3, 16 + %5:_(<4 x s16>) = G_IMPLICIT_DEF + %6:_(<4 x s16>) = G_INSERT %5, %4, 0 + $vgpr0_vgpr1 = COPY %6 +... +--- +name: test_insert_v3s16_s16_offset32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x s16>) = G_EXTRACT %0, 0 + %2:_(s32) = COPY $vgpr2 + %3:_(s16) = G_TRUNC %2 + %4:_(<3 x s16>) = G_INSERT %1, %3, 32 + %5:_(<4 x s16>) = G_IMPLICIT_DEF + %6:_(<4 x s16>) = G_INSERT %5, %4, 0 + $vgpr0_vgpr1 = COPY %6 +... +--- +name: test_insert_v3s16_v2s16_offset0 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x s16>) = G_EXTRACT %0, 0 + %2:_(<2 x s16>) = COPY $vgpr2 + %4:_(<3 x s16>) = G_INSERT %1, %2, 0 + %5:_(<4 x s16>) = G_IMPLICIT_DEF + %6:_(<4 x s16>) = G_INSERT %5, %4, 0 + $vgpr0_vgpr1 = COPY %6 +... +--- +name: test_insert_v3s16_v2s16_offset16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x s16>) = G_EXTRACT %0, 0 + %2:_(<2 x s16>) = COPY $vgpr2 + %4:_(<3 x s16>) = G_INSERT %1, %2, 16 + %5:_(<4 x s16>) = G_IMPLICIT_DEF + %6:_(<4 x s16>) = G_INSERT %5, %4, 0 + $vgpr0_vgpr1 = COPY %6 +... +--- +name: test_insert_v3s16_s32_offset0 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x s16>) = G_EXTRACT %0, 0 + %2:_(s32) = COPY $vgpr2 + %4:_(<3 x s16>) = G_INSERT %1, %2, 0 + %5:_(<4 x s16>) = G_IMPLICIT_DEF + %6:_(<4 x s16>) = G_INSERT %5, %4, 0 + $vgpr0_vgpr1 = COPY %6 +... +--- +name: test_insert_v3s16_s32_offset16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<3 x s16>) = G_EXTRACT %0, 0 + %2:_(s32) = COPY $vgpr2 + %4:_(<3 x s16>) = G_INSERT %1, %2, 16 + %5:_(<4 x s16>) = G_IMPLICIT_DEF + %6:_(<4 x s16>) = G_INSERT %5, %4, 0 + $vgpr0_vgpr1 = COPY %6 +... +--- +name: test_insert_v4s16_s16_offset0 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %1 + %3:_(<4 x s16>) = G_INSERT %0, %2, 0 + $vgpr0_vgpr1 = COPY %3 +... +--- +name: test_insert_v4s16_s16_offset16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %1 + %3:_(<4 x s16>) = G_INSERT %0, %2, 16 + $vgpr0_vgpr1 = COPY %3 +... +--- +name: test_insert_v4s16_s16_offset32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %1 + %3:_(<4 x s16>) = G_INSERT %0, %2, 32 + $vgpr0_vgpr1 = COPY %3 +... +--- +name: test_insert_v4s16_s16_offset48 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %1 + %3:_(<4 x s16>) = G_INSERT %0, %2, 48 + $vgpr0_vgpr1 = COPY %3 +... +--- +name: test_insert_v4s16_v2s16_offset0 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<4 x s16>) = G_INSERT %0, %1, 0 + $vgpr0_vgpr1 = COPY %2 +... +--- +name: test_insert_v4s16_v2s16_offset16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<4 x s16>) = G_INSERT %0, %1, 16 + $vgpr0_vgpr1 = COPY %2 +... +--- +name: test_insert_v4s16_v2s16_offset32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<4 x s16>) = G_INSERT %0, %1, 32 + $vgpr0_vgpr1 = COPY %2 +... +--- +name: test_insert_v4s16_v2s16_offset48 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<4 x s16>) = G_INSERT %0, %1, 48 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_insert_v4s16_v3s16_offset0 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<3 x s16>) = G_EXTRACT %1, 0 + %3:_(<4 x s16>) = G_INSERT %0, %2, 0 + $vgpr0_vgpr1 = COPY %3 +... +--- +name: test_insert_v4s16_v3s16_offset16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<3 x s16>) = G_EXTRACT %1, 0 + %3:_(<4 x s16>) = G_INSERT %0, %2, 16 + $vgpr0_vgpr1 = COPY %3 +... +--- +name: test_insert_v4s16_s32_offset0 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(<4 x s16>) = G_INSERT %0, %1, 0 + $vgpr0_vgpr1 = COPY %2 +... +--- +name: test_insert_v4s16_s32_offset16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(<4 x s16>) = G_INSERT %0, %1, 16 + $vgpr0_vgpr1 = COPY %2 +... +--- +name: test_insert_v4s16_s32_offset32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(<4 x s16>) = G_INSERT %0, %1, 32 + $vgpr0_vgpr1 = COPY %2 +... +--- +name: test_insert_v4s16_s32_offset48 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(<4 x s16>) = G_INSERT %0, %1, 48 + $vgpr0_vgpr1 = COPY %2 +... Index: test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -613,19 +613,33 @@ ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 - ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 - ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT3]], [[EXTRACT5]](s16) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT6]], [[LSHR]](<2 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[LSHR1]](s16), 32 + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s16>), 0 - ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 + ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT3]], [[EXTRACT5]](s16) + ; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF4]](<4 x s16>), 0 + ; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT6]](<3 x s16>), 0 + ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[LSHR]](<2 x s16>), 0 + ; GFX9: [[EXTRACT7:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 + ; GFX9: [[DEF6:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF6]], [[EXTRACT7]](<3 x s16>), 0 + ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[LSHR1]](s16), 32 + ; GFX9: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0 + ; GFX9: [[DEF7:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF7]], [[EXTRACT8]](<3 x s16>), 0 + ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -280,20 +280,34 @@ ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 - ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; CHECK: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; CHECK: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[EXTRACT3]], [[EXTRACT4]] - ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT2]], [[OR]](<2 x s16>), 0 - ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 - ; CHECK: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT5]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT6]](s16) + ; CHECK: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT2]](<3 x s16>), 0 + ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR]](<2 x s16>), 0 + ; CHECK: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 + ; CHECK: [[DEF6:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF6]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32 + ; CHECK: [[DEF7:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF7]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[EXTRACT7:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT6]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT7]](s16) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 32 - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[INSERT1]](<3 x s16>), 0 - ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK: [[DEF8:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF8]], [[EXTRACT5]](<3 x s16>), 0 + ; CHECK: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[TRUNC]](s16), 32 + ; CHECK: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0 + ; CHECK: [[DEF9:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF9]], [[EXTRACT8]](<3 x s16>), 0 + ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_OR %0, %1 @@ -351,29 +365,31 @@ ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF3]], [[DEF1]](<3 x s8>), 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF3]](<4 x s8>) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF1]](<3 x s8>), 0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT1]](<4 x s16>) ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR1]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR2]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT1]], [[ANYEXT2]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT3]], [[ANYEXT4]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR1]](s32) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT5]], [[ANYEXT6]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR2]](s32) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT7]], [[ANYEXT8]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT9]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_OR %0, %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -251,29 +251,31 @@ ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF1]], [[TRUNC1]](<3 x s8>), 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF1]](<4 x s8>) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[TRUNC1]](<3 x s8>), 0 + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT1]](<4 x s16>) ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s32) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT2]], [[ANYEXT3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT1]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT4]], [[ANYEXT5]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT2]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT6]], [[ANYEXT7]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC2]](<4 x s8>) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT1]], [[ANYEXT2]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s32) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT3]], [[ANYEXT4]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT1]](s32) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT5]], [[ANYEXT6]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT2]](s32) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT7]], [[ANYEXT8]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT9]](<3 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -598,19 +598,33 @@ ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 - ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[EXTRACT3]], [[EXTRACT5]](s16) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT6]], [[SHL]](<2 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[SHL1]](s16), 32 + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s16>), 0 - ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 + ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[EXTRACT3]], [[EXTRACT5]](s16) + ; GFX9: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF4]](<4 x s16>), 0 + ; GFX9: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT6]](<3 x s16>), 0 + ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[SHL]](<2 x s16>), 0 + ; GFX9: [[EXTRACT7:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 + ; GFX9: [[DEF6:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF6]], [[EXTRACT7]](<3 x s16>), 0 + ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[SHL1]](s16), 32 + ; GFX9: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0 + ; GFX9: [[DEF7:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF7]], [[EXTRACT8]](<3 x s16>), 0 + ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -280,20 +280,34 @@ ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 - ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; CHECK: [[DEF4:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF4]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[EXTRACT3]], [[EXTRACT4]] - ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT2]], [[XOR]](<2 x s16>), 0 - ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 - ; CHECK: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT5]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT6]](s16) + ; CHECK: [[DEF5:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF5]], [[EXTRACT2]](<3 x s16>), 0 + ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[XOR]](<2 x s16>), 0 + ; CHECK: [[EXTRACT5:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 + ; CHECK: [[DEF6:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF6]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32 + ; CHECK: [[DEF7:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF7]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[EXTRACT7:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT6]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT7]](s16) ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[XOR1]](s32) - ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 32 - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[INSERT1]](<3 x s16>), 0 - ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK: [[DEF8:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF8]], [[EXTRACT5]](<3 x s16>), 0 + ; CHECK: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[TRUNC]](s16), 32 + ; CHECK: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0 + ; CHECK: [[DEF9:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF9]], [[EXTRACT8]](<3 x s16>), 0 + ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_XOR %0, %1 @@ -351,29 +365,31 @@ ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF3]], [[DEF1]](<3 x s8>), 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF3]](<4 x s8>) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF1]](<3 x s8>), 0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT1]](<4 x s16>) ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[XOR]](s32) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT2]], [[ANYEXT3]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[XOR1]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT4]], [[ANYEXT5]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[XOR2]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT6]], [[ANYEXT7]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT1]], [[ANYEXT2]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[XOR]](s32) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT3]], [[ANYEXT4]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[XOR1]](s32) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT5]], [[ANYEXT6]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[XOR2]](s32) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT7]], [[ANYEXT8]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT9]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_XOR %0, %1