Index: include/llvm/CodeGen/GlobalISel/LegalizerHelper.h =================================================================== --- include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -124,6 +124,11 @@ /// result elements, and replacing the vreg of the operand in place. void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx); + /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a + /// Use by producing a vector with undefined high elements, extracting the + /// original vector type, and replacing the vreg of the operand in place. + void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx); + LegalizeResult widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); LegalizeResult Index: include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h =================================================================== --- include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -465,6 +465,11 @@ /// \return The newly created instruction. MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op); + /// Build and insert \p Dst = G_BITCAST \p Src + MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_BITCAST, {Dst}, {Src}); + } + /// \return The opcode of the extension the target wants to use for boolean /// values. unsigned getBoolExtOp(bool IsVec, bool IsFP) const; Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -764,6 +764,36 @@ MO.setReg(DstExt); } +void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT WideTy, + unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + + LLT OldTy = MRI.getType(MO.getReg()); + unsigned OldElts = OldTy.getNumElements(); + unsigned NewElts = WideTy.getNumElements(); + + unsigned NumParts = NewElts / OldElts; + + // Use concat_vectors if the result is a multiple of the number of elements. 
+  if (NumParts * OldElts == NewElts) {
+    SmallVector<unsigned, 8> Parts;
+    Parts.push_back(MO.getReg());
+
+    unsigned ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
+    for (unsigned I = 1; I != NumParts; ++I)
+      Parts.push_back(ImpDef);
+
+    auto Concat = MIRBuilder.buildConcatVectors(WideTy, Parts);
+    MO.setReg(Concat.getReg(0));
+    return;
+  }
+
+  unsigned WideReg = MRI.createGenericVirtualRegister(WideTy);
+  unsigned ImpDef = MIRBuilder.buildUndef(WideTy)->getOperand(0).getReg();
+  MIRBuilder.buildInsert(WideReg, ImpDef, MO.getReg(), 0);
+  MO.setReg(WideReg);
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
@@ -2316,6 +2346,16 @@
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_AND:
+  case TargetOpcode::G_OR:
+  case TargetOpcode::G_XOR: {
+    Observer.changingInstr(MI);
+    moreElementsVectorSrc(MI, MoreTy, 1);
+    moreElementsVectorSrc(MI, MoreTy, 2);
+    moreElementsVectorDst(MI, MoreTy, 0);
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
   default:
     return UnableToLegalize;
   }
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -55,6 +55,22 @@
   };
 }
 
+static LegalizeMutation halfSizeVector(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    const LLT Ty = Query.Types[TypeIdx];
+    const LLT EltTy = Ty.getElementType();
+    const unsigned NewNumElts = (Ty.getNumElements() + 1) / 2;
+    return std::make_pair(TypeIdx, LLT::scalarOrVector(NewNumElts, EltTy));
+  };
+}
+
+static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
+  return [=](const LegalityQuery &Query) {
+    const LLT QueryTy = Query.Types[TypeIdx];
+    return QueryTy.isVector() && QueryTy.getSizeInBits() > Size;
+  };
+}
+
 AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
                                          const GCNTargetMachine &TM) {
@@ -135,6 +151,8 @@
   getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
     .legalFor({S32, S1, S64, V2S32, V2S16, V4S16})
     .clampScalar(0, S32, S64)
+    .fewerElementsIf(vectorWiderThan(0, 32), halfSizeVector(0))
+    .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
     .scalarize(0);
 
   getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
Index: test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
+++ test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
@@ -71,13 +71,18 @@
     ; CHECK-LABEL: name: test_zext_trunc_v3s32_to_v3s16_to_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
     ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>)
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]]
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]]
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32)
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
+    ; CHECK: 
[[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0 + ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[EXTRACT]], [[EXTRACT1]] + ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[AND]](<2 x s32>), 0 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT2]], [[COPY2]] + ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[AND1]](s32), 64 + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>) = G_TRUNC %0 %2:_(<3 x s32>) = G_ZEXT %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s --- name: test_and_s32 @@ -165,21 +165,24 @@ ... --- -name: test_and_v3i32 +name: test_and_v3s32 body: | bb.0: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 - ; CHECK-LABEL: name: test_and_v3i32 + ; CHECK-LABEL: name: test_and_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV3]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV4]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV5]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<3 x s32>), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 + ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[EXTRACT]], [[EXTRACT1]] + ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[AND]](<2 x s32>), 0 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT2]], [[EXTRACT3]] + ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[AND1]](s32), 64 + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_AND %0, %1 @@ -187,22 +190,20 @@ ... 
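Note on the new test_and_v3s32 output: <3 x s32> is 96 bits, so the vectorWiderThan(0, 32) predicate fires and halfSizeVector(0) requests a vector of (3 + 1) / 2 = 2 elements. Because 3 is not a multiple of 2, fewerElementsVector emits the mixed form seen in the CHECK lines above: a <2 x s32> G_AND on the low elements plus a scalar s32 G_AND on the element at bit offset 64, reassembled with G_INSERT. A minimal sketch of what the mutation computes for this type (illustration only, spelling the LLT arithmetic out by hand):

    // halfSizeVector(0) applied to <3 x s32>:
    const LLT Ty = LLT::vector(3, 32);                                        // <3 x s32>
    const unsigned NewNumElts = (Ty.getNumElements() + 1) / 2;                // 2
    const LLT NarrowTy = LLT::scalarOrVector(NewNumElts, Ty.getElementType()); // <2 x s32>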
--- -name: test_and_v4i32 +name: test_and_v4s32 body: | bb.0: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK-LABEL: name: test_and_v4i32 + ; CHECK-LABEL: name: test_and_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV7]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<4 x s32>) = G_AND %0, %1 @@ -210,23 +211,16 @@ ... --- -name: test_and_v5i32 +name: test_and_v5s32 body: | bb.0: - ; CHECK-LABEL: name: test_and_v5i32 + ; CHECK-LABEL: name: test_and_v5s32 ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV5]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV6]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV7]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV8]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV9]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(<5 x s32>) = G_AND [[DEF]], [[DEF1]] ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF2]], [[BUILD_VECTOR]](<5 x s32>), 0 + ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF2]], [[AND]](<5 x s32>), 0 ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF @@ -284,24 +278,22 @@ ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = 
G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) - ; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[ANYEXT3]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[ANYEXT5]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16) ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[BUILD_VECTOR]](<3 x s16>), 0 - ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[EXTRACT3]], [[EXTRACT4]] + ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT2]], [[AND]](<2 x s16>), 0 + ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 + ; CHECK: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT5]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT6]](s16) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 32 + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[INSERT1]](<3 x s16>), 0 + ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_AND %0, %1 @@ -326,3 +318,108 @@ %2:_(<4 x s16>) = G_AND %0, %1 $vgpr0_vgpr1 = COPY %2 ... + +--- +name: test_and_v5s16 +body: | + bb.0: + + ; CHECK-LABEL: name: test_and_v5s16 + ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[AND:%[0-9]+]]:_(<5 x s16>) = G_AND [[DEF]], [[DEF1]] + ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF2]](<8 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[TRUNC]], [[AND]](<5 x s16>), 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) + %0:_(<5 x s16>) = G_IMPLICIT_DEF + %1:_(<5 x s16>) = G_IMPLICIT_DEF + %2:_(<5 x s16>) = G_AND %0, %1 + %3:_(<8 x s16>) = G_IMPLICIT_DEF + %4:_(<8 x s16>) = G_INSERT %3, %2, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 +... 
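The new test_and_v5s16 above (and test_and_v5s32 earlier) documents a case that still fails to legalize: the CHECK lines show the G_AND surviving on the original 5-element type. halfSizeVector would request a 3-element vector here ((5 + 1) / 2 = 3), and that 3 + 2 split appears not to be handled by fewerElementsVector yet. This is presumably why the RUN line gains -global-isel-abort=0: without it the legalizer pass would abort on these tests instead of producing output to check.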
+ +--- +name: test_and_v3s8 +body: | + bb.0: + + ; CHECK-LABEL: name: test_and_v3s8 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF3]], [[DEF1]](<3 x s8>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[AND1]](s32) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[AND2]](s32) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[AND3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + %0:_(<3 x s8>) = G_IMPLICIT_DEF + %1:_(<3 x s8>) = G_IMPLICIT_DEF + %2:_(<3 x s8>) = G_AND %0, %1 + %3:_(<3 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2 = COPY %3 +... 
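In test_and_v3s8 above, the isSmallOddVector/oneMoreElement rule pads the <3 x s8> operands to <4 x s8> first. Since 4 is not an exact multiple of 3, moreElementsVectorSrc takes its G_INSERT fallback rather than G_CONCAT_VECTORS, which is the G_IMPLICIT_DEF/G_INSERT pair at the top of the CHECK block; the widened <4 x s8> G_AND is then scalarized, and each s8 piece is widened to s32 by clampScalar, giving the G_ANYEXT/G_TRUNC ladder. A rough sketch of the choice the helper makes for this case (illustration only):

    // moreElementsVectorSrc deciding between its two padding strategies:
    const LLT OldTy = LLT::vector(3, 8);   // <3 x s8>
    const LLT MoreTy = LLT::vector(4, 8);  // <4 x s8>
    const unsigned NumParts = MoreTy.getNumElements() / OldTy.getNumElements(); // 1
    // 1 * 3 != 4, so the operand is G_INSERTed into a G_IMPLICIT_DEF of <4 x s8>.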
+ +--- +name: test_and_v4s8 +body: | + bb.0: + + ; CHECK-LABEL: name: test_and_v4s8 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF1]](<4 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC1]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[AND1]](s32) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[AND2]](s32) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[AND3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8) + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s8>) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<4 x s32>) + %0:_(<4 x s8>) = G_IMPLICIT_DEF + %1:_(<4 x s8>) = G_IMPLICIT_DEF + %2:_(<4 x s8>) = G_AND %0, %1 + %3:_(<4 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... Index: test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s --- name: test_or_s32 @@ -164,6 +164,72 @@ $vgpr0_vgpr1 = COPY %2 ... 
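The remaining test changes, in legalize-or.mir (started above) and legalize-xor.mir, mirror the legalize-and.mir coverage one-for-one: G_AND, G_OR and G_XOR share a single getActionDefinitionsBuilder entry and the same new case in moreElementsVector, so each file gains the same v3s32, v4s32, v5s32, v5s16, v3s8 and v4s8 tests with only the opcode changed, plus the same -global-isel-abort=0 addition to its RUN line.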
+--- +name: test_or_v3s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_or_v3s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<3 x s32>), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 + ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[EXTRACT]], [[EXTRACT1]] + ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[OR]](<2 x s32>), 0 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[EXTRACT2]], [[EXTRACT3]] + ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR1]](s32), 64 + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x s32>) = G_OR %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... + +--- +name: test_or_v4s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_or_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV]], [[UV2]] + ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV1]], [[UV3]] + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x s32>) = G_OR %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... + +--- +name: test_or_v5s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_or_v5s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[OR:%[0-9]+]]:_(<5 x s32>) = G_OR [[DEF]], [[DEF1]] + ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF2]], [[OR]](<5 x s32>), 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) + %0:_(<5 x s32>) = G_IMPLICIT_DEF + %1:_(<5 x s32>) = G_IMPLICIT_DEF + %2:_(<5 x s32>) = G_OR %0, %1 + %3:_(<8 x s32>) = G_IMPLICIT_DEF + %4:_(<8 x s32>) = G_INSERT %3, %2, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4 +... 
+ --- name: test_or_v2s64 body: | @@ -212,24 +278,22 @@ ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) - ; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16) ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[BUILD_VECTOR]](<3 x s16>), 0 - ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[EXTRACT3]], [[EXTRACT4]] + ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT2]], [[OR]](<2 x s16>), 0 + ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 + ; CHECK: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT5]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT6]](s16) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 32 + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[INSERT1]](<3 x s16>), 0 + ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_OR %0, %1 @@ -254,3 +318,108 @@ %2:_(<4 x s16>) = G_OR %0, %1 $vgpr0_vgpr1 = COPY %2 ... 
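A note on the rewritten <3 x s16> checks above: only the first two elements are combined as a single <2 x s16> operation. The leftover third element is extracted as an s16, which is not a legal scalar type for these opcodes (clampScalar(0, S32, S64)), so it is widened with G_ANYEXT to s32, operated on at 32 bits, truncated back to s16, and reinserted at bit offset 32.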
+ +--- +name: test_or_v5s16 +body: | + bb.0: + + ; CHECK-LABEL: name: test_or_v5s16 + ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[OR:%[0-9]+]]:_(<5 x s16>) = G_OR [[DEF]], [[DEF1]] + ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF2]](<8 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[TRUNC]], [[OR]](<5 x s16>), 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) + %0:_(<5 x s16>) = G_IMPLICIT_DEF + %1:_(<5 x s16>) = G_IMPLICIT_DEF + %2:_(<5 x s16>) = G_OR %0, %1 + %3:_(<8 x s16>) = G_IMPLICIT_DEF + %4:_(<8 x s16>) = G_INSERT %3, %2, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 +... + +--- +name: test_or_v3s8 +body: | + bb.0: + + ; CHECK-LABEL: name: test_or_v3s8 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF3]], [[DEF1]](<3 x s8>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR1]](s32) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR2]](s32) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + %0:_(<3 x s8>) = G_IMPLICIT_DEF + %1:_(<3 x s8>) = G_IMPLICIT_DEF + %2:_(<3 x s8>) = G_OR %0, %1 + %3:_(<3 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2 = COPY %3 +... 
+ +--- +name: test_or_v4s8 +body: | + bb.0: + + ; CHECK-LABEL: name: test_or_v4s8 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF1]](<4 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC1]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR1]](s32) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[OR2]](s32) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8) + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s8>) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<4 x s32>) + %0:_(<4 x s8>) = G_IMPLICIT_DEF + %1:_(<4 x s8>) = G_IMPLICIT_DEF + %2:_(<4 x s8>) = G_OR %0, %1 + %3:_(<4 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... Index: test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s --- name: test_xor_s32 @@ -164,6 +164,72 @@ $vgpr0_vgpr1 = COPY %2 ... 
+--- +name: test_xor_v3s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_xor_v3s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<3 x s32>), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 + ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[EXTRACT]], [[EXTRACT1]] + ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[XOR]](<2 x s32>), 0 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 + ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT2]], [[EXTRACT3]] + ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[XOR1]](s32), 64 + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(<3 x s32>) = G_XOR %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... + +--- +name: test_xor_v4s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_xor_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[UV]], [[UV2]] + ; CHECK: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[UV1]], [[UV3]] + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x s32>) = G_XOR %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... + +--- +name: test_xor_v5s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_xor_v5s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[XOR:%[0-9]+]]:_(<5 x s32>) = G_XOR [[DEF]], [[DEF1]] + ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF2]], [[XOR]](<5 x s32>), 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) + %0:_(<5 x s32>) = G_IMPLICIT_DEF + %1:_(<5 x s32>) = G_IMPLICIT_DEF + %2:_(<5 x s32>) = G_XOR %0, %1 + %3:_(<8 x s32>) = G_IMPLICIT_DEF + %4:_(<8 x s32>) = G_INSERT %3, %2, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %4 +... 
+ --- name: test_xor_v2s64 body: | @@ -212,24 +278,22 @@ ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s16>) - ; CHECK: [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[EXTRACT1]](<3 x s16>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[XOR]](s32) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) - ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT2]], [[ANYEXT3]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[XOR1]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) - ; CHECK: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT4]], [[ANYEXT5]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[XOR2]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16) ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[BUILD_VECTOR]](<3 x s16>), 0 - ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[EXTRACT3]], [[EXTRACT4]] + ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[EXTRACT2]], [[XOR]](<2 x s16>), 0 + ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT]](<3 x s16>), 32 + ; CHECK: [[EXTRACT6:%[0-9]+]]:_(s16) = G_EXTRACT [[EXTRACT1]](<3 x s16>), 32 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT5]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT6]](s16) + ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[XOR1]](s32) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 32 + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[INSERT1]](<3 x s16>), 0 + ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_XOR %0, %1 @@ -254,3 +318,108 @@ %2:_(<4 x s16>) = G_XOR %0, %1 $vgpr0_vgpr1 = COPY %2 ... 
+ +--- +name: test_xor_v5s16 +body: | + bb.0: + + ; CHECK-LABEL: name: test_xor_v5s16 + ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[XOR:%[0-9]+]]:_(<5 x s16>) = G_XOR [[DEF]], [[DEF1]] + ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[DEF2]](<8 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[TRUNC]], [[XOR]](<5 x s16>), 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) + %0:_(<5 x s16>) = G_IMPLICIT_DEF + %1:_(<5 x s16>) = G_IMPLICIT_DEF + %2:_(<5 x s16>) = G_XOR %0, %1 + %4:_(<8 x s16>) = G_IMPLICIT_DEF + %5:_(<8 x s16>) = G_INSERT %4, %2, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %5 +... + +--- +name: test_xor_v3s8 +body: | + bb.0: + + ; CHECK-LABEL: name: test_xor_v3s8 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF3]], [[DEF1]](<3 x s8>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[XOR]](s32) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[XOR1]](s32) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[XOR2]](s32) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + %0:_(<3 x s8>) = G_IMPLICIT_DEF + %1:_(<3 x s8>) = G_IMPLICIT_DEF + %2:_(<3 x s8>) = G_XOR %0, %1 + %3:_(<3 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2 = COPY %3 +... 
+ +--- +name: test_xor_v4s8 +body: | + bb.0: + + ; CHECK-LABEL: name: test_xor_v4s8 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF1]](<4 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC1]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[XOR]](s32) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[XOR1]](s32) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[XOR2]](s32) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8) + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s8>) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<4 x s32>) + %0:_(<4 x s8>) = G_IMPLICIT_DEF + %1:_(<4 x s8>) = G_IMPLICIT_DEF + %2:_(<4 x s8>) = G_XOR %0, %1 + %3:_(<4 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... 
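The new unit test below covers the padding strategy the MIR tests above do not reach: widening <2 x s32> sources to <6 x s32> is an exact multiple (NumParts = 6 / 2 = 3), so moreElementsVectorSrc pads each source with a G_CONCAT_VECTORS of the original value and two G_IMPLICIT_DEF parts, and moreElementsVectorDst recovers the result with a G_EXTRACT at offset 0, as the CHECK string spells out. It also exercises the new MachineIRBuilder::buildBitcast helper, which the test uses to turn the harness-provided s64 copies into <2 x s32> values. Roughly, the builder calls for one source operand look like this (sketch; Src stands for the original <2 x s32> register):

    // Exact-multiple case of moreElementsVectorSrc, <2 x s32> -> <6 x s32>:
    unsigned ImpDef = MIRBuilder.buildUndef(LLT::vector(2, 32)).getReg(0);
    auto Concat = MIRBuilder.buildConcatVectors(LLT::vector(6, 32),
                                                {Src, ImpDef, ImpDef});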
Index: unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp =================================================================== --- unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -547,4 +547,45 @@ // Check EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; } + +TEST_F(GISelMITest, MoreElementsAnd) { + if (!TM) + return; + + LLT s32 = LLT::scalar(32); + LLT v2s32 = LLT::vector(2, 32); + LLT v6s32 = LLT::vector(6, 32); + + LegalizerInfo LI; + LI.getActionDefinitionsBuilder(TargetOpcode::G_AND) + .legalFor({v6s32}) + .clampMinNumElements(0, s32, 6); + LI.computeTables(); + + DummyGISelObserver Observer; + LegalizerHelper Helper(*MF, LI, Observer, B); + + B.setInsertPt(*EntryMBB, EntryMBB->end()); + + auto Val0 = B.buildBitcast(v2s32, Copies[0]); + auto Val1 = B.buildBitcast(v2s32, Copies[1]); + + auto And = B.buildAnd(v2s32, Val0, Val1); + + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.moreElementsVector(*And, 0, v6s32)); + + auto CheckStr = R"( + CHECK: [[BITCAST0:%[0-9]+]]:_(<2 x s32>) = G_BITCAST + CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST + CHECK: [[IMP_DEF0:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + CHECK: [[CONCAT0:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[BITCAST0]]:_(<2 x s32>), [[IMP_DEF0]]:_(<2 x s32>), [[IMP_DEF0]]:_(<2 x s32>) + CHECK: [[IMP_DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + CHECK: [[CONCAT1:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[BITCAST1]]:_(<2 x s32>), [[IMP_DEF1]]:_(<2 x s32>), [[IMP_DEF1]]:_(<2 x s32>) + CHECK: [[AND:%[0-9]+]]:_(<6 x s32>) = G_AND [[CONCAT0]]:_, [[CONCAT1]]:_ + CHECK: (<2 x s32>) = G_EXTRACT [[AND]]:_(<6 x s32>), 0 + )"; + + EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; +} } // namespace