Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2603,90 +2603,62 @@
 LegalizerHelper::LegalizeResult
 LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
+  assert(TypeIdx == 0 && "only one type index expected");
+
   const unsigned Opc = MI.getOpcode();
-  const unsigned NumOps = MI.getNumOperands() - 1;
-  const unsigned NarrowSize = NarrowTy.getSizeInBits();
+  const int NumOps = MI.getNumOperands() - 1;
   const Register DstReg = MI.getOperand(0).getReg();
   const unsigned Flags = MI.getFlags();
-  const LLT DstTy = MRI.getType(DstReg);
-  const unsigned Size = DstTy.getSizeInBits();
-  const int NumParts = Size / NarrowSize;
-  const LLT EltTy = DstTy.getElementType();
-  const unsigned EltSize = EltTy.getSizeInBits();
-  const unsigned BitsForNumParts = NarrowSize * NumParts;
-
-  // Check if we have any leftovers. If we do, then only handle the case where
-  // the leftover is one element.
-  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
-    return UnableToLegalize;
-
-  if (BitsForNumParts != Size) {
-    Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
-    MIRBuilder.buildUndef(AccumDstReg);
-
-    // Handle the pieces which evenly divide into the requested type with
-    // extract/op/insert sequence.
-    for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
-      SmallVector<SrcOp, 4> SrcOps;
-      for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
-        Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
-        MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I), Offset);
-        SrcOps.push_back(PartOpReg);
-      }
-
-      Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
-      MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
-      Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
-      MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
-      AccumDstReg = PartInsertReg;
-    }
+  assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources");
 
-    // Handle the remaining element sized leftover piece.
-    SmallVector<SrcOp, 4> SrcOps;
-    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
-      Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
-      MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I), BitsForNumParts);
-      SrcOps.push_back(PartOpReg);
-    }
+  LLT GCDTys[3];
+  LLT LCMTys[3];
+  SmallVector<Register, 8> ExtractedRegs[3];
+  SmallVector<Register, 8> Parts;
 
-    Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
-    MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
-    MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
-    MI.eraseFromParent();
+  // Break down all the sources into NarrowTy pieces we can operate on. This may
+  // involve creating merges to a wider type, padded with undef.
+  for (int I = 0; I != NumOps; ++I) {
+    Register SrcReg = MI.getOperand(I + 1).getReg();
+    LLT SrcTy = MRI.getType(SrcReg);
+    GCDTys[I] = extractGCDType(ExtractedRegs[I], SrcTy, NarrowTy, SrcReg);
 
-    return Legalized;
+    // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
+    LCMTys[I] = buildLCMMergePieces(SrcTy, NarrowTy, GCDTys[I],
                                    ExtractedRegs[I], TargetOpcode::G_ANYEXT);
   }
 
-  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
-
-  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
+  SmallVector<Register, 8> ResultRegs;
 
-  if (NumOps >= 2)
-    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
+  // Input operands for each sub-instruction.
+  SmallVector<Register, 4> InputRegs(NumOps, Register());
 
-  if (NumOps >= 3)
-    extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
+  int NumParts = ExtractedRegs[0].size();
+  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+  const unsigned NarrowSize = NarrowTy.getSizeInBits();
 
-  for (int i = 0; i < NumParts; ++i) {
-    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+  // We widened the source registers to satisfy merge/unmerge size
+  // constraints. We'll have some extra fully undef parts.
+  const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
 
-    if (NumOps == 1)
-      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
-    else if (NumOps == 2) {
-      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
-    } else if (NumOps == 3) {
-      MIRBuilder.buildInstr(Opc, {DstReg},
-                            {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
-    }
+  for (int I = 0; I != NumRealParts; ++I) {
+    // Emit this instruction on each of the split pieces.
+    for (int J = 0; J != NumOps; ++J)
+      InputRegs[J] = ExtractedRegs[J][I];
 
-    DstRegs.push_back(DstReg);
+    auto Inst = MIRBuilder.buildInstr(Opc, {NarrowTy}, InputRegs, Flags);
+    ResultRegs.push_back(Inst.getReg(0));
   }
 
-  if (NarrowTy.isVector())
-    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
-  else
-    MIRBuilder.buildBuildVector(DstReg, DstRegs);
+  // Fill out the widened result with undef instead of creating instructions
+  // with undef inputs.
+  int NumUndefParts = NumParts - NumRealParts;
+  if (NumUndefParts != 0)
+    ResultRegs.append(NumUndefParts, MIRBuilder.buildUndef(NarrowTy).getReg(0));
+
+  // Extract the possibly padded result to the original result register.
+  buildWidenedRemergeToDst(DstReg, LCMTys[0], ResultRegs);
 
   MI.eraseFromParent();
   return Legalized;
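The rewritten lowering never operates on a leftover piece directly: each source is split into gcd(SrcElts, NarrowElts)-sized pieces by extractGCDType, then padded with undef up to lcm(SrcElts, NarrowElts) elements by buildLCMMergePieces, so the pieces always regroup evenly into NarrowTy values. A minimal standalone sketch of that padding step, with invented Piece/padToLCM names rather than the LLVM API, assuming the source and NarrowTy share an element type:

#include <numeric>
#include <vector>

using Piece = int;                  // stand-in for a virtual register holding one GCD-typed piece
static const Piece UndefPiece = -1; // stand-in for a G_IMPLICIT_DEF piece

// Split granularity is gcd(SrcElts, NarrowElts) elements per piece; padding
// with undef pieces up to lcm(SrcElts, NarrowElts) elements makes the piece
// list divide evenly into NarrowTy-sized groups.
std::vector<Piece> padToLCM(std::vector<Piece> Pieces, unsigned SrcElts,
                            unsigned NarrowElts) {
  unsigned GCDElts = std::gcd(SrcElts, NarrowElts);
  unsigned LCMElts = std::lcm(SrcElts, NarrowElts);
  Pieces.resize(LCMElts / GCDElts, UndefPiece); // grows, filling with undef
  return Pieces;
}

int main() {
  // <3 x s32> source with NarrowTy <2 x s32>: 3 real s32 pieces, padded to 6.
  std::vector<Piece> P = padToLCM({0, 1, 2}, /*SrcElts=*/3, /*NarrowElts=*/2);
  return P.size() == 6 ? 0 : 1;
}

For a <3 x s32> source with NarrowTy <2 x s32> this yields pieces {s32, s32, s32, undef, undef, undef}, regrouping as (p0, p1), (p2, undef), and one fully undef <2 x s32>, which is the G_UNMERGE_VALUES / G_BUILD_VECTOR / G_IMPLICIT_DEF pattern in the updated checks below.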
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
@@ -75,18 +75,19 @@
     ; CHECK-LABEL: name: test_zext_trunc_v3s32_to_v3s16_to_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
     ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>)
-    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
-    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0
-    ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[EXTRACT]], [[EXTRACT1]]
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[AND]](<2 x s32>), 0
-    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT2]], [[COPY2]]
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[AND1]](s32), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
+    ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[DEF]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+    ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s32>), 0
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s16>) = G_TRUNC %0
     %2:_(<3 x s32>) = G_ZEXT %1
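Only the leading pieces carry real data: their count is a ceiling division of the destination size by the narrow size, and the remainder of the LCM-sized result is filled with undef, matching the NumRealParts computation in the patch. A standalone sanity check of that arithmetic, using a hypothetical countParts helper and a VecTy stand-in for LLT (same element type assumed):

#include <cassert>
#include <numeric>

// Element count and element width stand in for an LLT vector type here.
struct VecTy {
  unsigned NumElts, EltBits;
  unsigned bits() const { return NumElts * EltBits; }
};

// NumParts: NarrowTy-sized pieces covering the undef-padded (LCM-sized) value.
// NumRealParts: pieces that carry actual data (ceiling division).
void countParts(VecTy DstTy, VecTy NarrowTy, int &NumParts, int &NumRealParts) {
  assert(DstTy.EltBits == NarrowTy.EltBits && "same element type assumed");
  NumParts = std::lcm(DstTy.NumElts, NarrowTy.NumElts) / NarrowTy.NumElts;
  NumRealParts = (DstTy.bits() + NarrowTy.bits() - 1) / NarrowTy.bits();
}

int main() {
  int NumParts, NumRealParts;
  countParts({3, 32}, {2, 32}, NumParts, NumRealParts); // <3 x s32> via <2 x s32>
  assert(NumParts == 3 && NumRealParts == 2);           // one all-undef piece
  countParts({5, 32}, {2, 32}, NumParts, NumRealParts); // <5 x s32> via <2 x s32>
  assert(NumParts == 5 && NumRealParts == 3);           // two all-undef pieces
  return 0;
}

That gives two real G_ANDs plus one undef <2 x s32> piece for the <6 x s32> concat in the zext test above, and three real operations plus two undef pieces for the <10 x s32> concats in the test_and_v5s32 and test_or_v5s32 checks below.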
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
@@ -242,16 +242,19 @@
     ; CHECK-LABEL: name: test_and_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
-    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<3 x s32>), 0
-    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
-    ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[EXTRACT]], [[EXTRACT1]]
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[AND]](<2 x s32>), 0
-    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64
-    ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT
[[COPY1]](<3 x s32>), 64 - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT2]], [[EXTRACT3]] - ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[AND1]](s32), 64 - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) + ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s32>), 0 + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_AND %0, %1 @@ -287,22 +290,24 @@ ; CHECK-LABEL: name: test_and_v5s32 ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY [[DEF]](<5 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF]](<5 x s32>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF1]](<5 x s32>), 0 - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[EXTRACT]], [[EXTRACT1]] - ; CHECK: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[COPY]], [[AND]](<2 x s32>), 0 - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF]](<5 x s32>), 64 - ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF1]](<5 x s32>), 64 - ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[EXTRACT2]], [[EXTRACT3]] - ; CHECK: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[AND1]](<2 x s32>), 64 - ; CHECK: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<5 x s32>), 128 - ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF1]](<5 x s32>), 128 - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[EXTRACT4]], [[EXTRACT5]] - ; CHECK: [[INSERT2:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT1]], [[AND2]](s32), 128 - ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT3:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF2]], [[INSERT2]](<5 x s32>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT3]](<8 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) + ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32) + ; CHECK: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[UV5:%[0-9]+]]:_(s32), 
[[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) + ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) + ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]] + ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR4]] + ; CHECK: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR5]] + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[AND2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s32>), 0 + ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[EXTRACT]](<5 x s32>), 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF %2:_(<5 x s32>) = G_AND %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -212,64 +212,89 @@ ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; SI: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[UV]](<3 x s16>) ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; SI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[EXTRACT]] - ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0 - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FABS]](<2 x s16>), 0 - ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; SI: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; SI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[EXTRACT1]] - ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; SI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FABS1]](s16), 32 - ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; SI: S_NOP 0, implicit [[UV8]](<3 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV4]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST2]] + ; SI: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST3]] + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF2]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_fabs_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; VI: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[UV]](<3 x s16>) ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; VI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[EXTRACT]] - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0 - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FABS]](<2 x s16>), 0 - ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; VI: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; VI: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[EXTRACT1]] - ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; VI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FABS1]](s16), 32 - ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; VI: S_NOP 0, implicit [[UV8]](<3 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST2]] + ; VI: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST3]] + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF2]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_fabs_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[UV]](<3 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[EXTRACT]] - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0 - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FABS]](<2 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[EXTRACT1]] - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FABS1]](s16), 32 - ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; GFX9: S_NOP 0, implicit 
[[UV8]](<3 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF2]](s32) + ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR_TRUNC]] + ; GFX9: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR_TRUNC1]] + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF3]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FABS %0 S_NOP 0, implicit %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -431,26 +431,36 @@ ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[UV]](<3 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF3]](s32) + ; GFX9: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[EXTRACT]], [[EXTRACT1]] - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], 
[[COPY]](<3 x s16>), 0 - ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[FADD]](<2 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32 - ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32 - ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[EXTRACT2]], [[EXTRACT3]] - ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 - ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[FADD1]](s16), 32 - ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT7]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>), [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>) - ; GFX9: S_NOP 0, implicit [[UV12]](<3 x s16>) + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF3]](s32) + ; GFX9: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<2 x s16>), [[FADD1]](<2 x s16>), [[DEF4]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_FADD %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir @@ -293,22 +293,25 @@ ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[UV]](<3 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 
x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[EXTRACT]] - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0 - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FCANONICALIZE]](<2 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[EXTRACT1]] - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FCANONICALIZE1]](s16), 32 - ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; GFX9: S_NOP 0, implicit [[UV8]](<3 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF2]](s32) + ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]] + ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]] + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FCANONICALIZE]](<2 x s16>), [[FCANONICALIZE1]](<2 x s16>), [[DEF3]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FCANONICALIZE %0 S_NOP 0, implicit %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -524,30 +524,47 @@ ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; GFX9: 
[[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[UV]](<3 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[DEF4:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF4]](s32) + ; GFX9: [[DEF5:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 + ; GFX9: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF4]](s32) ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[EXTRACT]], [[EXTRACT1]], [[EXTRACT2]] - ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0 - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT3]], [[FMA]](<2 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT4]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>), [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>) - ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32 - ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[EXTRACT4:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT6]](<4 x s16>), 32 - ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 - ; GFX9: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT7]](<4 x s16>), 32 - ; GFX9: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[EXTRACT3]], [[EXTRACT4]], [[EXTRACT5]] - ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV12]](<3 x s16>), 0 - ; GFX9: [[INSERT9:%[0-9]+]]:_(<4 x s16>) = 
G_INSERT [[INSERT8]], [[FMA1]](s16), 32 - ; GFX9: [[CONCAT_VECTORS4:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT9]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV16:%[0-9]+]]:_(<3 x s16>), [[UV17:%[0-9]+]]:_(<3 x s16>), [[UV18:%[0-9]+]]:_(<3 x s16>), [[UV19:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS4]](<12 x s16>) - ; GFX9: S_NOP 0, implicit [[UV16]](<3 x s16>) + ; GFX9: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) + ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) + ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[DEF4]](s32) + ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC4]] + ; GFX9: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC5]] + ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<2 x s16>), [[FMA1]](<2 x s16>), [[DEF5]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS3]](<6 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_IMPLICIT_DEF Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -430,26 +430,36 @@ ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[UV]](<3 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF3]](s32) + ; GFX9: 
[[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[EXTRACT]], [[EXTRACT1]] - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0 - ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[FMUL]](<2 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT4]](<4 x s16>), 32 - ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT5]](<4 x s16>), 32 - ; GFX9: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[EXTRACT2]], [[EXTRACT3]] - ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 - ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[FMUL1]](s16), 32 - ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT7]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>), [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>) - ; GFX9: S_NOP 0, implicit [[UV12]](<3 x s16>) + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF3]](s32) + ; GFX9: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMUL]](<2 x s16>), [[FMUL1]](<2 x s16>), [[DEF4]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_FMUL %0, %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir @@ -210,64 +210,89 @@ ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), 
[[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; SI: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[UV]](<3 x s16>) ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; SI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[EXTRACT]] - ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0 - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FNEG]](<2 x s16>), 0 - ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; SI: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; SI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[EXTRACT1]] - ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; SI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FNEG1]](s16), 32 - ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; SI: S_NOP 0, implicit [[UV8]](<3 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST2]] + ; SI: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST3]] + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF2]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_fneg_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x 
s16>), [[DEF1]](<4 x s16>) ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; VI: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[UV]](<3 x s16>) ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; VI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[EXTRACT]] - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0 - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FNEG]](<2 x s16>), 0 - ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; VI: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; VI: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[EXTRACT1]] - ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; VI: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FNEG1]](s16), 32 - ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; VI: S_NOP 0, implicit [[UV8]](<3 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST2]] + ; VI: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST3]] + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF2]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_fneg_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s16>) = COPY [[UV]](<3 x s16>) ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[EXTRACT]] - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[COPY]](<3 x s16>), 0 - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT1]], [[FNEG]](<2 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[EXTRACT1]] - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[FNEG1]](s16), 32 - ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) - ; GFX9: S_NOP 0, implicit [[UV8]](<3 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF2]](s32) + ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR_TRUNC]] + ; GFX9: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR_TRUNC1]] + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF3]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FNEG %0 S_NOP 0, implicit %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -242,16 +242,19 @@ ; CHECK-LABEL: name: 
test_or_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
-    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<3 x s32>), 0
-    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
-    ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[EXTRACT]], [[EXTRACT1]]
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[OR]](<2 x s32>), 0
-    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64
-    ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[EXTRACT2]], [[EXTRACT3]]
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[OR1]](s32), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
+    ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
+    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+    ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[DEF1]](<2 x s32>)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s32>), 0
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(<3 x s32>) = G_OR %0, %1
@@ -287,22 +290,24 @@
     ; CHECK-LABEL: name: test_or_v5s32
     ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
     ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY [[DEF]](<5 x s32>)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF]](<5 x s32>), 0
-    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF1]](<5 x s32>), 0
-    ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[EXTRACT]], [[EXTRACT1]]
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[COPY]], [[OR]](<2 x s32>), 0
-    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF]](<5 x s32>), 64
-    ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF1]](<5 x s32>), 64
-    ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[EXTRACT2]], [[EXTRACT3]]
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[OR1]](<2 x s32>), 64
-    ; CHECK: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<5 x s32>), 128
-    ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF1]](<5 x s32>), 128
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[EXTRACT4]], [[EXTRACT5]]
-    ; CHECK: [[INSERT2:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT1]], [[OR2]](s32), 128
-    ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT3:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF2]], [[INSERT2]](<5 x s32>), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT3]](<8 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
+    ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
+    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32)
+    ; CHECK: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
+    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
+    ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+    ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
+    ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]]
+    ; CHECK: [[OR2:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]]
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[OR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s32>), 0
+    ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[EXTRACT]](<5 x s32>), 0
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>)
     %0:_(<5 x s32>) = G_IMPLICIT_DEF
     %1:_(<5 x s32>) = G_IMPLICIT_DEF
     %2:_(<5 x s32>) = G_OR %0, %1
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
@@ -242,16 +242,19 @@
     ; CHECK-LABEL: name: test_xor_v3s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
-    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<3 x s32>), 0
-    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0
-    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[EXTRACT]], [[EXTRACT1]]
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[XOR]](<2 x s32>), 0
-    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64
-    ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64
-    ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT2]], [[EXTRACT3]]
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[XOR1]](s32), 64
-    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
+    ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32)
+    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32)
+    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
+    ; CHECK: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[DEF1]](<2 x s32>)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s32>), 0
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
     %2:_(<3 x s32>) = G_XOR %0, %1
@@ -287,22 +290,24 @@
     ; CHECK-LABEL: name: test_xor_v5s32
     ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
     ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY [[DEF]](<5 x s32>)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF]](<5 x s32>), 0
-    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF1]](<5 x s32>), 0
-    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[EXTRACT]], [[EXTRACT1]]
-    ; CHECK: [[INSERT:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[COPY]], [[XOR]](<2 x s32>), 0
-    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF]](<5 x s32>), 64
-    ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[DEF1]](<5 x s32>), 64
-    ; CHECK: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[EXTRACT2]], [[EXTRACT3]]
-    ; CHECK: [[INSERT1:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT]], [[XOR1]](<2 x s32>), 64
-    ; CHECK: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<5 x s32>), 128
-    ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF1]](<5 x s32>), 128
-    ; CHECK: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT4]], [[EXTRACT5]]
-    ; CHECK: [[INSERT2:%[0-9]+]]:_(<5 x s32>) = G_INSERT [[INSERT1]], [[XOR2]](s32), 128
-    ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[INSERT3:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF2]], [[INSERT2]](<5 x s32>), 0
-    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT3]](<8 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>)
+    ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32)
+    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32)
+    ; CHECK: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>)
+    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32)
+    ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32)
+    ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32)
+    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
+    ; CHECK: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]]
+    ; CHECK: [[XOR2:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]]
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[XOR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s32>), 0
+    ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[EXTRACT]](<5 x s32>), 0
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>)
     %0:_(<5 x s32>) = G_IMPLICIT_DEF
     %1:_(<5 x s32>) = G_IMPLICIT_DEF
     %2:_(<5 x s32>) = G_XOR %0, %1