diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -282,8 +282,70 @@
     unsigned Src = lookThroughCopyInstrs(MI.getOperand(1).getReg());
     MachineInstr *MergeI = MRI.getVRegDef(Src);
-    if (!MergeI || !isMergeLikeOpcode(MergeI->getOpcode()))
+    if (!MergeI)
       return false;
+    if (!isMergeLikeOpcode(MergeI->getOpcode())) {
+      // Try to combine away an extract of an insert of the same type.
+      if (MergeI->getOpcode() != TargetOpcode::G_INSERT)
+        return false;
+
+      MachineInstr *InsertI = MergeI;
+      MachineInstr *SecondInsert = nullptr;
+
+      unsigned InsertSrcReg = InsertI->getOperand(2).getReg();
+      unsigned ExtractOffset = MI.getOperand(2).getImm();
+      unsigned InsertOffset = InsertI->getOperand(3).getImm();
+      unsigned DstReg = MI.getOperand(0).getReg();
+      LLT DstTy = MRI.getType(DstReg);
+
+      if (MRI.getType(InsertSrcReg) != DstTy) {
+        // This insert doesn't match the extract, *but* this might be a
+        // pattern where the insert's "subreg def" has no use, because on an
+        // earlier combine we might have killed the user. E.g.
+        //   %1(s24) = undef
+        //   %2(s24) = insert %1, %val(s16), 0
+        //   %3(s24) = insert %2, %deadval(s8), 16
+        //   %4(s16) = extract %3, 0
+        //
+        // In the above case, the insert at %3 isn't doing anything useful,
+        // probably because we've already combined away its paired extract.
+        // Regardless, the insert doesn't cover the range of bits that we're
+        // interested in, so look again at the register being inserted into to
+        // see if it's a matching insert.
+        // TODO: This algorithm could be generalized to handle chains of more
+        // than 2 inserts.
+
+        // Check if the inserted range overlaps with our extracted range.
+        LLT InsertSrcTy = MRI.getType(InsertSrcReg);
+        unsigned InsertLowBit = InsertOffset;
+        unsigned InsertHighBit = InsertLowBit + InsertSrcTy.getSizeInBits() - 1;
+        unsigned ExtractLowBit = ExtractOffset;
+        unsigned ExtractHighBit = ExtractLowBit + DstTy.getSizeInBits() - 1;
+
+        if (InsertLowBit <= ExtractHighBit && InsertHighBit >= ExtractLowBit)
+          return false; // Overlaps, bail out.
+
+        SecondInsert = MRI.getVRegDef(InsertI->getOperand(1).getReg());
+        if (!SecondInsert ||
+            SecondInsert->getOpcode() != TargetOpcode::G_INSERT)
+          return false;
+        InsertSrcReg = SecondInsert->getOperand(2).getReg();
+        InsertOffset = SecondInsert->getOperand(3).getImm();
+        if (MRI.getType(InsertSrcReg) != DstTy)
+          return false;
+      }
+
+      if (InsertOffset != ExtractOffset)
+        return false;
+      Builder.setInstr(MI);
+      MRI.replaceRegWith(DstReg, InsertSrcReg);
+      markInstAndDefDead(MI, *InsertI, DeadInsts);
+
+      if (SecondInsert && MRI.hasOneUse(SecondInsert->getOperand(0).getReg()))
+        DeadInsts.push_back(SecondInsert);
+      return true;
+    }

     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
     LLT SrcTy = MRI.getType(Src);
@@ -324,6 +386,13 @@
     // etc, process the dead instructions now if any.
     if (!DeadInsts.empty())
       deleteMarkedDeadInsts(DeadInsts, WrapperObserver);
+
+    auto combineUses = [&](unsigned Reg) {
+      bool Changed = false;
+      for (auto &Use : MRI.use_instructions(Reg))
+        Changed |= tryCombineInstruction(Use, DeadInsts, WrapperObserver);
+      return Changed;
+    };
     switch (MI.getOpcode()) {
     default:
       return false;
@@ -335,14 +404,12 @@
       return tryCombineSExt(MI, DeadInsts);
     case TargetOpcode::G_UNMERGE_VALUES:
       return tryCombineMerges(MI, DeadInsts);
+    case TargetOpcode::G_MERGE_VALUES:
+      return combineUses(MI.getOperand(0).getReg());
     case TargetOpcode::G_EXTRACT:
       return tryCombineExtract(MI, DeadInsts);
-    case TargetOpcode::G_TRUNC: {
-      bool Changed = false;
-      for (auto &Use : MRI.use_instructions(MI.getOperand(0).getReg()))
-        Changed |= tryCombineInstruction(Use, DeadInsts, WrapperObserver);
-      return Changed;
-    }
+    case TargetOpcode::G_TRUNC:
+      return combineUses(MI.getOperand(0).getReg());
     }
   }
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -170,6 +170,12 @@
                          ArrayRef<unsigned> Src1Regs,
                          ArrayRef<unsigned> Src2Regs, LLT NarrowTy);

+  /// Helper function to create two new MMOs by splitting a larger MMO into
+  /// two contiguous smaller MMOs.
+  std::pair<MachineMemOperand *, MachineMemOperand *>
+  splitMMO(MachineMemOperand *MMO, unsigned Size1, unsigned Size2,
+           MachineFunction &MF);
+
   LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI,
                                                 unsigned TypeIdx, LLT NarrowTy);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -625,6 +625,10 @@
     return actionIf(LegalizeAction::Unsupported,
                     LegalityPredicates::memSizeInBytesNotPow2(0));
   }
+  LegalizeRuleSet &lowerIfMemSizeNotPow2() {
+    return actionIf(LegalizeAction::Lower,
+                    LegalityPredicates::memSizeInBytesNotPow2(0));
+  }

   LegalizeRuleSet &customIf(LegalityPredicate Predicate) {
     // We have no choice but conservatively assume that a custom action with a
diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
--- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -76,6 +76,7 @@
   case TargetOpcode::G_UNMERGE_VALUES:
   case TargetOpcode::G_CONCAT_VECTORS:
   case TargetOpcode::G_BUILD_VECTOR:
+  case TargetOpcode::G_INSERT:
   case TargetOpcode::G_EXTRACT:
     return true;
   }
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1373,6 +1373,24 @@
   }
 }

+std::pair<MachineMemOperand *, MachineMemOperand *>
+LegalizerHelper::splitMMO(MachineMemOperand *MMO, unsigned Size1,
+                          unsigned Size2, MachineFunction &MF) {
+  MachineMemOperand *NewLargeMMO = MF.getMachineMemOperand(
+      MMO->getPointerInfo(), MMO->getFlags(), Size1 / 8, MMO->getAlignment(),
+      MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
+      MMO->getOrdering(), MMO->getFailureOrdering());
+
+  // Derive the second pointer info with getWithOffset() so that any offset
+  // already present on the original MMO is preserved rather than overwritten.
+  MachinePointerInfo SmallPtrInfo =
+      MMO->getPointerInfo().getWithOffset(Size1 / 8);
+  MachineMemOperand *NewSmallMMO = MF.getMachineMemOperand(
+      SmallPtrInfo, MMO->getFlags(), Size2 / 8, MMO->getAlignment(),
+      MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
+      MMO->getOrdering(), MMO->getFailureOrdering());
+
+  return std::make_pair(NewLargeMMO, NewSmallMMO);
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
   using namespace TargetOpcode;
@@ -1511,10 +1529,41 @@
     auto &MMO = **MI.memoperands_begin();
     if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
-      // In the case of G_LOAD, this was a non-extending load already and we're
-      // about to lower to the same instruction.
-      if (MI.getOpcode() == TargetOpcode::G_LOAD)
+      if (MI.getOpcode() == TargetOpcode::G_LOAD) {
+        // This load needs splitting into power of 2 sized loads.
+        if (DstTy.isVector())
           return UnableToLegalize;
+        if (isPowerOf2_32(DstTy.getSizeInBits()))
+          return UnableToLegalize; // Don't know what we're being asked to do.
+
+        uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
+        uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
+
+        auto SplitMMOs =
+            splitMMO(&MMO, LargeSplitSize, SmallSplitSize, MIRBuilder.getMF());
+        assert(SplitMMOs.first && SplitMMOs.second && "Expected split MMOs");
+
+        LLT LargeTy = LLT::scalar(LargeSplitSize);
+        LLT SmallTy = LLT::scalar(SmallSplitSize);
+
+        unsigned LargeLdReg = MRI.createGenericVirtualRegister(LargeTy);
+        unsigned SmallLdReg = MRI.createGenericVirtualRegister(SmallTy);
+        auto LargeLoad =
+            MIRBuilder.buildLoad(LargeLdReg, PtrReg, *SplitMMOs.first);
+
+        // The small part lives LargeSplitSize / 8 bytes past the base
+        // pointer, so address it through a G_GEP rather than reusing PtrReg;
+        // the MMO offset alone is only metadata and doesn't change the
+        // address actually loaded from.
+        LLT PtrTy = MRI.getType(PtrReg);
+        auto OffsetCst =
+            MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
+        unsigned GEPReg = MRI.createGenericVirtualRegister(PtrTy);
+        auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
+        auto SmallLoad =
+            MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), *SplitMMOs.second);
+
+        insertParts(DstReg, DstTy, LargeTy, {LargeLoad.getReg(0)},
+                    SmallTy, {SmallLoad.getReg(0)});
+        MI.eraseFromParent();
+        return Legalized;
+      }
       MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
       MI.eraseFromParent();
       return Legalized;
@@ -1543,6 +1592,37 @@
       return UnableToLegalize;
   }
+  case TargetOpcode::G_STORE: {
+    // Lower a non-power of 2 store into multiple pow-2 stores.
+    // E.g. split an i24 store into an i16 store + i8 store.
+    unsigned SrcReg = MI.getOperand(0).getReg();
+    unsigned PtrReg = MI.getOperand(1).getReg();
+    LLT SrcTy = MRI.getType(SrcReg);
+    MachineMemOperand &MMO = **MI.memoperands_begin();
+    if (SrcTy.getSizeInBits() != MMO.getSize() /* in bytes */ * 8)
+      return UnableToLegalize;
+    if (SrcTy.isVector())
+      return UnableToLegalize;
+    if (isPowerOf2_32(SrcTy.getSizeInBits()))
+      return UnableToLegalize; // Don't know what we're being asked to do.
+
+    uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
+    uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
+
+    auto SplitMMOs =
+        splitMMO(&MMO, LargeSplitSize, SmallSplitSize, MIRBuilder.getMF());
+    assert(SplitMMOs.first && SplitMMOs.second && "Expected split MMOs");
+    // Split the value.
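+    // E.g. for an s24 source, ExtractLarge is the s16 at bit offset 0 and
+    // ExtractSmall is the s8 at bit offset 16. Storing the low bits at the
+    // lower address assumes a little-endian memory layout.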
+    auto ExtractLarge =
+        MIRBuilder.buildExtract(LLT::scalar(LargeSplitSize), SrcReg, 0);
+    auto ExtractSmall = MIRBuilder.buildExtract(LLT::scalar(SmallSplitSize),
+                                                SrcReg, LargeSplitSize);
+
+    MIRBuilder.buildStore(ExtractLarge.getReg(0), PtrReg, *SplitMMOs.first);
+    // As with the load case, address the high part through a G_GEP at the
+    // byte offset of the split instead of reusing the base pointer.
+    LLT PtrTy = MRI.getType(PtrReg);
+    auto OffsetCst =
+        MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
+    unsigned GEPReg = MRI.createGenericVirtualRegister(PtrTy);
+    auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
+    MIRBuilder.buildStore(ExtractSmall.getReg(0), SmallPtr.getReg(0),
+                          *SplitMMOs.second);
+    MI.eraseFromParent();
+    return Legalized;
+  }
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -217,14 +217,12 @@
       .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                  {s32, p0, 16, 8}})
       .clampScalar(0, s8, s64)
-      .widenScalarToNextPow2(0)
-      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
-      //       how to do that yet.
-      .unsupportedIfMemSizeNotPow2()
+      .lowerIfMemSizeNotPow2()
       // Lower any any-extending loads left into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
+      .widenScalarToNextPow2(0)
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1);

@@ -236,10 +234,7 @@
                                  {p0, p0, 64, 8},
                                  {v2s32, p0, 64, 8}})
       .clampScalar(0, s8, s64)
-      .widenScalarToNextPow2(0)
-      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
-      //       how to do that yet.
-      .unsupportedIfMemSizeNotPow2()
+      .lowerIfMemSizeNotPow2()
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -64,16 +64,6 @@
   ret i32 %cst
 }

-; General legalizer inability to handle types whose size wasn't a power of 2.
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(s42), %0:_(p0) :: (store 6 into %ir.addr, align 8) (in function: odd_type)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_type
-; FALLBACK-WITH-REPORT-OUT-LABEL: odd_type:
-define void @odd_type(i42* %addr) {
-  %val42 = load i42, i42* %addr
-  store i42 %val42, i42* %addr
-  ret void
-}
-
 ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(<7 x s32>), %0:_(p0) :: (store 28 into %ir.addr, align 32) (in function: odd_vector)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector
 ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-non-pow2-load-store.mir
@@ -0,0 +1,42 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=aarch64 -run-pass=legalizer %s -o - -verify-machineinstrs | FileCheck %s
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64"
+
+  define i32 @load_store_test(i24* %ptr, i24* %ptr2) {
+    %val = load i24, i24* %ptr
+    store i24 %val, i24* %ptr2
+    ret i32 0
+  }
+
+...
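+# The s24 value is legalized as an s16 piece at the base address plus an s8
+# piece 2 bytes in; the artifact combiner then folds away the intermediate
+# G_INSERT/G_EXTRACT pairs, leaving the direct narrow loads and stores below.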
+---
+name:            load_store_test
+alignment:       2
+tracksRegLiveness: true
+body:             |
+  bb.1 (%ir-block.0):
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: load_store_test
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load 2 from %ir.ptr, align 4)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[GEP]](p0) :: (load 1 from %ir.ptr + 2, align 4)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: G_STORE [[LOAD]](s16), [[COPY1]](p0) :: (store 2 into %ir.ptr2, align 4)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C2]](s64)
+    ; CHECK: G_STORE [[LOAD1]](s8), [[GEP1]](p0) :: (store 1 into %ir.ptr2 + 2, align 4)
+    ; CHECK: $w0 = COPY [[C]](s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %3:_(s32) = G_CONSTANT i32 0
+    %2:_(s24) = G_LOAD %0(p0) :: (load 3 from %ir.ptr, align 4)
+    G_STORE %2(s24), %1(p0) :: (store 3 into %ir.ptr2, align 4)
+    $w0 = COPY %3(s32)
+    RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
@@ -399,36 +399,48 @@
   bb.0:

    ; CHECK-LABEL: name: test_and_v3s8
-    ; CHECK:
[[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF1]](<4 x s32>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC1]](<4 x s8>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC2]](<4 x s8>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[EXTRACT]](<3 x s8>), 0 + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>) + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF3]](<4 x s32>) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC4]](<4 x s8>) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT1]], [[EXTRACT1]](<3 x s8>), 0 + ; CHECK: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT1]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC3]](<4 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC5]](<4 x s8>) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[AND1]](s32) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[AND2]](s32) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[AND3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8) + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT2]](<3 x s8>) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR1]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_AND %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -399,36 +399,48 @@ bb.0: ; CHECK-LABEL: name: test_or_v3s8 - ; CHECK: 
[[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF3]](<4 x s8>) - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF1]](<3 x s8>), 0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT1]](<4 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT1]], [[ANYEXT2]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT3]], [[ANYEXT4]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR1]](s32) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT5]], [[ANYEXT6]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR2]](s32) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT7]], [[ANYEXT8]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT9]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF1]](<4 x s32>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC1]](<4 x s8>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC2]](<4 x s8>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[EXTRACT]](<3 x s8>), 0 + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>) + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF3]](<4 x s32>) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC4]](<4 x s8>) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT1]], [[EXTRACT1]](<3 x s8>), 0 + ; CHECK: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT1]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC3]](<4 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), 
[[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC5]](<4 x s8>) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[OR1]](s32) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[OR2]](s32) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8) + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT2]](<3 x s8>) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR1]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_OR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -248,34 +248,34 @@ ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) ; CHECK: [[TRUNC1:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY2]](<3 x s32>) ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0 - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF1]](<4 x s8>) - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[TRUNC1]](<3 x s8>), 0 - ; CHECK: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT1]](<4 x s16>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC2]], [[TRUNC]](<3 x s8>), 0 + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF1]](<4 x s32>) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[TRUNC3]], [[TRUNC1]](<3 x s8>), 0 ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC2]](<4 x s8>) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) 
- ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT1]], [[ANYEXT2]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s32) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT3]], [[ANYEXT4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT1]](s32) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT5]], [[ANYEXT6]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT2]](s32) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT7]], [[ANYEXT8]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s32) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT1]](s32) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT2]](s32) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT9]](<3 x s32>) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -251,34 +251,31 @@ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]](s128) ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C]] ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]] - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64) ; 
CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C2]](s64), [[C3]](s64) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s128) + ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C2]](s64), [[C3]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV]](s128) ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C4]] ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[TRUNC]] ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C4]] ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C5]] - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[TRUNC]](s32) - ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[TRUNC]](s32) - ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB1]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[TRUNC]](s32) + ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[TRUNC]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[SUB1]](s32) ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[SUB]](s32) + ; CHECK: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[SUB]](s32) ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C6]] ; CHECK: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; CHECK: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV3]], [[SELECT1]] - ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV]](s128) - ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV4]], [[SELECT]] - ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[UV5]], [[SELECT2]] + ; CHECK: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[AND1]], [[SELECT1]] + ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND]], [[SELECT]] + ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[SELECT2]] ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 30 ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C7]](s64), [[C8]](s64) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[MV2]](s128) + ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C7]](s64), [[C8]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s128) ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC1]], [[C9]] ; CHECK: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C9]], [[TRUNC1]] @@ -298,8 +295,8 @@ ; CHECK: [[OR5:%[0-9]+]]:_(s64) = G_OR [[OR2]], [[SELECT5]] ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 45 ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C12]](s64), [[C13]](s64) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[MV3]](s128) + ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C12]](s64), [[C13]](s64) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[MV2]](s128) ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC2]], [[C14]] ; CHECK: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C14]], [[TRUNC2]] @@ -319,8 +316,8 @@ ; CHECK: [[OR8:%[0-9]+]]:_(s64) = G_OR [[OR5]], [[SELECT8]] ; CHECK: [[C17:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 ; CHECK: [[C18:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[MV4:%[0-9]+]]:_(s128) = 
G_MERGE_VALUES [[C17]](s64), [[C18]](s64) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[MV4]](s128) + ; CHECK: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C17]](s64), [[C18]](s64) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[MV3]](s128) ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[TRUNC3]], [[C19]] ; CHECK: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C19]], [[TRUNC3]] @@ -340,8 +337,8 @@ ; CHECK: [[OR11:%[0-9]+]]:_(s64) = G_OR [[OR8]], [[SELECT11]] ; CHECK: [[C22:%[0-9]+]]:_(s64) = G_CONSTANT i64 75 ; CHECK: [[C23:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[MV5:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C22]](s64), [[C23]](s64) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[MV5]](s128) + ; CHECK: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C22]](s64), [[C23]](s64) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[MV4]](s128) ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[TRUNC4]], [[C24]] ; CHECK: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C24]], [[TRUNC4]] @@ -361,8 +358,8 @@ ; CHECK: [[OR14:%[0-9]+]]:_(s64) = G_OR [[OR11]], [[SELECT14]] ; CHECK: [[C27:%[0-9]+]]:_(s64) = G_CONSTANT i64 90 ; CHECK: [[C28:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[MV6:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C27]](s64), [[C28]](s64) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[MV6]](s128) + ; CHECK: [[MV5:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C27]](s64), [[C28]](s64) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[MV5]](s128) ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[SUB10:%[0-9]+]]:_(s32) = G_SUB [[TRUNC5]], [[C29]] ; CHECK: [[SUB11:%[0-9]+]]:_(s32) = G_SUB [[C29]], [[TRUNC5]] @@ -382,8 +379,8 @@ ; CHECK: [[OR17:%[0-9]+]]:_(s64) = G_OR [[OR14]], [[SELECT17]] ; CHECK: [[C32:%[0-9]+]]:_(s64) = G_CONSTANT i64 105 ; CHECK: [[C33:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[MV7:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C32]](s64), [[C33]](s64) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[MV7]](s128) + ; CHECK: [[MV6:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C32]](s64), [[C33]](s64) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[MV6]](s128) ; CHECK: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[SUB12:%[0-9]+]]:_(s32) = G_SUB [[TRUNC6]], [[C34]] ; CHECK: [[SUB13:%[0-9]+]]:_(s32) = G_SUB [[C34]], [[TRUNC6]] @@ -401,16 +398,16 @@ ; CHECK: [[SELECT20:%[0-9]+]]:_(s64) = G_SELECT [[ICMP13]](s1), [[OR17]], [[SELECT19]] ; CHECK: [[OR19:%[0-9]+]]:_(s64) = G_OR [[OR16]], [[SELECT18]] ; CHECK: [[OR20:%[0-9]+]]:_(s64) = G_OR [[OR17]], [[SELECT20]] - ; CHECK: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR19]](s64) - ; CHECK: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR20]](s64) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV12]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV13]](s16) + ; CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR19]](s64) + ; CHECK: [[UV6:%[0-9]+]]:_(s16), 
[[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR20]](s64) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) ; CHECK: $vgpr1 = COPY [[ANYEXT2]](s32) ; CHECK: $vgpr2 = COPY [[ANYEXT3]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -399,36 +399,48 @@ bb.0: ; CHECK-LABEL: name: test_xor_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF3]](<4 x s8>) - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF1]](<3 x s8>), 0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT1]](<4 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT1]], [[ANYEXT2]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[XOR]](s32) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT3]], [[ANYEXT4]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[XOR1]](s32) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT5]], [[ANYEXT6]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[XOR2]](s32) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT7]], [[ANYEXT8]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT9]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF1]](<4 x s32>) + ; CHECK: 
[[EXTRACT1:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC1]](<4 x s8>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF2]](<4 x s32>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC2]](<4 x s8>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[EXTRACT]](<3 x s8>), 0 + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>) + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF3]](<4 x s32>) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC4]](<4 x s8>) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT1]], [[EXTRACT1]](<3 x s8>), 0 + ; CHECK: [[TRUNC5:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT1]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC3]](<4 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC5]](<4 x s8>) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[XOR]](s32) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[XOR1]](s32) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[XOR2]](s32) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8) + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT2]](<3 x s8>) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR1]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_XOR %0, %1