Index: include/llvm/CodeGen/GlobalISel/GISelWorkList.h =================================================================== --- include/llvm/CodeGen/GlobalISel/GISelWorkList.h +++ include/llvm/CodeGen/GlobalISel/GISelWorkList.h @@ -81,6 +81,14 @@ Worklist.push_back(I); } + /// Return true if the specified key is in the GISelWorkList, false otherwise. + bool contains(const MachineInstr *I) { + auto It = WorklistMap.find(I); + if (It == WorklistMap.end()) + return false; + return true; + } + /// Remove I from the worklist if it exists. void remove(const MachineInstr *I) { assert((Finalized || WorklistMap.empty()) && "Neither finalized nor empty"); Index: lib/CodeGen/GlobalISel/Legalizer.cpp =================================================================== --- lib/CodeGen/GlobalISel/Legalizer.cpp +++ lib/CodeGen/GlobalISel/Legalizer.cpp @@ -65,36 +65,58 @@ void Legalizer::init(MachineFunction &MF) { } +// Function return true if MI is artifact, false otherwise. +// Artifacts are able to perform combine, e.g. +// %1:_(s8) = G_TRUNC %0:_(s16) +// %2:_(s64) = G_{Z|S|ANY}EXT %1:_(s8) +// G_ZEXT, G_SEXT and G_ANYEXT are artifacts. static bool isArtifact(const MachineInstr &MI) { switch (MI.getOpcode()) { default: return false; - case TargetOpcode::G_TRUNC: case TargetOpcode::G_ZEXT: case TargetOpcode::G_ANYEXT: case TargetOpcode::G_SEXT: - case TargetOpcode::G_MERGE_VALUES: case TargetOpcode::G_UNMERGE_VALUES: + case TargetOpcode::G_EXTRACT: + return true; + } +} + +// Function return true if MI is auxiliary artifact, false otherwise. +// Auxiliary artifact is an instruction that has to wait for an artifact to +// be processed, and in the process gets combined away, e.g. +// %1:_(s8) = G_TRUNC %0:_(s16) +// %2:_(s64) = G_{Z|S|ANY}EXT %1:_(s8) +// G_TRUNC is auxiliary artifact. +static bool isAuxiliaryArtifact(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_MERGE_VALUES: case TargetOpcode::G_CONCAT_VECTORS: case TargetOpcode::G_BUILD_VECTOR: - case TargetOpcode::G_EXTRACT: return true; } } using InstListTy = GISelWorkList<256>; using ArtifactListTy = GISelWorkList<128>; +using AuxiliaryArtifactListTy = GISelWorkList<128>; namespace { class LegalizerWorkListManager : public GISelChangeObserver { InstListTy &InstList; ArtifactListTy &ArtifactList; + AuxiliaryArtifactListTy &AuxiliaryArtifactList; #ifndef NDEBUG SmallVector NewMIs; #endif public: - LegalizerWorkListManager(InstListTy &Insts, ArtifactListTy &Arts) - : InstList(Insts), ArtifactList(Arts) {} + LegalizerWorkListManager(InstListTy &Insts, ArtifactListTy &Arts, + AuxiliaryArtifactListTy &AuxArts) + : InstList(Insts), ArtifactList(Arts), AuxiliaryArtifactList(AuxArts) {} void createdOrChangedInstr(MachineInstr &MI) { // Only legalize pre-isel generic instructions. @@ -103,6 +125,8 @@ if (isPreISelGenericOpcode(MI.getOpcode())) { if (isArtifact(MI)) ArtifactList.insert(&MI); + else if (isAuxiliaryArtifact(MI)) + AuxiliaryArtifactList.insert(&MI); else InstList.insert(&MI); } @@ -126,6 +150,7 @@ LLVM_DEBUG(dbgs() << ".. .. Erasing: " << MI); InstList.remove(&MI); ArtifactList.remove(&MI); + AuxiliaryArtifactList.remove(&MI); } void changingInstr(MachineInstr &MI) override { @@ -159,6 +184,7 @@ // Populate Insts InstListTy InstList; ArtifactListTy ArtifactList; + AuxiliaryArtifactListTy AuxiliaryArtifactList; ReversePostOrderTraversal RPOT(&MF); // Perform legalization bottom up so we can DCE as we legalize. // Traverse BB in RPOT and within each basic block, add insts top down, @@ -173,12 +199,15 @@ continue; if (isArtifact(MI)) ArtifactList.deferred_insert(&MI); + else if (isAuxiliaryArtifact(MI)) + AuxiliaryArtifactList.deferred_insert(&MI); else InstList.deferred_insert(&MI); } } ArtifactList.finalize(); InstList.finalize(); + AuxiliaryArtifactList.finalize(); std::unique_ptr MIRBuilder; GISelCSEInfo *CSEInfo = nullptr; bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences() @@ -192,7 +221,8 @@ } else MIRBuilder = make_unique(); // This observer keeps the worklist updated. - LegalizerWorkListManager WorkListObserver(InstList, ArtifactList); + LegalizerWorkListManager WorkListObserver(InstList, ArtifactList, + AuxiliaryArtifactList); // We want both WorkListObserver as well as CSEInfo to observe all changes. // Use the wrapper observer. GISelObserverWrapper WrapperObserver(&WorkListObserver); @@ -209,6 +239,10 @@ WrapperObserver.erasingInstr(*DeadMI); }; bool Changed = false; + // We manually move instructions from TryLaterArtifactList to ArtifactList. + // There is no need to add this list to an Observer. + GISelWorkList<8> TryLaterArtifactList; + do { while (!InstList.empty()) { MachineInstr &MI = *InstList.pop_back_val(); @@ -232,6 +266,11 @@ WorkListObserver.printNewInstrs(); Changed |= Res == LegalizerHelper::Legalized; } + + while (!TryLaterArtifactList.empty()) { + ArtifactList.insert(TryLaterArtifactList.pop_back_val()); + } + while (!ArtifactList.empty()) { MachineInstr &MI = *ArtifactList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); @@ -253,13 +292,56 @@ Changed = true; continue; } - // If this was not an artifact (that could be combined away), this might - // need special handling. Add it to InstList, so when it's processed - // there, it has to be legal or specially handled. - else - InstList.insert(&MI); + // Artifact MI could not be combined away. + // MI could have to wait for some of it unresolved uses to be combined + // away/legalized. When this is the case add MI to TryLaterArtifactList + // and try to combine it again later. + // When MI has all uses resolved, handle MI as an instruction: + // Add MI to InstList, so when it's processed there, it has to be + // legal or specially handled. + else { + bool HasUnresolvedUses = false; + for (auto &UseOp : MI.uses()) { + if (UseOp.isReg()) { + MachineInstr *UseInst = MRI.getVRegDef(UseOp.getReg()); + // Use is an unresolved artifact or is in InstList. + // InstList now contains instructions that used to be artifacts or + // instructions created by artifact combiner. + if ((isArtifact(*UseInst) && + (ArtifactList.contains(UseInst) || + TryLaterArtifactList.contains(UseInst))) || + InstList.contains(UseInst)) { + HasUnresolvedUses = true; + break; + } + // Combine with auxiliary artifact failed. Either we are missing + // such combine or auxiliary artifact has to be legalized. + if (isAuxiliaryArtifact(*UseInst) && + AuxiliaryArtifactList.contains(UseInst)) { + AuxiliaryArtifactList.remove(UseInst); + InstList.insert(UseInst); + HasUnresolvedUses = true; + break; + } + } + } + if (HasUnresolvedUses) + TryLaterArtifactList.insert(&MI); + else + InstList.insert(&MI); + } + } + // Auxiliary artifacts that did not get combined away need to be processed + // like instructions. Do this after we have processed everything else. + if (InstList.empty() && ArtifactList.empty() && + TryLaterArtifactList.empty()) { + SmallVector Flip; + while (!AuxiliaryArtifactList.empty()) + Flip.push_back(AuxiliaryArtifactList.pop_back_val()); + while (!Flip.empty()) + InstList.insert(Flip.pop_back_val()); } - } while (!InstList.empty()); + } while (!InstList.empty() || !TryLaterArtifactList.empty()); // For now don't support if new blocks are inserted - we would need to fix the // outerloop for that. Index: test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir =================================================================== --- test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir +++ test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir @@ -1,31 +1,29 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=aarch64 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s -# RUN: llc -march=aarch64 -O0 -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERROR %s -# ERROR: unable to legalize instruction: %3:_(s64) = G_ANYEXT %1:_(s4) (in function: test_unmerge_s4) +# RUN: llc -march=aarch64 -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_unmerge_s4 body: | bb.0: + ; CHECK-LABEL: name: test_unmerge_s4 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s16) = G_ZEXT [[TRUNC]](s8) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC1]](s16) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT1]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT1]], [[ANYEXT2]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC3]](s16) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s4) = G_TRUNC [[UV]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC4]](s4) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s4) = G_TRUNC [[UV1]](s8) - ; CHECK: $x0 = COPY [[ANYEXT3]](s64) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[AND]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY3]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](s16) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s8) + ; CHECK: $x0 = COPY [[ANYEXT]](s64) %0:_(s8) = G_CONSTANT i8 0 %1:_(s4), %2:_(s4)= G_UNMERGE_VALUES %0 %3:_(s64) = G_ANYEXT %1 Index: test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir =================================================================== --- test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir +++ test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir @@ -27,19 +27,20 @@ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[DEF1]] - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[COPY2]] - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY3]], [[COPY4]] - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]] - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND2]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ADD]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C4]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C4]] ; CHECK: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND3]](s32), [[AND4]] - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[COPY8]] + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY8]], [[COPY9]] ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[OR1]](s32) ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.2 ; CHECK: bb.2: Index: test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir +++ test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir @@ -61,12 +61,28 @@ ; CHECK-LABEL: name: test_sext_trunc_v2s32_to_v2s8_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) - ; CHECK: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[TRUNC]], [[BUILD_VECTOR]](<2 x s16>) - ; CHECK: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL]], [[BUILD_VECTOR]](<2 x s16>) - ; CHECK: $vgpr0 = COPY [[ASHR]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[TRUNC1]](<2 x s16>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT1]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC2]](s16) + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT2]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) + ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC3]](s16) + ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT1]], [[ZEXT3]](s32) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_TRUNC %0 %2:_(<2 x s16>) = G_SEXT %1 Index: test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir +++ test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir @@ -51,10 +51,11 @@ ; CHECK-LABEL: name: test_zext_trunc_v2s32_to_v2s8_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) - ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BUILD_VECTOR]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC]](s16) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>) + ; CHECK: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC1]], [[BUILD_VECTOR]] ; CHECK: $vgpr0 = COPY [[AND]](<2 x s16>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_TRUNC %0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -511,13 +511,8 @@ ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[ANYEXT7]] ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[AND3]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8) - ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s8>) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s8) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s8) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<4 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT8]](<4 x s32>) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(<4 x s8>) = G_IMPLICIT_DEF %2:_(<4 x s8>) = G_AND %0, %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -448,11 +448,9 @@ ; CHECK-LABEL: name: extract_s8_v4s8_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC]](<4 x s8>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[ANYEXT]](<4 x s16>), 0 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[EXTRACT]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s8) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s8) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 0 %2:_(s32) = G_ANYEXT %1 @@ -466,11 +464,9 @@ ; CHECK-LABEL: name: extract_s8_v4s8_offset8 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC]](<4 x s8>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[ANYEXT]](<4 x s16>), 16 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[EXTRACT]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s8) = G_EXTRACT [[TRUNC]](<4 x s8>), 8 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s8) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 8 %2:_(s32) = G_ANYEXT %1 @@ -484,11 +480,9 @@ ; CHECK-LABEL: name: extract_s8_v4s8_offset16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC]](<4 x s8>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[ANYEXT]](<4 x s16>), 32 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[EXTRACT]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s8) = G_EXTRACT [[TRUNC]](<4 x s8>), 16 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s8) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 16 %2:_(s32) = G_ANYEXT %1 @@ -502,11 +496,9 @@ ; CHECK-LABEL: name: extract_s8_v4s8_offset24 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC]](<4 x s8>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[ANYEXT]](<4 x s16>), 48 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[EXTRACT]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s8) = G_EXTRACT [[TRUNC]](<4 x s8>), 24 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s8) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 24 %2:_(s32) = G_ANYEXT %1 @@ -521,11 +513,9 @@ ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(<3 x s16>) = G_ANYEXT [[EXTRACT]](<3 x s8>) - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[ANYEXT]](<3 x s16>), 32 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[EXTRACT1]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s8) = G_EXTRACT [[EXTRACT]](<3 x s8>), 16 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s8) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 16 %2:_(s32) = G_ANYEXT %1 @@ -540,11 +530,9 @@ ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s1>) = G_TRUNC [[DEF]](<6 x s32>) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s1>) = G_EXTRACT [[TRUNC]](<6 x s1>), 0 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(<5 x s16>) = G_ANYEXT [[EXTRACT]](<5 x s1>) - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[ANYEXT]](<5 x s16>), 64 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[EXTRACT1]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC1]](s1) - ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s1) = G_EXTRACT [[EXTRACT]](<5 x s1>), 4 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s1) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<5 x s1>) = G_IMPLICIT_DEF %1:_(s1) = G_EXTRACT %0, 4 %2:_(s32) = G_ANYEXT %1 @@ -916,10 +904,7 @@ ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF]](<6 x s32>) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[TRUNC]](<6 x s16>), 0 - ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC1:%[0-9]+]]:_(<6 x s16>) = G_TRUNC [[DEF1]](<6 x s32>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[TRUNC1]], [[EXTRACT]](<5 x s16>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<6 x s16>), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[EXTRACT]](<5 x s16>), 0 ; CHECK: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 0 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -321,12 +321,8 @@ ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[DEF]](<4 x s32>) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s1>) = G_EXTRACT [[TRUNC]](<4 x s1>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(s1), [[UV1:%[0-9]+]]:_(s1), [[UV2:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s1>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s1) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s1>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT]](<3 x s32>) %0:_(<3 x s1>) = G_IMPLICIT_DEF %1:_(<3 x s32>) = G_ANYEXT %0 $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -355,12 +351,8 @@ ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s32>) = G_ANYEXT %0 $vgpr0_vgpr1_vgpr2 = COPY %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -511,13 +511,8 @@ ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8) - ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s8>) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s8) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s8) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<4 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT8]](<4 x s32>) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(<4 x s8>) = G_IMPLICIT_DEF %2:_(<4 x s8>) = G_OR %0, %1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values-xfail.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values-xfail.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values-xfail.mir @@ -1,6 +1,6 @@ # RUN: not llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -o - %s 2>&1 | FileCheck %s -# CHECK: LLVM ERROR: unable to legalize instruction: %1:_(s1), %2:_(s1) = G_UNMERGE_VALUES %0:_(<2 x s1>) (in function: test_unmerge_v2s1) +# CHECK: LLVM ERROR: unable to legalize instruction: %0:_(<2 x s1>) = G_TRUNC %3:_(<2 x s32>) (in function: test_unmerge_v2s1) --- name: test_unmerge_v2s1 Index: test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -o - %s | FileCheck %s --- name: test_unmerge_s32_s64 @@ -250,31 +250,28 @@ ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]](s128) ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C]] ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]] - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C2]](s64), [[C1]](s64) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s128) + ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C2]](s64), [[C1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV]](s128) ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C4]] - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[TRUNC]](s32) - ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[TRUNC]](s32) - ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB1]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[TRUNC]](s32) + ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[AND1]], [[TRUNC]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[SUB1]](s32) ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[SUB]](s32) + ; CHECK: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[SUB]](s32) ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C1]] ; CHECK: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] - ; CHECK: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV3]], [[SELECT1]] - ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV]](s128) - ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV4]], [[SELECT]] - ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[UV5]], [[SELECT2]] + ; CHECK: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[AND1]], [[SELECT1]] + ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[AND]], [[SELECT]] + ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[AND1]], [[SELECT2]] ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 30 - ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C5]](s64), [[C1]](s64) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[MV2]](s128) + ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C5]](s64), [[C1]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s128) ; CHECK: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC1]], [[C3]] ; CHECK: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC1]] ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC1]](s32), [[C3]] @@ -290,8 +287,8 @@ ; CHECK: [[OR4:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[SELECT3]] ; CHECK: [[OR5:%[0-9]+]]:_(s64) = G_OR [[OR2]], [[SELECT5]] ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 45 - ; CHECK: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C6]](s64), [[C1]](s64) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[MV3]](s128) + ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C6]](s64), [[C1]](s64) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[MV2]](s128) ; CHECK: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC2]], [[C3]] ; CHECK: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC2]] ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC2]](s32), [[C3]] @@ -307,8 +304,8 @@ ; CHECK: [[OR7:%[0-9]+]]:_(s64) = G_OR [[OR4]], [[SELECT6]] ; CHECK: [[OR8:%[0-9]+]]:_(s64) = G_OR [[OR5]], [[SELECT8]] ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 - ; CHECK: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C7]](s64), [[C1]](s64) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[MV4]](s128) + ; CHECK: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C7]](s64), [[C1]](s64) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[MV3]](s128) ; CHECK: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[TRUNC3]], [[C3]] ; CHECK: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC3]] ; CHECK: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC3]](s32), [[C3]] @@ -324,8 +321,8 @@ ; CHECK: [[OR10:%[0-9]+]]:_(s64) = G_OR [[OR7]], [[SELECT9]] ; CHECK: [[OR11:%[0-9]+]]:_(s64) = G_OR [[OR8]], [[SELECT11]] ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 75 - ; CHECK: [[MV5:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C8]](s64), [[C1]](s64) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[MV5]](s128) + ; CHECK: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C8]](s64), [[C1]](s64) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[MV4]](s128) ; CHECK: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[TRUNC4]], [[C3]] ; CHECK: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC4]] ; CHECK: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC4]](s32), [[C3]] @@ -341,8 +338,8 @@ ; CHECK: [[OR13:%[0-9]+]]:_(s64) = G_OR [[OR10]], [[SELECT12]] ; CHECK: [[OR14:%[0-9]+]]:_(s64) = G_OR [[OR11]], [[SELECT14]] ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 90 - ; CHECK: [[MV6:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C9]](s64), [[C1]](s64) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[MV6]](s128) + ; CHECK: [[MV5:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C9]](s64), [[C1]](s64) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[MV5]](s128) ; CHECK: [[SUB10:%[0-9]+]]:_(s32) = G_SUB [[TRUNC5]], [[C3]] ; CHECK: [[SUB11:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC5]] ; CHECK: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC5]](s32), [[C3]] @@ -358,8 +355,8 @@ ; CHECK: [[OR16:%[0-9]+]]:_(s64) = G_OR [[OR13]], [[SELECT15]] ; CHECK: [[OR17:%[0-9]+]]:_(s64) = G_OR [[OR14]], [[SELECT17]] ; CHECK: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 105 - ; CHECK: [[MV7:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C10]](s64), [[C1]](s64) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[MV7]](s128) + ; CHECK: [[MV6:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C10]](s64), [[C1]](s64) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[MV6]](s128) ; CHECK: [[SUB12:%[0-9]+]]:_(s32) = G_SUB [[TRUNC6]], [[C3]] ; CHECK: [[SUB13:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC6]] ; CHECK: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC6]](s32), [[C3]] @@ -374,16 +371,16 @@ ; CHECK: [[SELECT20:%[0-9]+]]:_(s64) = G_SELECT [[ICMP13]](s1), [[OR17]], [[SELECT19]] ; CHECK: [[OR19:%[0-9]+]]:_(s64) = G_OR [[OR16]], [[SELECT18]] ; CHECK: [[OR20:%[0-9]+]]:_(s64) = G_OR [[OR17]], [[SELECT20]] - ; CHECK: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR19]](s64) - ; CHECK: [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR20]](s64) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV12]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV13]](s16) + ; CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR19]](s64) + ; CHECK: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR20]](s64) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s16) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT1]](s32) ; CHECK: $vgpr1 = COPY [[ANYEXT2]](s32) ; CHECK: $vgpr2 = COPY [[ANYEXT3]](s32) Index: test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -511,13 +511,8 @@ ; CHECK: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT6]], [[ANYEXT7]] ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8) - ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s8>) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV9]](s8) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV10]](s8) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV11]](s8) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<4 x s32>) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<4 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT8]](<4 x s32>) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(<4 x s8>) = G_IMPLICIT_DEF %2:_(<4 x s8>) = G_XOR %0, %1